Skip to content

Commit

Permalink
1.0.0.180910_beta
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangsjsky committed Sep 10, 2018
0 parents commit c62c7dd
Show file tree
Hide file tree
Showing 178 changed files with 26,967 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Readme.docx
Readme.pdf
35 changes: 35 additions & 0 deletions CNTools.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/env Rscript

# Reads a segment table from stdin, reduces it with CNTools and writes the
# reduced segment table to stdout. Optionally draws a hierarchical clustering
# dendrogram of the samples into a PDF file (--pdf).

library(argparser, quietly = T)


p = arg_parser("Compare CN (Copy Number) of any paired samples.
Input tsv:
ID chrom loc.start loc.end seg.mean
TCGA-06-0126-01A-01 1 554267 639580 0.9002
The header is mandatory. The names of header are also mandatory. Order of columns is non-mandatory.
")
p = add_argument(p, "--pdf", help = "[PDF] Output cluster")
argv = parse_args(p)

# Divert regular output to stderr so that diagnostics (and any messages
# printed while loading CNTools) never pollute the table written to stdout.
sink(stderr())
cat(paste0('[DEBUG] ', Sys.time(), ' Check if the following variables are correct as expected:\n'))
str(argv)
cat('\n')

library(CNTools, quietly = T)

data = read.delim(file('stdin'), header = T, check.names = F)
cnseg <- CNSeg(data)
# Reduced segments by region, summarised with the median, without imputation
# and without the X/Y chromosomes (as selected by the arguments below).
rdseg <- getRS(cnseg, by = "region", imput = FALSE, XY = FALSE, what = "median")
reducedseg <- rs(rdseg)

if(!is.na(argv$pdf)){
    pdf(argv$pdf)
    hc = hclust(getDist(rdseg, method = "euclidian"), method = "complete")
    plot(hc, hang = -1, cex = 0.8, main = "", xlab = "", ylab = "", sub = "")
    # Close the device explicitly so the PDF is flushed and finalised;
    # invisible() keeps dev.off()'s return value from being printed.
    invisible(dev.off())
}


# Restore stdout before emitting the result table.
sink()
write.table(reducedseg, stdout(), sep = "\t", quote = F, row.names = F)
71 changes: 71 additions & 0 deletions GOmapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python
"""Annotate a list of gene symbols with their GO terms.

Inputs (all given on the command line):
  -i  text file with one gene symbol per line
  -G  tab-separated GO annotation table with a header line that contains at
      least the columns ``GeneID``, ``Category`` and ``GO_term``
  -C  tab-separated ID conversion table (NCBI gene info) with a header line;
      column 2 is read as the gene ID, column 3 as the symbol and column 5 as
      a ``|``-separated synonym list

One tab-separated line per annotated gene is written to stdout.
"""

import os
import argparse
import sys

# Output column order; also the only categories that are collected.
GO_CATEGORIES = ('Function', 'Component', 'Process')


def load_go_terms(go_path):
    """Return ``{gene_id: {category: set(GO terms)}}`` read from *go_path*.

    Every gene ID seen in the table gets an entry with all three categories,
    even when its rows carry a category outside ``GO_CATEGORIES``.
    Raises ``KeyError`` if a required column is missing from the header.
    """
    go_terms = {}
    with open(go_path) as handle:
        # next() with a default works on Python 2 and 3 and tolerates an
        # empty file (the original ``handle.next()`` is Python 2 only).
        header = next(handle, '').strip().split('\t')
        for raw in handle:
            if not raw.strip():
                continue  # ignore blank lines instead of failing on them
            record = dict(zip(header, raw.strip().split('\t')))
            gene_id = record['GeneID']
            if gene_id not in go_terms:
                go_terms[gene_id] = {name: set() for name in GO_CATEGORIES}
            category = record['Category']
            if category in GO_CATEGORIES:
                go_terms[gene_id][category].add(record['GO_term'])
    return go_terms


def load_symbol_index(convert_path):
    """Return ``{symbol_or_synonym: gene_id}`` read from *convert_path*.

    A name that is the official symbol of one gene always wins over the same
    name appearing as a synonym of another gene; among synonyms the first
    line in the file wins. This replaces a per-gene scan of the whole table
    whose result for ambiguous names depended on dict iteration order.
    """
    official = {}
    synonyms = {}
    with open(convert_path) as handle:
        next(handle, None)  # skip the header line
        for raw in handle:
            columns = raw.strip().split('\t')
            if len(columns) < 3:
                continue  # blank or truncated line: nothing to index
            gene_id, symbol = columns[1], columns[2]
            official[symbol] = gene_id
            aliases = columns[4].split('|') if len(columns) > 4 else []
            for alias in aliases:
                synonyms.setdefault(alias, gene_id)
    index = dict(synonyms)
    index.update(official)
    return index


def annotate(genes, symbol_index, go_terms):
    """Yield ``[gene, functions, components, processes]`` for each gene.

    Genes that cannot be mapped to a gene ID, or whose gene ID has no GO
    entry, are skipped. Terms within a cell are sorted and joined with ``;``
    so the output is reproducible between runs.
    """
    for gene in genes:
        gene_id = symbol_index.get(gene)
        if gene_id is None or gene_id not in go_terms:
            continue
        terms = go_terms[gene_id]
        yield [gene] + [';'.join(sorted(terms[name]))
                        for name in GO_CATEGORIES]


def main(argv=None):
    """Parse the command line and write the annotation table to stdout."""
    parser = argparse.ArgumentParser(description = 'GO mapping (Gene Symbol list)')
    parser.add_argument("-i", help="Input Symbol list", required=True)
    parser.add_argument("-G", help="GO file", required=True)
    parser.add_argument("-C", help="ID convert file(NCBI gene info)", required=True)
    args = parser.parse_args(argv)

    go_terms = load_go_terms(args.G)
    symbol_index = load_symbol_index(args.C)
    with open(args.i) as handle:
        genes = [line.strip() for line in handle if line.strip()]

    # Report symbols that could not be converted; previously the first such
    # symbol aborted the run with a KeyError.
    unmapped = [gene for gene in genes if gene not in symbol_index]
    if unmapped:
        sys.stderr.write('[WARNING] %d symbol(s) not found in %s: %s\n'
                         % (len(unmapped), args.C, ', '.join(unmapped)))

    sys.stdout.write('\t'.join(['Gene','GO_Function','GO_Component','GO_Process'])+'\n')
    for row in annotate(genes, symbol_index, go_terms):
        sys.stdout.write('\t'.join(row)+'\n')


if __name__ == '__main__':
    main()
17 changes: 17 additions & 0 deletions KEGGmapping/IDconvert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def Symbol2ID_dict(convertfile):
    """Build a ``{symbol: gene_id}`` mapping from a tab-separated table.

    The first line of *convertfile* is treated as a header and skipped. For
    every following line the third column is used as the key (symbol) and
    the second column as the value (ID). When a symbol occurs on several
    lines the last one wins. Blank or truncated lines are ignored.
    """
    convertDict = {}
    with open(convertfile) as convert:
        # Built-in next() works on Python 2 and 3 (file.next() is Python 2
        # only); the default avoids StopIteration on an empty file.
        next(convert, None)
        for line in convert:
            fields = line.strip().split('\t')
            if len(fields) < 3:
                continue
            convertDict[fields[2]] = fields[1]
    return convertDict

def ID2Symbol_dict(convertfile):
    """Build a ``{gene_id: set(names)}`` mapping from a tab-separated table.

    The first line of *convertfile* is treated as a header and skipped. For
    every following line the second column is the key (ID); the value is the
    set made of the third column (symbol) plus the ``|``-separated entries of
    the fifth column (synonyms), when that column is present. Blank or
    truncated lines are ignored.
    """
    convertDict = {}
    with open(convertfile) as convert:
        # Built-in next() works on Python 2 and 3 (file.next() is Python 2
        # only); the default avoids StopIteration on an empty file.
        next(convert, None)
        for line in convert:
            fields = line.strip().split('\t')
            if len(fields) < 3:
                continue
            synonyms = fields[4].split('|') if len(fields) > 4 else []
            convertDict[fields[1]] = set([fields[2]] + synonyms)
    return convertDict
Binary file added KEGGmapping/IDconvert.pyc
Binary file not shown.
210 changes: 210 additions & 0 deletions KEGGmapping/KGMLparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
# -*- coding: utf-8 -*-
# Copyright 2013 by Leighton Pritchard. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""This module provides classes and functions to parse a KGML pathway map.
The KGML pathway map is parsed into the object structure defined in
the KGMLpathway module, which is imported below.
Classes:
- KGMLParser Parses KGML file
Functions:
- read Returns a single Pathway object, using KGMLParser
internally
"""

from __future__ import print_function

try:
import xml.etree.cElementTree as ElementTree
except ImportError:
import xml.etree.ElementTree as ElementTree


from Bio._py3k import StringIO

from KGMLpathway import *


def read(handle, debug=0):
    """Return the single Pathway contained in ``handle``.

    ``handle`` and ``debug`` are forwarded unchanged to ``parse``. Exactly
    one pathway is expected: a ``ValueError`` is raised when the parser
    yields none, and also when it yields a second one.
    """
    pathways = parse(handle, debug)
    # next() with a default turns generator exhaustion into None.
    pathway = next(pathways, None)
    if pathway is None:
        raise ValueError("No pathways found in handle")
    if next(pathways, None) is not None:
        raise ValueError("More than one pathway found in handle")
    return pathway


def parse(handle, debug=0):
    """Return an iterator over the Pathway elements found in ``handle``.

    Arguments:
    - handle - file handle to a KGML file for parsing, or a string holding
      the KGML document itself
    - debug - integer for amount of debug information to print (accepted
      for interface compatibility; not used by this function)

    This is a generator, so nothing is read or validated until the first
    item is requested. A ``TypeError`` is raised at that point if ``handle``
    is neither a string nor an object with a ``read`` attribute.
    """
    # Check handle
    if not hasattr(handle, 'read'):
        if not isinstance(handle, str):
            # TypeError is still caught by callers using ``except Exception``.
            raise TypeError("An XML-containing handle or an XML string "
                            "must be provided")
        handle = StringIO(handle)
    # Parse XML and return each Pathway. Only 'end' events are requested
    # because an element is complete (children included) only at that point.
    for event, elem in ElementTree.iterparse(handle, events=('end',)):
        if elem.tag == "pathway":
            yield KGMLParser(elem).parse()
            # Release the parsed subtree once the caller resumes iteration.
            elem.clear()


class KGMLParser(object):
    """Parses a KGML XML Pathway entry into a Pathway object.

    The element handed to the constructor is stored as ``self.entry``;
    ``parse()`` walks it and returns the populated ``Pathway``.
    """

    def __init__(self, elem):
        """Store the ``pathway`` XML element to be parsed."""
        self.entry = elem

    def parse(self):
        """Parse the stored element and return the resulting Pathway.

        XML attributes are copied onto the corresponding objects with
        ``setattr``. Child elements are visited by iterating the element
        directly, because ``Element.getchildren()`` no longer exists on
        Python 3.9 and later. A ``BiopythonParserWarning`` is issued for
        every top-level child whose tag is not entry, reaction or relation.
        """

        def _copy_attributes(attrib, target):
            # Mirror every XML attribute as an attribute of ``target``.
            for key, value in attrib.items():
                setattr(target, key, value)

        def _parse_pathway(attrib):
            _copy_attributes(attrib, self.pathway)

        def _parse_entry(element):
            new_entry = Entry()
            _copy_attributes(element.attrib, new_entry)
            for subelement in element:
                if subelement.tag == 'graphics':
                    _parse_graphics(subelement, new_entry)
                elif subelement.tag == 'component':
                    _parse_component(subelement, new_entry)
            self.pathway.add_entry(new_entry)

        def _parse_graphics(element, entry):
            new_graphics = Graphics(entry)
            _copy_attributes(element.attrib, new_graphics)
            entry.add_graphics(new_graphics)

        def _parse_component(element, entry):
            new_component = Component(entry)
            _copy_attributes(element.attrib, new_component)
            entry.add_component(new_component)

        def _parse_reaction(element):
            new_reaction = Reaction()
            _copy_attributes(element.attrib, new_reaction)
            for subelement in element:
                if subelement.tag == 'substrate':
                    new_reaction.add_substrate(int(subelement.attrib['id']))
                elif subelement.tag == 'product':
                    new_reaction.add_product(int(subelement.attrib['id']))
            self.pathway.add_reaction(new_reaction)

        def _parse_relation(element):
            new_relation = Relation()
            new_relation.entry1 = int(element.attrib['entry1'])
            new_relation.entry2 = int(element.attrib['entry2'])
            new_relation.type = element.attrib['type']
            for subtype in element:
                name, value = subtype.attrib['name'], subtype.attrib['value']
                # Compound subtypes carry a numeric value; it is stored as int.
                if name in ('compound', 'hidden compound'):
                    new_relation.subtypes.append((name, int(value)))
                else:
                    new_relation.subtypes.append((name, value))
            self.pathway.add_relation(new_relation)

        # ==========
        # Initialise Pathway
        self.pathway = Pathway()
        # Get information about the pathway itself
        _parse_pathway(self.entry.attrib)
        for element in self.entry:
            if element.tag == 'entry':
                _parse_entry(element)
            elif element.tag == 'reaction':
                _parse_reaction(element)
            elif element.tag == 'relation':
                _parse_relation(element)
            # Parsing of some elements not implemented - no examples yet
            else:
                # This should warn us of any unimplemented tags
                import warnings
                from Bio import BiopythonParserWarning
                warnings.warn("Warning: tag %s not implemented in parser" %
                              element.tag, BiopythonParserWarning)
        return self.pathway


if __name__ == '__main__':
    # Manual smoke test: load one KGML file, recolour the graphics of every
    # gene entry, then print the colours back.
    import sys

    # An optional first command-line argument overrides the built-in path.
    kgml_path = '/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml'
    if len(sys.argv) > 1:
        kgml_path = sys.argv[1]

    # Plain 'r' replaces 'rU', which open() rejects on Python 3.11+; the
    # context manager closes the handle once read() has consumed the file.
    with open(kgml_path, 'r') as kgml_handle:
        pathway = read(kgml_handle)
    for gene in pathway.genes:
        # print ([x.split(':')[1] for x in gene._names])
        for g in gene.graphics:
            g._setfgcolor('#000001')
    for gene in pathway.genes:
        for g in gene.graphics:
            print(g.fgcolor)


# The triple-quoted block below is a bare string expression, not executed
# code: it keeps a set of manual checks (metabolism, relations, components,
# KGML output, bounds) that are currently disabled.
'''
# Check large metabolism
pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU'))
print(pathway)
for k, v in list(pathway.entries.items())[:20]:
print(v)
for r in list(pathway.reactions)[:20]:
print(r)
print(len(pathway.maps))
# Check relations
pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU'))
print(pathway)
for k, v in list(pathway.entries.items())[:20]:
print(v)
for r in list(pathway.reactions[:20]):
print(r)
for r in list(pathway.relations[:20]):
print(r)
print(len(pathway.maps))
# Check components
pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU'))
print(pathway)
for k, v in pathway.entries.items():
print(v)
print(len(pathway.maps))
# Test XML representation
print(pathway.get_KGML())
# Test bounds of pathway
print(pathway.bounds)
'''
Binary file added KEGGmapping/KGMLparser.pyc
Binary file not shown.
Loading

0 comments on commit c62c7dd

Please sign in to comment.