1.0.0.180910_beta

zhangsjsky · Sep 10, 2018 · c62c7dd · c62c7dd
commit c62c7dd
Show file tree

Hide file tree

Showing 178 changed files with 26,967 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+Readme.docx
+Readme.pdf
diff --git a/CNTools.R b/CNTools.R
@@ -0,0 +1,35 @@
+#!/bin/env Rscript
+
+library(argparser, quietly = T)
+
+
+# Command-line interface. The description is also the --help text, so it
+# documents the table layout expected on stdin.
+p = arg_parser("Compare CN (Copy Number) of any paired samples.
+Input tsv:
+ID                      chrom  loc.start  loc.end  seg.mean
+TCGA-06-0126-01A-01     1      554267     639580   0.9002
+The header is mandatory. The names of header are also mandatory. Order of columns is non-mandatory.
+")
+p = add_argument(p, "--pdf", help = "[PDF] Output cluster")
+argv = parse_args(p)
+
+# Send everything printed from here on to stderr, so that stdout carries
+# only the result table written at the very end.
+sink(stderr())
+cat(paste0('[DEBUG] ', Sys.time(), ' Check if the following variables are correct as expected:\n'))
+str(argv)
+cat('\n')
+
+library(CNTools, quietly = TRUE)
+
+# Read the segment table from stdin; check.names = FALSE keeps the column
+# names exactly as they appear in the header.
+data = read.delim(file('stdin'), header = TRUE, check.names = FALSE)
+cnseg <- CNSeg(data)
+rdseg <- getRS(cnseg, by = "region", imput = FALSE, XY = FALSE, what = "median")
+reducedseg <- rs(rdseg)
+
+# Optional dendrogram of the samples, drawn only when --pdf was given.
+if(!is.na(argv$pdf)){
+    pdf(argv$pdf)
+    # NOTE(review): the spelling "euclidian" is passed to getDist verbatim --
+    # confirm it is a method name CNTools accepts.
+    hc = hclust(getDist(rdseg, method = "euclidian"), method = "complete")
+    plot(hc, hang = -1, cex = 0.8, main = "", xlab = "", ylab = "", sub = "")
+    # Close the device explicitly so the PDF is finalised here instead of
+    # relying on interpreter shutdown; invisible() suppresses the printout
+    # of the device number.
+    invisible(dev.off())
+}
+
+
+# Restore stdout before emitting the result table.
+sink()
+write.table(reducedseg, stdout(), sep = "\t", quote = FALSE, row.names = FALSE)
diff --git a/GOmapping.py b/GOmapping.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+import os
+import argparse
+import sys
+
+parser = argparse.ArgumentParser(description = 'GO mapping (Gene Symbol list)')
+help_desc = "Input Symbol list"
+parser.add_argument("-i", help=help_desc, required=True)
+help_desc = "GO file"
+parser.add_argument("-G", help=help_desc, required=True)
+help_desc = "ID convert file(NCBI gene info)"
+parser.add_argument("-C", help=help_desc, required=True)
+
+args = parser.parse_args()
+
+Symbolfile = args.i
+GOfile = args.G
+convertfile = args.C
+
+GOdict = {}
+with open(GOfile) as data:
+	field = data.next().strip().split('\t')
+	for line in data:
+		line = line.strip().split('\t')
+		items = zip(field, line)
+		item = {}
+		for (name, value) in items:
+			item[name] = value
+		GeneID = item['GeneID']
+		if GeneID in GOdict:
+			if item['Category'] == 'Component':
+				GOdict[GeneID]['Component'].add(item['GO_term'])
+			elif item['Category'] == 'Process':
+				GOdict[GeneID]['Process'].add(item['GO_term'])
+			elif item['Category'] == 'Function':
+				GOdict[GeneID]['Function'].add(item['GO_term'])
+		else:
+			GOdict[GeneID] = {}
+			GOdict[GeneID]['Component'] = set()
+			GOdict[GeneID]['Process'] = set()
+			GOdict[GeneID]['Function'] = set()
+			if item['Category'] == 'Component':
+				GOdict[GeneID]['Component'].add(item['GO_term'])
+			elif item['Category'] == 'Process':
+				GOdict[GeneID]['Process'].add(item['GO_term'])
+			elif item['Category'] == 'Function':
+				GOdict[GeneID]['Function'].add(item['GO_term'])
+
+
+
+convertDict = {}
+with open(convertfile) as convert:
+	convert.next()
+	for line in convert:
+		line = line.strip().split('\t')
+		convertDict[line[1]] = set([line[2]]+line[4].split('|'))
+
+
+genelst = [x.strip() for x in open(Symbolfile)]
+
+geneDict = {}
+for gene in genelst:
+	for geneID in convertDict:
+		if gene in convertDict[geneID]:
+			geneDict[gene] = geneID
+
+sys.stdout.write('\t'.join(['Gene','GO_Function','GO_Component','GO_Process'])+'\n')
+for gene in genelst:
+	if geneDict[gene] in GOdict:
+		sys.stdout.write('\t'.join([gene, ';'.join(GOdict[geneDict[gene]]['Function']), ';'.join(GOdict[geneDict[gene]]['Component']), ';'.join(GOdict[geneDict[gene]]['Process'])])+'\n')
diff --git a/KEGGmapping/IDconvert.py b/KEGGmapping/IDconvert.py
@@ -0,0 +1,17 @@
+def Symbol2ID_dict(convertfile):
+	convertDict = {}
+	with open(convertfile) as convert:
+		convert.next()
+		for line in convert:
+			line = line.strip().split('\t')
+			convertDict[line[2]] = line[1]
+	return convertDict
+
+def ID2Symbol_dict(convertfile):
+	convertDict = {}
+	with open(convertfile) as convert:
+		convert.next()
+		for line in convert:
+			line = line.strip().split('\t')
+			convertDict[line[1]] = set([line[2]]+line[4].split('|'))
+	return convertDict
diff --git a/KEGGmapping/IDconvert.pyc b/KEGGmapping/IDconvert.pyc
diff --git a/KEGGmapping/KGMLparser.py b/KEGGmapping/KGMLparser.py
@@ -0,0 +1,210 @@
+# -*- coding: utf-8 -*-
+# Copyright 2013 by Leighton Pritchard.  All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+
+"""This module provides classes and functions to parse a KGML pathway map.
+
+The KGML pathway map is parsed into the object structure defined in
+KGML_Pathway.py in this module.
+
+Classes:
+
+    - KGMLParser             Parses KGML file
+
+Functions:
+
+    - read                   Returns a single Pathway object, using KGMLParser
+      internally
+"""
+
+from __future__ import print_function
+
+try:
+    import xml.etree.cElementTree as ElementTree
+except ImportError:
+    import xml.etree.ElementTree as ElementTree
+
+
+from Bio._py3k import StringIO
+
+from KGMLpathway import *
+
+
+def read(handle, debug=0):
+    """Parses a single KEGG Pathway from given file handle.
+
+    Returns a single Pathway object.  There should be one and only
+    one pathway in each file, but there may well be pathological
+    examples out there.
+    """
+    iterator = parse(handle, debug)
+    try:
+        first = next(iterator)
+    except StopIteration:
+        first = None
+    if first is None:
+        raise ValueError("No pathways found in handle")
+    try:
+        second = next(iterator)
+    except StopIteration:
+        second = None
+    if second is not None:
+        raise ValueError("More than one pathway found in handle")
+    return first
+
+
+def parse(handle, debug=0):
+    """Returns an iterator over Pathway elements.
+
+    Arguments:
+    - handle - file handle to a KGML file for parsing
+    - debug - integer for amount of debug information to print
+
+    This is a generator for the return of multiple Pathway objects.
+    """
+    # Check handle
+    if not hasattr(handle, 'read'):
+        if isinstance(handle, str):
+            handle = StringIO(handle)
+        else:
+            exc_txt = "An XML-containing handle or an XML string " +\
+                "must be provided"
+            raise Exception(exc_txt)
+    # Parse XML and return each Pathway
+    for event, elem in \
+            ElementTree.iterparse(handle, events=('start', 'end')):
+        if event == "end" and elem.tag == "pathway":
+            yield KGMLParser(elem).parse()
+            elem.clear()
+
+
+class KGMLParser(object):
+    """Parses a KGML XML Pathway entry into a Pathway object."""
+
+    def __init__(self, elem):
+        self.entry = elem
+
+    def parse(self):
+        """Parse the input elements."""
+
+        def _parse_pathway(attrib):
+            for k, v in attrib.items():
+                self.pathway.__setattr__(k, v)
+
+        def _parse_entry(element):
+            new_entry = Entry()
+            for k, v in element.attrib.items():
+                new_entry.__setattr__(k, v)
+            for subelement in element.getchildren():
+                if subelement.tag == 'graphics':
+                    _parse_graphics(subelement, new_entry)
+                elif subelement.tag == 'component':
+                    _parse_component(subelement, new_entry)
+            self.pathway.add_entry(new_entry)
+
+        def _parse_graphics(element, entry):
+            new_graphics = Graphics(entry)
+            for k, v in element.attrib.items():
+                new_graphics.__setattr__(k, v)
+            entry.add_graphics(new_graphics)
+
+        def _parse_component(element, entry):
+            new_component = Component(entry)
+            for k, v in element.attrib.items():
+                new_component.__setattr__(k, v)
+            entry.add_component(new_component)
+
+        def _parse_reaction(element):
+            new_reaction = Reaction()
+            for k, v in element.attrib.items():
+                new_reaction.__setattr__(k, v)
+            for subelement in element.getchildren():
+                if subelement.tag == 'substrate':
+                    new_reaction.add_substrate(int(subelement.attrib['id']))
+                elif subelement.tag == 'product':
+                    new_reaction.add_product(int(subelement.attrib['id']))
+            self.pathway.add_reaction(new_reaction)
+
+        def _parse_relation(element):
+            new_relation = Relation()
+            new_relation.entry1 = int(element.attrib['entry1'])
+            new_relation.entry2 = int(element.attrib['entry2'])
+            new_relation.type = element.attrib['type']
+            for subtype in element.getchildren():
+                name, value = subtype.attrib['name'], subtype.attrib['value']
+                if name in ('compound', 'hidden compound'):
+                    new_relation.subtypes.append((name, int(value)))
+                else:
+                    new_relation.subtypes.append((name, value))
+            self.pathway.add_relation(new_relation)
+
+        # ==========
+        # Initialise Pathway
+        self.pathway = Pathway()
+        # Get information about the pathway itself
+        _parse_pathway(self.entry.attrib)
+        for element in self.entry.getchildren():
+            if element.tag == 'entry':
+                _parse_entry(element)
+            elif element.tag == 'reaction':
+                _parse_reaction(element)
+            elif element.tag == 'relation':
+                _parse_relation(element)
+            # Parsing of some elements not implemented - no examples yet
+            else:
+                # This should warn us of any unimplemented tags
+                import warnings
+                from Bio import BiopythonParserWarning
+                warnings.warn("Warning: tag %s not implemented in parser" %
+                              element.tag, BiopythonParserWarning)
+        return self.pathway
+
+
+if __name__ == '__main__':
+	pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU'))
+	for gene in pathway.genes:
+#		print ([x.split(':')[1] for x in gene._names])
+		for g in gene.graphics:
+			g._setfgcolor('#000001')
+	for gene in pathway.genes:
+		for g in gene.graphics:
+
+			print (g.fgcolor)
+
+
+'''
+    # Check large metabolism
+    pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU'))
+    print(pathway)
+    for k, v in list(pathway.entries.items())[:20]:
+        print(v)
+    for r in list(pathway.reactions)[:20]:
+        print(r)
+    print(len(pathway.maps))
+
+    # Check relations
+    pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU'))
+    print(pathway)
+    for k, v in list(pathway.entries.items())[:20]:
+        print(v)
+    for r in list(pathway.reactions[:20]):
+        print(r)
+    for r in list(pathway.relations[:20]):
+        print(r)
+    print(len(pathway.maps))
+
+    # Check components
+    pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU'))
+    print(pathway)
+    for k, v in pathway.entries.items():
+        print(v)
+    print(len(pathway.maps))
+
+    # Test XML representation
+    print(pathway.get_KGML())
+
+    # Test bounds of pathway
+    print(pathway.bounds)
+'''
diff --git a/KEGGmapping/KGMLparser.pyc b/KEGGmapping/KGMLparser.pyc