-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit c62c7dd
Showing
178 changed files
with
26,967 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Readme.docx | ||
Readme.pdf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/usr/bin/env Rscript

# Read a copy-number segment table from stdin, reduce it with CNTools
# (CNSeg -> getRS -> rs) and write the reduced table to stdout as TSV.
# With --pdf, additionally draw a complete-linkage dendrogram of the
# reduced segments into the given PDF file.

library(argparser, quietly = TRUE)

p <- arg_parser("Compare CN (Copy Number) of any paired samples.
Input tsv:
ID chrom loc.start loc.end seg.mean
TCGA-06-0126-01A-01 1 554267 639580 0.9002
The header is mandatory. The names of header are also mandatory. Order of columns is non-mandatory.
")
p <- add_argument(p, "--pdf", help = "[PDF] Output cluster")
argv <- parse_args(p)

# Everything printed until the matching sink() goes to stderr, so stdout
# carries only the result table.
sink(stderr())
cat(paste0('[DEBUG] ', Sys.time(), ' Check if the following variables are correct as expected:\n'))
str(argv)
cat('\n')

library(CNTools, quietly = TRUE)

data <- read.delim(file("stdin"), header = TRUE, check.names = FALSE)
cnseg <- CNSeg(data)
rdseg <- getRS(cnseg, by = "region", imput = FALSE, XY = FALSE, what = "median")
reducedseg <- rs(rdseg)

# argv$pdf is NA when --pdf was not supplied.
if (!is.na(argv$pdf)) {
  pdf(argv$pdf)
  hc <- hclust(getDist(rdseg, method = "euclidian"), method = "complete")
  plot(hc, hang = -1, cex = 0.8, main = "", xlab = "", ylab = "", sub = "")
  # Close the device explicitly so the PDF is flushed and finalised;
  # invisible() keeps the "null device" message out of the output.
  invisible(dev.off())
}

sink()
write.table(reducedseg, stdout(), sep = "\t", quote = FALSE, row.names = FALSE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#!/usr/bin/env python
"""Annotate a list of gene symbols with their GO terms.

Inputs (all tab-separated, each with a header line):
  -i  one gene symbol per line
  -G  GO table; the columns GeneID, Category and GO_term are read by name
  -C  ID conversion table; column 2 is the gene ID, column 3 the symbol and
      column 5 a '|'-separated alias list (1-based column numbers)

Output (stdout): one line per input symbol that has GO annotation, with the
Function, Component and Process terms each joined by ';'.
"""

import os
import argparse
import sys

parser = argparse.ArgumentParser(description = 'GO mapping (Gene Symbol list)')
help_desc = "Input Symbol list"
parser.add_argument("-i", help=help_desc, required=True)
help_desc = "GO file"
parser.add_argument("-G", help=help_desc, required=True)
help_desc = "ID convert file(NCBI gene info)"
parser.add_argument("-C", help=help_desc, required=True)

args = parser.parse_args()

Symbolfile = args.i
GOfile = args.G
convertfile = args.C

# Only these categories are collected; rows with any other category still
# register the GeneID but contribute no term.
GO_CATEGORIES = ('Component', 'Process', 'Function')

# GeneID -> {category -> set of GO terms}
GOdict = {}
with open(GOfile) as data:
    # next() works on both Python 2 and 3 (file.next() is Python 2 only).
    field = next(data).strip().split('\t')
    for line in data:
        item = dict(zip(field, line.strip().split('\t')))
        GeneID = item['GeneID']
        if GeneID not in GOdict:
            GOdict[GeneID] = dict((category, set()) for category in GO_CATEGORIES)
        if item['Category'] in GO_CATEGORIES:
            GOdict[GeneID][item['Category']].add(item['GO_term'])

# GeneID -> set of names (official symbol plus aliases)
convertDict = {}
with open(convertfile) as convert:
    next(convert, None)  # skip the header line
    for line in convert:
        line = line.strip().split('\t')
        convertDict[line[1]] = set([line[2]] + line[4].split('|'))

# Invert once into name -> GeneID so each symbol is a single dict lookup
# instead of a scan over every GeneID. When a name belongs to several IDs the
# last one in convertDict's iteration order wins, as in the former nested loop.
symbol2id = {}
for geneID, names in convertDict.items():
    for name in names:
        symbol2id[name] = geneID

# Read the symbol list with a context manager so the handle is closed.
with open(Symbolfile) as symbols:
    genelst = [x.strip() for x in symbols]

geneDict = dict((gene, symbol2id[gene]) for gene in genelst if gene in symbol2id)

sys.stdout.write('\t'.join(['Gene','GO_Function','GO_Component','GO_Process'])+'\n')
for gene in genelst:
    # Symbols without a GeneID, or whose GeneID has no GO rows, are skipped
    # instead of raising KeyError.
    terms = GOdict.get(geneDict.get(gene))
    if terms is None:
        continue
    # Sets are unordered; sort so the output is reproducible between runs.
    sys.stdout.write('\t'.join([gene,
                                ';'.join(sorted(terms['Function'])),
                                ';'.join(sorted(terms['Component'])),
                                ';'.join(sorted(terms['Process']))])+'\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
def Symbol2ID_dict(convertfile):
    """Build a symbol -> gene ID mapping from a tab-separated conversion file.

    The first line of the file is treated as a header and skipped. For every
    following non-blank line, the third column (index 2) becomes the key and
    the second column (index 1) the value; a symbol that occurs more than
    once keeps the ID from its last occurrence.

    Arguments:
     - convertfile - path of the conversion file

    Returns a dict of str -> str (empty when the file has no data lines).
    """
    convertDict = {}
    with open(convertfile) as convert:
        # next() with a default works on Python 2 and 3 and tolerates an
        # empty file (file.next() is Python 2 only and raises on empty input).
        next(convert, None)
        for line in convert:
            line = line.strip()
            if not line:
                # e.g. a trailing blank line at the end of the file
                continue
            columns = line.split('\t')
            convertDict[columns[2]] = columns[1]
    return convertDict
|
||
def ID2Symbol_dict(convertfile):
    """Build a gene ID -> set-of-names mapping from a tab-separated file.

    The first line of the file is treated as a header and skipped. For every
    following non-blank line, the second column (index 1) becomes the key and
    the value is a set holding the third column (index 2) together with the
    '|'-separated entries of the fifth column (index 4). An ID that occurs
    more than once keeps the set from its last occurrence.

    Arguments:
     - convertfile - path of the conversion file

    Returns a dict of str -> set of str (empty when the file has no data
    lines).
    """
    convertDict = {}
    with open(convertfile) as convert:
        # next() with a default works on Python 2 and 3 and tolerates an
        # empty file (file.next() is Python 2 only and raises on empty input).
        next(convert, None)
        for line in convert:
            line = line.strip()
            if not line:
                # e.g. a trailing blank line at the end of the file
                continue
            columns = line.split('\t')
            convertDict[columns[1]] = set([columns[2]] + columns[4].split('|'))
    return convertDict
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
# -*- coding: utf-8 -*- | ||
# Copyright 2013 by Leighton Pritchard. All rights reserved. | ||
# This code is part of the Biopython distribution and governed by its | ||
# license. Please see the LICENSE file that should have been included | ||
# as part of this package. | ||
|
||
"""This module provides classes and functions to parse a KGML pathway map. | ||
The KGML pathway map is parsed into the object structure defined in | ||
KGML_Pathway.py in this module. | ||
Classes: | ||
- KGMLParser Parses KGML file | ||
Functions: | ||
- read Returns a single Pathway object, using KGMLParser | ||
internally | ||
""" | ||
|
||
from __future__ import print_function | ||
|
||
try: | ||
import xml.etree.cElementTree as ElementTree | ||
except ImportError: | ||
import xml.etree.ElementTree as ElementTree | ||
|
||
|
||
from Bio._py3k import StringIO | ||
|
||
from KGMLpathway import * | ||
|
||
|
||
def read(handle, debug=0):
    """Parse exactly one KEGG Pathway from the given handle.

    Both arguments are passed straight on to parse(). The single Pathway
    object found is returned; ValueError is raised when the handle yields
    no pathway at all or more than one.
    """
    pathways = parse(handle, debug)
    # next() with a None default stands in for catching StopIteration.
    pathway = next(pathways, None)
    if pathway is None:
        raise ValueError("No pathways found in handle")
    if next(pathways, None) is not None:
        raise ValueError("More than one pathway found in handle")
    return pathway
|
||
|
||
def parse(handle, debug=0):
    """Return an iterator over Pathway elements.

    Arguments:
     - handle - file handle to a KGML file for parsing, or a string
       containing the KGML XML itself
     - debug - integer for amount of debug information to print (accepted
       for interface compatibility; not used inside this function)

    This is a generator for the return of multiple Pathway objects, so the
    argument check below only runs once iteration starts.

    Raises TypeError (a subclass of the Exception raised previously) when
    handle is neither a readable object nor a string.
    """
    # Check handle
    if not hasattr(handle, 'read'):
        if isinstance(handle, str):
            handle = StringIO(handle)
        else:
            raise TypeError("An XML-containing handle or an XML string "
                            "must be provided")
    # Parse XML and return each Pathway. Only 'end' events are requested:
    # an element is complete (all children read) when its end tag is seen,
    # and 'start' events were never acted upon.
    for event, elem in ElementTree.iterparse(handle, events=('end',)):
        if elem.tag == "pathway":
            yield KGMLParser(elem).parse()
            # Release the parsed subtree
            elem.clear()
|
||
|
||
class KGMLParser(object):
    """Parses a KGML XML Pathway entry into a Pathway object."""

    def __init__(self, elem):
        """Store the <pathway> XML element that parse() will convert."""
        self.entry = elem

    def parse(self):
        """Parse the input elements and return the populated Pathway.

        Child elements are iterated directly rather than through
        Element.getchildren(), which was removed in Python 3.9.
        A BiopythonParserWarning is issued for every child tag of the
        pathway other than 'entry', 'reaction' and 'relation'.
        """

        def _parse_pathway(attrib):
            # Copy every XML attribute of <pathway> onto the Pathway object
            for k, v in attrib.items():
                setattr(self.pathway, k, v)

        def _parse_entry(element):
            # Build an Entry from the attributes, attach its graphics and
            # component children, then register it with the pathway
            new_entry = Entry()
            for k, v in element.attrib.items():
                setattr(new_entry, k, v)
            for subelement in element:
                if subelement.tag == 'graphics':
                    _parse_graphics(subelement, new_entry)
                elif subelement.tag == 'component':
                    _parse_component(subelement, new_entry)
            self.pathway.add_entry(new_entry)

        def _parse_graphics(element, entry):
            new_graphics = Graphics(entry)
            for k, v in element.attrib.items():
                setattr(new_graphics, k, v)
            entry.add_graphics(new_graphics)

        def _parse_component(element, entry):
            new_component = Component(entry)
            for k, v in element.attrib.items():
                setattr(new_component, k, v)
            entry.add_component(new_component)

        def _parse_reaction(element):
            # Substrates and products are recorded by their integer id
            new_reaction = Reaction()
            for k, v in element.attrib.items():
                setattr(new_reaction, k, v)
            for subelement in element:
                if subelement.tag == 'substrate':
                    new_reaction.add_substrate(int(subelement.attrib['id']))
                elif subelement.tag == 'product':
                    new_reaction.add_product(int(subelement.attrib['id']))
            self.pathway.add_reaction(new_reaction)

        def _parse_relation(element):
            new_relation = Relation()
            new_relation.entry1 = int(element.attrib['entry1'])
            new_relation.entry2 = int(element.attrib['entry2'])
            new_relation.type = element.attrib['type']
            for subtype in element:
                name, value = subtype.attrib['name'], subtype.attrib['value']
                # The value of (hidden) compound subtypes is stored as an
                # int; every other subtype value stays a string
                if name in ('compound', 'hidden compound'):
                    new_relation.subtypes.append((name, int(value)))
                else:
                    new_relation.subtypes.append((name, value))
            self.pathway.add_relation(new_relation)

        # ==========
        # Initialise Pathway
        self.pathway = Pathway()
        # Get information about the pathway itself
        _parse_pathway(self.entry.attrib)
        for element in self.entry:
            if element.tag == 'entry':
                _parse_entry(element)
            elif element.tag == 'reaction':
                _parse_reaction(element)
            elif element.tag == 'relation':
                _parse_relation(element)
            # Parsing of some elements not implemented - no examples yet
            else:
                # This should warn us of any unimplemented tags
                import warnings
                from Bio import BiopythonParserWarning
                warnings.warn("Warning: tag %s not implemented in parser" %
                              element.tag, BiopythonParserWarning)
        return self.pathway
|
||
|
||
if __name__ == '__main__':
    # Ad-hoc check: read one KGML file, set the foreground colour of every
    # gene graphic, then print the colours back.
    kgml_path = '/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml'
    # Plain 'r' replaces 'rU' (mode 'U' was removed in Python 3.11), and the
    # with-block closes the handle once read() has consumed the file.
    with open(kgml_path, 'r') as kgml_handle:
        pathway = read(kgml_handle)
    for gene in pathway.genes:
        # print ([x.split(':')[1] for x in gene._names])
        for g in gene.graphics:
            g._setfgcolor('#000001')
    for gene in pathway.genes:
        for g in gene.graphics:
            print(g.fgcolor)
|
||
|
||
''' | ||
# Check large metabolism | ||
pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU')) | ||
print(pathway) | ||
for k, v in list(pathway.entries.items())[:20]: | ||
print(v) | ||
for r in list(pathway.reactions)[:20]: | ||
print(r) | ||
print(len(pathway.maps)) | ||
# Check relations | ||
pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU')) | ||
print(pathway) | ||
for k, v in list(pathway.entries.items())[:20]: | ||
print(v) | ||
for r in list(pathway.reactions[:20]): | ||
print(r) | ||
for r in list(pathway.relations[:20]): | ||
print(r) | ||
print(len(pathway.maps)) | ||
# Check components | ||
pathway = read(open('/Volumes/Macintosh HD 2/Work/Puri/Pathview/KGMLfile/hsa05130.xml', 'rU')) | ||
print(pathway) | ||
for k, v in pathway.entries.items(): | ||
print(v) | ||
print(len(pathway.maps)) | ||
# Test XML representation | ||
print(pathway.get_KGML()) | ||
# Test bounds of pathway | ||
print(pathway.bounds) | ||
''' |
Binary file not shown.
Oops, something went wrong.