-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMET_Stop_Codon_Library_InversePCR.py
159 lines (130 loc) · 6.5 KB
/
MET_Stop_Codon_Library_InversePCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# MET_Stop_Codon_Library_InversePCR.py
# G.Estevam
'''
- The The goal of this code is to produce STOP codon oligo library for staturation mutagnesis of the MET kinase domain
- The code is written to create a TGA oligo library for inverse PCR, with oligos having a matching(ish) Tm
- The script takes an input gene file, where the gene to be mutated is uppercase, and the flanking regions are lowercase
- Primers are created with a starting length of 40, with a NNK subsition per codon position
- If the primers have a Tm within the min/max Tm and have lengths within the min/max lengths, the oligos are saved
- Otherwise, nts are added or subtracted until it meets the requirements listed in the point above
'''
'''
EXPECTED:
5 gcaaaatactgtccacattgacctcagtgctctaAATCCAGAGCTGGTCCAGGCAGTGCAGCATGTAGTGATTGGGCCCAGTAGCCTGATT 3
||||||||||||||||||||||||||||
<- 3 tatgacaggtgtaactggagtcacgagat 5 Reverse Primer
5 NNKCCAGAGCTGGTCCAGGCAGTGCAGCATGTAGTGATT 3 -> Forward Primer
||||||||||||||||||||||||||||||||||||||
3 cgttttatgacaggtgtaactggagtcacgagatTTAGGTCTCGACCAGGTCCGTCACGTCGTACATCACTAACCCGGGTCATCGGACTAA 5
'''
''' code start '''
import os
import sys
import math
import re
import csv
from Bio.SeqUtils import MeltingTemp as mt
''' global variables '''
sequence = open(sys.argv[1], "r")
seq = sequence.read()
protein_gene = ''.join([nt for nt in seq if nt.istitle()]) # uppercase sequence/ gene
a = re.search('[A-Z]', seq).start() # gene uppercase
gene = seq[a:] # gene + 3' lowercase flank
seq_length = len(seq)//3
Stop = 'TGA'
max_primer_length = 51
min_primer_length = 30
max_Tm = 63
min_Tm = 61
gene_codons = len(gene)//3
protein_gene_codons = len(protein_gene)//3
forward_primers = []
reverse_primers = []
''' functions '''
# complement of entire input sequence
def complement(seq):
complement = {'a':'t', 'c':'g', 't':'a', 'g':'c','A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
return ''.join([complement[base] for base in seq])
# reverse complement of input sequence
# MissyEliot
# FlipItAndReverseIt
def reverse_complement(seq):
complement = {'a':'t', 'c':'g', 't':'a', 'g':'c','A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
return ''.join([complement[base] for base in seq[::-1]])
# create primers for each codon substition at each position of the gene
# these are the forward primers, 5' - 3' direction, complementry to sense strand
def create_inverse_PCR_primer(seq, gene, gene_codons, protein_gene, NNK):
for position in range(0, len(gene),36):
codon = gene[position:position + 3]
if codon.isupper():
primer_length = 37
primer_flank = gene[position+3 : position+primer_length]
primer_name = "%d_FW_MET_KD_%s_mut" % ((position/3)+1,codon)
primer = '%s%s' % (Stop, primer_flank)
Tm = mt.Tm_GC(primer_flank) # Tm_GC module uses empirical formulas based on GC content
if Tm < min_Tm:
while Tm < min_Tm and len(primer) >= min_primer_length and len(primer) < max_primer_length:
primer_length += 1
primer_flank = gene[position+3 : position+primer_length]
Tm = mt.Tm_GC(primer_flank)
primer_name = "%d_FW_MET_KD_%s_mut" % ((position/3)+1,codon)
primer = '%s%s' % (Stop, primer_flank)
if Tm > max_Tm:
while Tm > max_Tm and len(primer) >= min_primer_length and len(primer) <= max_primer_length:
primer_length -= 1
primer_flank = gene[position+3 : position+primer_length]
Tm = mt.Tm_GC(primer_flank)
primer_name = "%d_FW_MET_KD_%s_mut" % ((position/3)+1,codon)
primer = '%s%s' % (Stop, primer_flank)
print (primer_name, primer, Tm)
fw_oligos = [primer_name, primer, Tm]
forward_primers.append(fw_oligos)
return forward_primers,primer_name, primer, Tm
# create reverse primers for each codon substition
# these are reverse complement primers, 5' - 3' direction, complementry to antisense strand
def create_reverse_primer (seq, gene, gene_codons, protein_gene):
i = 1
for position in range(0, len(seq),36):
codon = seq[position:position + 3]
if codon.isupper():
codon = position
reverse_primer_length = 30
reverse_primer_name = "%d_RV_MET_KD_%s_mut" % (i,seq[codon:codon+3])
reverse_primer = reverse_complement(seq[codon-reverse_primer_length : codon])
rv_Tm = mt.Tm_GC(reverse_primer)
if rv_Tm < min_Tm:
while rv_Tm < min_Tm and len(reverse_primer) >= min_primer_length and len(reverse_primer) < max_primer_length:
reverse_primer_length += 1
reverse_primer_name = "%d_RV_MET_KD_%s_mut" % (i,seq[codon:codon+3])
reverse_primer = reverse_complement(seq[codon-reverse_primer_length : codon])
rv_Tm = mt.Tm_GC(reverse_primer)
if rv_Tm > max_Tm:
while rv_Tm > max_Tm and len(reverse_primer) >= min_primer_length and len(reverse_primer) <= max_primer_length:
reverse_primer_length -= 1
reverse_primer_name = "%d_RV_MET_KD_%s_mut" % (i,seq[codon:codon+3])
reverse_primer = reverse_complement(seq[codon-reverse_primer_length : codon])
rv_Tm = mt.Tm_GC(reverse_primer)
i += 1
print (reverse_primer_name, reverse_primer, rv_Tm)
rv_oligos = [reverse_primer_name, reverse_primer, rv_Tm]
reverse_primers.append(rv_oligos)
return reverse_primers, reverse_primer_name, reverse_primer, rv_Tm
# create output file
def create_output_file (forward_primers,reverse_primers):
fw_file = open('MET_KD_forward_Stop_primer_library.csv', 'w')
writer = csv.writer(fw_file)
writer.writerow(['Forward Primer Name ','Stop Primer ', 'Tm'])
writer.writerows(forward_primers)
rv_file = open('MET_KD_reverse_Stop_primer_library.csv', 'w')
writer2 = csv.writer(rv_file)
writer2.writerow(['Reverse Primer Name ','Stop Primer ', 'Tm'])
writer2.writerows(reverse_primers)
''' call functions and print outputs '''
#print (gene)
#print (reverse_complement(seq))
#print (complement(seq))
create_inverse_PCR_primer(seq, gene, gene_codons, protein_gene, Stop)
create_reverse_primer (seq, gene, gene_codons, protein_gene)
print (seq)
print (len(gene))
create_output_file(forward_primers,reverse_primers)