diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index d1ac03e..1e376fc 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -389,7 +389,7 @@ def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules): # Create the new residue and add existing heavy atoms. - newResidue = newTopology.addResidue(residue.name, newChain, residue.id) + newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode) for atom in residue.atoms(): if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen): if atom.name == 'OXT' and (chain.index, indexInChain+1) in self.missingResidues: @@ -510,7 +510,7 @@ def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosi # Create the new residue. - newResidue = chain.topology.addResidue(residueName, chain, "%d" % ((firstIndex+i)%10000)) + newResidue = chain.topology.addResidue(residueName, chain, "%d" % (firstIndex+i)) fraction = (i+1.0)/(numResidues+1.0) translate = startPosition + (endPosition-startPosition)*fraction + loopHeight*math.sin(fraction*math.pi)*loopDirection templateAtoms = list(template.topology.atoms()) @@ -569,7 +569,7 @@ def removeChains(self, chainIndices=None, chainIds=None): return - def findMissingResidues(self): + def findMissingResidues(self,chainWithGapsOverride=None): """Find residues that are missing from the structure. The results are stored into the missingResidues field, which is a dict. Each key is a tuple consisting of @@ -587,13 +587,33 @@ def findMissingResidues(self): chains = [c for c in self.topology.chains() if len(list(c.residues())) > 0] chainWithGaps = {} + # This is PDBFixer's best guess for what might appear in a SEQRES + + knownResidues = set(self.templates.keys()) | set(substitutions.keys()) + for s in self.sequences: + knownResidues.update(s.residues) + # Find the sequence of each chain, with gaps for missing residues. for chain in chains: - minResidue = min(int(r.id) for r in chain.residues()) - maxResidue = max(int(r.id) for r in chain.residues()) - residues = [None]*(maxResidue-minResidue+1) + if chainWithGapsOverride: + if chain.id in chainWithGapsOverride: + chainWithGaps[chain] = chainWithGapsOverride[chain.id] + continue + + seqresResidues = [] for r in chain.residues(): + if r.name in knownResidues: + seqresResidues.append(r) + else: + # assume that everything that follows is a ligand/water + break + + if len(seqresResidues) == 0: continue + minResidue = min(int(r.id) for r in seqresResidues) + maxResidue = max(int(r.id) for r in seqresResidues) + residues = [None]*(maxResidue-minResidue+1) + for r in seqresResidues: residues[int(r.id)-minResidue] = r.name chainWithGaps[chain] = residues @@ -607,6 +627,8 @@ def findMissingResidues(self): continue if chain in chainSequence: continue + if chain not in chainWithGaps: + continue for offset in range(len(sequence.residues)-len(chainWithGaps[chain])+1): if all(a == b or b == None for a,b in zip(sequence.residues[offset:], chainWithGaps[chain])): chainSequence[chain] = sequence