Source code for pdb2pqr.biomolecule

"""The biomolecule object used in PDB2PQR and associated methods.

.. todo:: This module should be broken into separate files.

Authors:  Todd Dolinsky, Yong Huang
"""
import logging
import copy
import pprint
import string
from . import residue as residue_
from . import aa
from . import na
from . import utilities as util
from . import structures as struct
from . import pdb
from . import forcefield
from . import quatfit as quat
from .config import AA_NAMES, NA_NAMES, BONDED_SS_LIMIT, PEPTIDE_DIST


_LOGGER = logging.getLogger(__name__)


[docs]class Biomolecule(object):
    """Biomolecule class.

    This class represents the parsed PDB, and provides a hierarchy of
    information - each Biomolecule object contains a list of Chain objects as
    provided in the PDB file.  Each Chain then contains its associated list of
    Residue objects, and each Residue contains a list of Atom objects,
    completing the hierarchy.
    """

[docs]    def __init__(self, pdblist, definition):
        """Initialize using parsed PDB file

        :param pdblist:  list of objects from :mod:`pdb` from lines of PDB file
        :type pdblist:  list
        :param definition:  topology definition object
        :type definition:  Definition
        """
        self.chainmap = {}
        self.chains = []
        self.residues = []
        self.definition = definition
        self.pdblist = pdblist
        chain_dict = {}
        previous_atom = None
        residue = []
        num_models = 0
        num_chains = 1
        count = 0
        for record in pdblist:  # Find number of chains
            if isinstance(record, pdb.TER):
                num_chains += 1
        for record in pdblist:
            if isinstance(record, (pdb.ATOM, pdb.HETATM)):
                if record.chain_id == "":
                    if num_chains > 1 and record.res_name not in [
                        "WAT",
                        "HOH",
                    ]:
                        # Assign a chain ID
                        try:
                            record.chain_id = (
                                string.ascii_uppercase
                                + string.ascii_lowercase
                                + string.digits
                            )[count]
                        except IndexError:
                            raise Exception(
                                "Too many chains exist in biomolecule. "
                                "Consider preparing subsets."
                            )

                chain_id = record.chain_id
                res_seq = record.res_seq
                ins_code = record.ins_code
                if previous_atom is None:
                    previous_atom = record
                if chain_id not in chain_dict:
                    my_chain = struct.Chain(chain_id)
                    chain_dict[chain_id] = my_chain
                if (
                    res_seq != previous_atom.res_seq
                    or ins_code != previous_atom.ins_code
                    or chain_id != previous_atom.chain_id
                ):
                    my_residue = self.create_residue(
                        residue, previous_atom.res_name
                    )
                    chain_dict[previous_atom.chain_id].add_residue(my_residue)
                    residue = []
                residue.append(record)
                previous_atom = record
            elif isinstance(record, pdb.END):
                my_residue = self.create_residue(
                    residue, previous_atom.res_name
                )
                chain_dict[previous_atom.chain_id].add_residue(my_residue)
                residue = []
            elif isinstance(record, pdb.MODEL):
                num_models += 1
                if residue == []:
                    continue
                if num_models > 1:
                    my_residue = self.create_residue(
                        residue, previous_atom.res_name
                    )
                    chain_dict[previous_atom.chain_id].add_residue(my_residue)
                    break
            elif isinstance(record, pdb.TER):
                count += 1
        if residue != [] and num_models <= 1:
            my_residue = self.create_residue(residue, previous_atom.res_name)
            chain_dict[previous_atom.chain_id].add_residue(my_residue)
        # Keep a map for accessing chains via chain_id
        self.chainmap = chain_dict.copy()
        # Make a list for sequential ordering of chains
        if "" in chain_dict:
            chain_dict["ZZ"] = chain_dict[""]
            del chain_dict[""]
        keys = list(chain_dict.keys())
        keys.sort()
        for key in keys:
            self.chains.append(chain_dict[key])
        for chain in self.chains:
            for residue in chain.residues:
                self.residues.append(residue)

    @property
    def num_missing_heavy(self):
        """Return number of missing biomolecular heavy atoms in structure.

        :return:  number of missing heavy atoms in structure
        :rtype:  int
        """
        natom = 0
        for residue in self.residues:
            if not isinstance(residue, (aa.Amino, na.Nucleic)):
                continue
            residue.missing = []
            for refatomname in residue.reference.map:
                if refatomname.startswith("H"):
                    continue
                if refatomname in ["N+1", "C-1"]:
                    continue
                if (
                    refatomname in ["O1P", "O2P"]
                    and residue.has_atom("OP1")
                    and residue.has_atom("OP2")
                ):
                    continue
                if not residue.has_atom(refatomname):
                    _LOGGER.debug(f"Missing {refatomname} in {residue}")
                    natom += 1
                    residue.missing.append(refatomname)
        return natom

    @property
    def num_heavy(self):
        """Return number of biomolecular heavy atoms in structure.

        .. todo::
           Figure out if this is redundant with
           :func:`Biomolecule.num_bio_atoms`

        .. note::
           Includes hydrogens (but those are stripped off eventually)

        :return:  number of heavy atoms
        :rtype:  int
        """
        natom = 0
        for residue in self.residues:
            if not isinstance(residue, (aa.Amino, na.Nucleic)):
                continue
            for refatomname in residue.reference.map:
                if refatomname.startswith("H"):
                    continue
                if refatomname in ["N+1", "C-1"]:
                    continue
                if (
                    refatomname in ["O1P", "O2P"]
                    and residue.has_atom("OP1")
                    and residue.has_atom("OP2")
                ):
                    continue
                natom += 1
        return natom

    @property
    def reference_map(self):
        """Return definition reference map.

        :return:  definition reference map
        :rtype:  dict
        """
        _LOGGER.error(type(self.definition.map))
        return self.definition.map

    @property
    def patch_map(self):
        """Return definition patch maps.

        :return:  definition patch maps
        :rtype:  list
        """
        return self.definition.patches

    @property
    def num_bio_atoms(self):
        """Return the number of ATOM (not HETATM) records in the biomolecule.

        :return:  number of ATOM records
        :rtype:  int
        """
        return sum(1 for atom in self.atoms if atom.type == "ATOM")

[docs]    def set_hip(self):
        """Set all HIS states to HIP."""
        for residue in self.residues:
            if isinstance(residue, aa.HIS):
                self.apply_patch("HIP", residue)

[docs]    def set_termini(self, neutraln=False, neutralc=False):
        """Set the termini for a protein.

        First set all known termini by looking at the ends of the chain. Then
        examine each residue, looking for internal chain breaks.

        .. todo::  This function needs to be cleaned and simplified

        :param neutraln:  indicate whether N-terminus is neutral
        :type neutraln:  bool
        :param neutralc:  indicate whether C-terminus is neutral
        :type neutralc:  bool
        """
        # First assign the known termini
        chain = None
        for chain in self.chains:
            self.assign_termini(chain, neutraln, neutralc)
        # Now determine if there are any hidden chains
        letters = string.ascii_uppercase + string.ascii_lowercase
        ch_num = 0
        while ch_num < len(self.chains):
            chain = self.chains[ch_num]
            reslist = []
            origlist = []
            # origlist holds the original residue list for the chain
            for residue in chain.residues:
                origlist.append(residue)
            for residue in origlist:
                reslist.append(residue)
                # Look for ending termini
                fixflag = 0
                if isinstance(residue, aa.Amino):
                    if residue.has_atom("OXT") and not residue.is_c_term:
                        fixflag = 1
                elif isinstance(residue, na.Nucleic):
                    if (
                        residue.has_atom("H3T") or residue.name.endswith("3")
                    ) and not residue.is3term:
                        fixflag = 1
                if fixflag:
                    # Get an available chain ID
                    chainid = letters[0]
                    id_ = 0
                    id_length = 1
                    while chainid in self.chainmap:
                        id_ += 1
                        if id_ >= len(letters):
                            id_length += 1
                            id_ = 0
                        chainid = letters[id_] * id_length
                    if id_length > 1:
                        message = (
                            "Warning: Reusing chain id: " + chainid[0] + ""
                        )
                        _LOGGER.warning(message)
                    # Make a new chain with these residues
                    newchain = struct.Chain(chainid[0])
                    self.chainmap[chainid] = newchain
                    self.chains.insert(ch_num, newchain)
                    for res in reslist:
                        newchain.add_residue(res)
                        chain.residues.remove(res)
                        res.set_chain_id(chainid[0])
                    self.assign_termini(chain, neutraln, neutralc)
                    self.assign_termini(newchain, neutraln, neutralc)
                    reslist = []
                    ch_num += 1
            ch_num += 1
        # Update the final chain's chain_id if it is "" unless it's all water
        if "" in self.chainmap:
            notwat = 0
            for res in chain.residues:
                if not isinstance(res, aa.WAT):
                    notwat = 1
                    break
            if notwat == 0:
                _LOGGER.debug("Done setting termini.")
                return
            chain = self.chainmap[""]
            chainid = letters[0]
            id_ = 0
            id_length = 1
            while chainid in self.chainmap:
                id_ += 1
                if id_ >= len(letters):
                    id_length += 1
                    id_ = 0
                chainid = letters[id_] * id_length
            if id_length > 1:
                message = "Warning: Reusing chain id: " + chainid[0]
                _LOGGER.warning(message)
            # Use the new chain_id
            self.chainmap[chainid] = chain
            del self.chainmap[""]

            for res in chain.residues:
                res.set_chain_id(chainid[0])
        _LOGGER.debug("Done setting termini.")

[docs]    def set_states(self):
        """Set the state of each residue.

        This is the last step before assigning the forcefield, but is necessary
        so as to distinguish between various protonation states.

        See :mod:`aa` for residue-specific functions.
        """
        for residue in self.residues:
            if isinstance(residue, (aa.Amino, na.Nucleic)):
                residue.set_state()

[docs]    def add_hydrogens(self, hlist=None):
        """Add the hydrogens to the biomolecule.

        This requires either the rebuild_tetrahedral function for tetrahedral
        geometries or the standard quatfit methods.  These methods use three
        nearby bonds to rebuild the atom; the closer the bonds, the more
        accurate the results.  As such the peptide bonds are used when
        available.
        """
        count = 0
        for residue in self.residues:
            if not isinstance(residue, (aa.Amino, na.Nucleic)):
                continue

            reskey = (residue.res_seq, residue.chain_id, residue.ins_code)
            if hlist is not None and reskey in hlist:
                continue

            for atomname in residue.reference.map:
                if not atomname.startswith("H"):
                    continue
                if residue.has_atom(atomname):
                    continue
                if isinstance(residue, aa.CYS) and (
                    residue.ss_bonded and atomname == "HG"
                ):
                    continue
                if hasattr(residue, "rebuild_tetrahedral"):
                    # If this hydrogen is part of a tetrahedral group,
                    # follow a different codepath
                    if residue.rebuild_tetrahedral(atomname):
                        count += 1
                        continue
                else:
                    _LOGGER.warning(
                        "Tetrahedral hydrogen reconstruction not available "
                        "for nucleic acids. Some hydrogens may be missing (if "
                        "so, this is a bug)."
                    )
                # Otherwise use the standard quatfit methods
                coords = []
                refcoords = []
                refatomcoords = residue.reference.map[atomname].coords
                bondlist = residue.reference.get_nearest_bonds(atomname)
                for bond in bondlist:
                    if bond == "N+1":
                        atom = residue.peptide_n
                    elif bond == "C-1":
                        atom = residue.peptide_c
                    else:
                        atom = residue.get_atom(bond)
                    if atom is None:
                        continue
                    # Get coordinates, reference coordinates
                    coords.append(atom.coords)
                    refcoords.append(residue.reference.map[bond].coords)
                    # Exit if we have enough atoms
                    if len(coords) == 3:
                        break
                if len(coords) == 3:
                    newcoords = quat.find_coordinates(
                        3, coords, refcoords, refatomcoords
                    )
                    residue.create_atom(atomname, newcoords)
                    count += 1
                else:
                    _LOGGER.warning(
                        f"Couldn't rebuild {atomname} in {residue}!"
                    )
        _LOGGER.debug(f" Added {count} hydrogen atoms.")

[docs]    def set_donors_acceptors(self):
        """Set the donors and acceptors within the biomolecule."""
        for residue in self.residues:
            residue.set_donors_acceptors()

[docs]    def calculate_dihedral_angles(self):
        """Calculate dihedral angles for every residue in the biomolecule."""
        for residue in self.residues:
            if not isinstance(residue, aa.Amino):
                continue
            residue.dihedrals = []
            refangles = residue.reference.dihedrals
            for dihed in refangles:
                coords = []
                atoms = dihed.split()
                for i in range(4):
                    atomname = atoms[i]
                    if residue.has_atom(atomname):
                        coords.append(residue.get_atom(atomname).coords)
                if len(coords) == 4:
                    angle = util.dihedral(
                        coords[0], coords[1], coords[2], coords[3]
                    )
                else:
                    angle = None
                residue.add_dihedral_angle(angle)

[docs]    def set_reference_distance(self):
        """Set the distance to the CA atom in the residue.

        This is necessary for determining which atoms are allowed to move
        during rotations.  Uses the :func:`shortest_path` algorithm found in
        :mod:`utilities`.

        :raises ValueError:  if shortest path cannot be found (e.g., if the
            atoms are not connected)
        """
        for residue in self.residues:
            if not isinstance(residue, aa.Amino):
                continue
            # Initialize some variables
            map_ = {}
            caatom = residue.get_atom("CA")
            if caatom is None:
                # TODO: What does the %s mean? Is it the residue name?
                text = "Cannot set references to %s without CA atom!"
                raise ValueError(text)
            # Set up the linked map
            for atom in residue.atoms:
                map_[atom] = atom.bonds
            # Run the algorithm
            for atom in residue.atoms:
                if atom.is_backbone:
                    atom.refdistance = -1
                elif residue.is_c_term and atom.name == "HO":
                    atom.refdistance = 3
                elif residue.is_n_term and (
                    atom.name == "H3" or atom.name == "H2"
                ):
                    atom.refdistance = 2
                else:
                    path = util.shortest_path(map_, atom, caatom)
                    if path is not None:
                        atom.refdistance = len(path) - 1
                    else:
                        raise ValueError(
                            "Found gap in biomolecule structure for atom "
                            f"{atom}"
                        )

[docs]    def remove_hydrogens(self):
        """Remove hydrogens from the biomolecule."""
        for residue in self.residues:
            if not isinstance(residue, (aa.Amino, na.Nucleic)):
                continue
            for atom in residue.atoms[:]:
                if atom.is_hydrogen:
                    residue.remove_atom(atom.name)

[docs]    def assign_termini(self, chain, neutraln=False, neutralc=False):
        """Assign the termini for the given chain.

        Assignment made by looking at the start and end residues.

        :param chain:  chain of biomolecule
        :type chain:  Chain
        :param neutraln:  indicate whether to neutralize N-terminus
        :type neutraln:  bool
        :param neutralc:  indicate whether to neutralize C-terminus
        :type neutralc:  bool
        """
        if len(chain.residues) == 0:
            text = f'Error: chain "{chain.chain_id}" has 0 residues!'
            raise IndexError(text)
        # Set the N-Terminus/ 5' Terminus
        res0 = chain.residues[0]
        if isinstance(res0, aa.Amino):
            res0.is_n_term = True
            if isinstance(res0, aa.PRO) or neutraln:
                self.apply_patch("NEUTRAL-NTERM", res0)
            else:
                self.apply_patch("NTERM", res0)
        elif isinstance(res0, na.Nucleic):
            res0.is5term = True
            self.apply_patch("5TERM", res0)
        # Set the C-Terminus/ 3' Terminus
        reslast = chain.residues[-1]
        if isinstance(reslast, aa.Amino):
            reslast.is_c_term = True
            if neutralc:
                self.apply_patch("NEUTRAL-CTERM", reslast)
            else:
                self.apply_patch("CTERM", reslast)
        elif isinstance(reslast, na.Nucleic):
            reslast.is3term = True
            self.apply_patch("3TERM", reslast)
        else:
            for i in range(len(chain.residues)):
                resthis = chain.residues[-1 - i]
                if isinstance(resthis, aa.Amino):
                    resthis.is_c_term = True
                    if neutralc:
                        self.apply_patch("NEUTRAL-CTERM", resthis)
                    else:
                        self.apply_patch("CTERM", resthis)
                    break
                elif resthis.name in ["NH2", "NME"]:
                    break
                elif isinstance(resthis, na.Nucleic):
                    resthis.is3term = True
                    self.apply_patch("3TERM", resthis)
                    break

[docs]    def update_internal_bonds(self):
        """Update the internal bonding network.

        Update using the reference objects in each atom.
        """
        for residue in self.residues:
            if isinstance(residue, (aa.Amino, aa.WAT, na.Nucleic)):
                for atom in residue.atoms:
                    if not atom.has_reference:
                        continue
                    for bond in atom.reference.bonds:
                        if not residue.has_atom(bond):
                            continue
                        bondatom = residue.get_atom(bond)
                        if bondatom not in atom.bonds:
                            atom.add_bond(bondatom)

[docs]    def update_bonds(self):
        """Update the bonding network of the biomolecule.

        This happens in 3 steps:

        1. Apply the PEPTIDE patch to all Amino residues to add
           reference for the N(i+1) and C(i-1) atoms
        2. UpdateInternal_bonds for inter-residue linking
        3. Set the links to the N(i+1) and C(i-1) atoms
        """
        # Apply the peptide patch
        for residue in self.residues:
            if isinstance(residue, aa.Amino):
                if residue.is_n_term or residue.is_c_term:
                    continue
                else:
                    self.apply_patch("PEPTIDE", residue)
        # Update all internal bonds
        self.update_internal_bonds()
        # Set the peptide bond pointers
        for chain in self.chains:
            for i in range(len(chain.residues) - 1):
                res1 = chain.residues[i]
                res2 = chain.residues[i + 1]
                if not isinstance(res1, aa.Amino) or not isinstance(
                    res2, aa.Amino
                ):
                    continue
                atom1 = res1.get_atom("C")
                atom2 = res2.get_atom("N")
                if atom1 is not None:
                    res2.peptide_c = atom1
                if atom2 is not None:
                    res1.peptide_n = atom2
                if atom1 is None or atom2 is None:
                    continue
                if util.distance(atom1.coords, atom2.coords) > PEPTIDE_DIST:
                    text = (
                        f"Gap in backbone detected between {res1} and {res2}!"
                    )
                    _LOGGER.warning(text)
                    res2.peptide_c = None
                    res1.peptide_n = None

[docs]    def apply_patch(self, patchname, residue):
        """Apply a patch to the given residue.

        This is one of the key functions in PDB2PQR.  A similar function
        appears in :mod:`definitions` - that version is needed for residue
        level subtitutions so certain protonation states (i.e. CYM, HSE) are
        detectatble on input.

        This version looks up the particular patch name in the patch_map
        stored in the biomolecule, and then applies the various commands to the
        reference and actual residue structures.

        See the inline comments for a more detailed explanation.

        :param patchname:  the name of the patch
        :type patchname:  str
        :param residue:  the residue to apply the patch to
        :type residue:  Residue
        """
        if patchname not in self.patch_map:
            raise KeyError(f"Unable to find patch {patchname}!")
        _LOGGER.debug(f"PATCH INFO: {residue} patched with {patchname}")
        if patchname == "PEPTIDE":
            newreference = residue.reference
        else:
            newreference = copy.deepcopy(residue.reference)
        patch = self.patch_map[patchname]
        # Add atoms from patch
        for atomname in patch.map:
            newreference.map[atomname] = patch.map[atomname]
            for bond in patch.map[atomname].bonds:
                if bond not in newreference.map:
                    continue
                if atomname not in newreference.map[bond].bonds:
                    newreference.map[bond].bonds.append(atomname)
        # Remove atoms as directed by patch
        for remove in patch.remove:
            if remove in residue.map:
                residue.remove_atom(remove)
            if remove not in newreference.map:
                continue
            removebonds = newreference.map[remove].bonds
            del newreference.map[remove]
            for bond in removebonds:
                index = newreference.map[bond].bonds.index(remove)
                del newreference.map[bond].bonds[index]
        # Add the new dihedrals
        for dihedral_ in patch.dihedrals:
            newreference.dihedrals.append(dihedral_)
        # Point at the new reference
        residue.reference = newreference
        residue.patches.append(patchname)
        # Rename atoms as directed by patch
        for atom in residue.atoms:
            if atom.name in patch.altnames:
                residue.rename_atom(atom.name, patch.altnames[atom.name])
        # Replace each atom's reference with the new one
        for atomname in residue.map:
            if newreference.has_atom(atomname):
                atom = residue.get_atom(atomname)
                atom.reference = newreference.map[atomname]

[docs]    def update_ss_bridges(self):
        """Check and set SS-bridge partners."""
        sg_partners = {}
        for residue in self.residues:
            if isinstance(residue, aa.CYS):
                atom = residue.get_atom("SG")
                if atom is not None:
                    sg_partners[atom] = []
        for atom in sg_partners:
            for partner, value in sg_partners.items():
                if atom == partner or sg_partners[atom] != []:
                    continue
                dist = util.distance(atom.coords, partner.coords)
                if dist < BONDED_SS_LIMIT:
                    sg_partners[atom].append(partner)
                    value.append(atom)
        for atom in sg_partners:
            res1 = atom.residue
            numpartners = len(sg_partners[atom])
            if numpartners == 1:
                partner = sg_partners[atom][0]
                res2 = partner.residue
                res1.ss_bonded = True
                res1.ss_bonded_partner = partner
                self.apply_patch("CYX", res1)
                _LOGGER.debug(f"{res1} - {res2}")
            elif numpartners > 1:
                error = f"WARNING: {res1} has multiple potential "
                error += "SS-bridge partners"
                _LOGGER.warning(error)
            elif numpartners == 0:
                _LOGGER.debug(f"{res1} is a free cysteine")

[docs]    def update_residue_types(self):
        """Find the type of residue as notated in the Amino Acid definition.

        .. todo::
           Why are we setting residue types to numeric values (see code)?
        """
        for chain in self.chains:
            for residue in chain.residues:
                name = residue.name
                if name in AA_NAMES:
                    residue.type = 1
                elif name == "WAT":
                    residue.type = 3
                elif name in NA_NAMES:
                    residue.type = 4
                else:  # Residue is a ligand or unknown
                    residue.type = 2

[docs]    def apply_force_field(self, forcefield_):
        """Apply the forcefield to the atoms within the biomolecule.

        :param forcefield_:  forcefield object
        :type forcefield_:  Forcefield
        :return:  (list of atoms that were found in the forcefield,
            list of atoms that were not found in the forcefield)
        :rtype:  (list, list)
        """
        misslist = []
        hitlist = []
        for residue in self.residues:
            if isinstance(residue, (aa.Amino, aa.WAT, na.Nucleic)):
                resname = residue.ffname
            else:
                resname = residue.name
            for atom in residue.atoms:
                atomname = atom.name
                charge, radius = forcefield_.get_params(resname, atomname)
                if charge is not None and radius is not None:
                    atom.ffcharge = charge
                    atom.radius = radius
                    hitlist.append(atom)
                else:
                    misslist.append(atom)
        return hitlist, misslist

[docs]    def apply_name_scheme(self, forcefield_):
        """Apply the naming scheme of the given forcefield.

        :param forcefield_:  forcefield object
        :type forcefield_:  Forcefield
        """
        for residue in self.residues:
            if isinstance(residue, (aa.Amino, aa.WAT, na.Nucleic)):
                resname = residue.ffname
            else:
                resname = residue.name
            for atom in residue.atoms:
                rname, aname = forcefield_.get_names(resname, atom.name)
                if resname not in ["LIG", "WAT", "ACE", "NME"] and (
                    rname is not None
                ):
                    try:
                        if (residue.is_n_term or residue.is_c_term) and (
                            rname != residue.name
                        ):
                            rname = residue.name
                    except AttributeError:
                        pass
                if aname is not None and rname is not None:
                    atom.res_name = rname
                    atom.name = aname

[docs]    def apply_pka_values(self, force_field, ph, pkadic):
        """Apply calculated pKa values to assign titration states.

        :param force_field:  force field name (determines naming of residues)
        :type force_field:  str
        :param ph:  pH value
        :type ph:  float
        :param pkadic:  dictionary of pKa values for residues
        :type pkadic:  dict
        """
        _LOGGER.info(f"Applying pKa values at a pH of {ph:.2f}:")
        formatted_pkadict = pprint.pformat(pkadic)
        _LOGGER.debug(f"{formatted_pkadict}")
        for residue in self.residues:
            if not isinstance(residue, aa.Amino):
                continue
            resname = residue.name
            resnum = residue.res_seq
            chain_id = residue.chain_id
            if residue.is_n_term:
                key = f"N+  {resnum:>3} {chain_id}"
                key = key.strip()
                if key in pkadic:
                    value = pkadic[key]
                    del pkadic[key]
                    if ph >= value:
                        if force_field in [
                            "amber",
                            "charmm",
                            "tyl06",
                            "peoepb",
                            "swanson",
                        ]:
                            warn = f"N-terminal {key} neutral"
                            _LOGGER.warning(warn)
                        else:
                            self.apply_patch("NEUTRAL-NTERM", residue)
            if residue.is_c_term:
                key = f"C-  {resnum:>3} {chain_id}"
                key = key.strip()
                if key in pkadic:
                    value = pkadic[key]
                    del pkadic[key]
                    if ph < value:
                        if force_field in [
                            "amber",
                            "charmm",
                            "tyl06",
                            "peoepb",
                            "swanson",
                        ]:
                            warn = f"C-terminal {key} neutral"
                            _LOGGER.warning(warn)
                        else:
                            self.apply_patch("NEUTRAL-CTERM", residue)
            key = f"{resname:<3} {resnum:>3} {chain_id}"
            key = key.strip()
            if key in pkadic:
                value = pkadic[key]
                del pkadic[key]
                if resname == "ARG" and ph >= value:
                    if force_field == "parse":
                        self.apply_patch("AR0", residue)
                        _LOGGER.warning(
                            "Neutral arginines are very rare. Please "
                            "double-check your setup."
                        )
                    else:
                        warn = (key, "neutral")
                        _LOGGER.warning(warn)
                elif resname == "ASP" and ph < value:
                    if residue.is_c_term and force_field in [
                        "amber",
                        "tyl06",
                        "swanson",
                    ]:
                        warn = (key, "Protonated at C-Terminal")
                        _LOGGER.warning(warn)
                    elif residue.is_n_term and force_field in [
                        "amber",
                        "tyl06",
                        "swanson",
                    ]:
                        warn = (key, "Protonated at N-Terminal")
                        _LOGGER.warning(warn)
                    else:
                        self.apply_patch("ASH", residue)
                elif resname == "CYS" and ph >= value:
                    if force_field == "charmm":
                        warn = (key, "negative")
                        _LOGGER.warning(warn)
                    else:
                        self.apply_patch("CYM", residue)
                elif resname == "GLU" and ph < value:
                    if residue.is_c_term and force_field in [
                        "amber",
                        "tyl06",
                        "swanson",
                    ]:
                        warn = (key, "Protonated at C-Terminal")
                        _LOGGER.warning(warn)
                    elif residue.is_n_term and force_field in [
                        "amber",
                        "tyl06",
                        "swanson",
                    ]:
                        warn = (key, "Protonated at N-Terminal")
                        _LOGGER.warning(warn)
                    else:
                        self.apply_patch("GLH", residue)
                elif resname == "HIS" and ph < value:
                    self.apply_patch("HIP", residue)
                elif resname == "LYS" and ph >= value:
                    if force_field == "charmm":
                        warn = (key, "neutral")
                        _LOGGER.warning(warn)
                    elif (
                        force_field in ["amber", "tyl06", "swanson"]
                        and residue.is_c_term
                    ):
                        warn = (key, "neutral at C-Terminal")
                        _LOGGER.warning(warn)
                    elif force_field == "tyl06" and residue.is_n_term:
                        warn = (key, "neutral at N-Terminal")
                        _LOGGER.warning(warn)
                    else:
                        self.apply_patch("LYN", residue)
                elif resname == "TYR" and ph >= value:
                    if force_field in [
                        "charmm",
                        "amber",
                        "tyl06",
                        "peoepb",
                        "swanson",
                    ]:
                        warn = (key, "negative")
                        _LOGGER.warning(warn)
                    else:
                        self.apply_patch("TYM", residue)
        if len(pkadic) > 0:
            warn = (
                "PDB2PQR could not identify the following residues and residue"
                " numbers as returned by PROPKA or PDB2PKA"
            )
            _LOGGER.warning(warn)
            for item in pkadic:
                text = f"             {item}"
                _LOGGER.warning(text)

[docs]    def hold_residues(self, hlist):
        """Set fixed state of specified residues.

        :param hlist:  list of (res_seq, chainid, ins_code) specifying the
            residues for altering fixed state status.
        :type hlist:  [(str, str, str)]
        """
        if not hlist:
            return
        for residue in self.residues:
            reskey = (residue.res_seq, residue.chain_id, residue.ins_code)
            if reskey in hlist:
                hlist.remove(reskey)
                if isinstance(residue, aa.Amino):
                    residue.stateboolean = {"FIXEDSTATE": False}
                    _LOGGER.debug(f"Setting residue {residue} as fixed.")
                else:
                    err = f"Matched residue {residue} but not subclass of "
                    err += "Amino."
                    _LOGGER.warning(err)
        if len(hlist) > 0:
            err = (
                "The following fixed residues were not matched (possible "
                f"internal error): {hlist}."
            )
            _LOGGER.warning(err)

[docs]    def create_residue(self, residue, resname):
        """Create a residue object.

        If the resname is a known residue type, try to make that specific
        object, otherwise just make a standard residue object.

        :param residue:  a list of atoms
        :type residue:  list
        :param resname:  the name of the residue
        :type resname:  str
        :return:  the residue object
        :rtype:  Residue
        """
        try:
            refobj = self.definition.map[resname]
            if refobj.name != resname:
                try:
                    klass = getattr(aa, refobj.name)
                except AttributeError:
                    klass = getattr(na, refobj.name)
                residue = klass(residue, refobj)
                residue.reference = refobj
            else:
                try:
                    klass = getattr(aa, resname)
                except AttributeError:
                    klass = getattr(na, resname)
                residue = klass(residue, refobj)
        except (KeyError, NameError):
            _LOGGER.debug(f"Parsing {resname} as new residue")
            residue = residue_.Residue(residue)
        return residue

[docs]    def repair_heavy(self):
        """Repair all heavy atoms.

        Unfortunately the first time we get to an atom we might not be able to
        rebuild it - it might depend on other atoms to be rebuild first (think
        side chains).  As such a 'seenmap' is used to keep track of what we've
        already seen and subsequent attempts to rebuild the atom.

        :raise ValueError:  missing atoms prevent reconstruction
        """
        total_missing = self.num_missing_heavy
        if total_missing == 0:
            _LOGGER.warning("No heavy atoms need to be repaired.")
            return
        for residue in self.residues:
            if not isinstance(residue, (aa.Amino, na.Nucleic)):
                continue
            atomlist = list(residue.atoms)
            for atom in atomlist:
                atomname = atom.name
                if (
                    atomname in ["OP1", "OP2"]
                    and residue.reference.has_atom("O1P")
                    and residue.reference.has_atom("O2P")
                ):
                    continue
                if not residue.reference.has_atom(atomname):
                    _LOGGER.warning(f"Extra atom {atomname} in {residue}! - ")
                    residue.remove_atom(atomname)
                    _LOGGER.warning("Deleted this atom.")
            missing = residue.missing
            if missing == []:
                continue
            seenmap = {}
            nummissing = len(missing)
            while len(missing) > 0:
                coords = []
                refcoords = []
                atomname = missing.pop(0)
                refatomcoords = residue.reference.map[atomname].coords
                bondlist = residue.reference.get_nearest_bonds(atomname)
                for bond in bondlist:
                    if bond == "N+1":
                        atom = residue.peptide_n
                    elif bond == "C-1":
                        atom = residue.peptide_c
                    else:
                        atom = residue.get_atom(bond)
                    if atom is None:
                        continue
                    # Get coordinates, reference coordinates
                    coords.append(atom.coords)
                    refcoords.append(residue.reference.map[bond].coords)
                    # Exit if we have enough atoms
                    if len(coords) == 3:
                        break
                # We might need other atoms to be rebuilt first
                if len(coords) < 3:
                    try:
                        seenmap[atomname] += 1
                    except KeyError:
                        seenmap[atomname] = 1
                    missing.append(atomname)
                    if seenmap[atomname] > nummissing:
                        missing_str = " ".join(missing)
                        err = (
                            "Too few atoms present to reconstruct or cap "
                            f"residue {residue} in structure! This error is "
                            "generally caused by missing backbone atoms in "
                            "this biomolecule; you must use an external "
                            "program to complete gaps in the biomolecule "
                            f"backbone. Heavy atoms missing from {residue}:  "
                            f"{missing_str}"
                        )
                        raise ValueError(err)
                else:  # Rebuild the atom
                    newcoords = quat.find_coordinates(
                        3, coords, refcoords, refatomcoords
                    )
                    residue.create_atom(atomname, newcoords)
                    _LOGGER.debug(
                        f"Added {atomname} to {residue} at coordinates "
                        f"{newcoords[0]:.3f}, "
                        f"{newcoords[1]:.3f}, "
                        f"{newcoords[2]:.3f}"
                    )

[docs]    def create_html_typemap(self, definition, outfilename):
        """Create an HTML typemap file at the desired location.

        If a type cannot be found for an atom a blank is listed.

        :param definition:  the definition objects.
        :type definition:  Definition
        :param outfilename:  the name of the file to write
        :type outfilename:  str
        """
        # Cache the initial atom numbers
        numcache = {atom: atom.serial for atom in self.atoms}
        self.reserialize()
        amberff = forcefield.Forcefield("amber", definition, None)
        charmmff = forcefield.Forcefield("charmm", definition, None)
        with open(outfilename, "w") as file_:
            file_.write("<HTML>\n")
            file_.write("<HEAD>\n")
            file_.write("<TITLE>PQR Typemap (beta)</TITLE>\n")
            file_.write("</HEAD>\n")
            file_.write("<BODY>\n")
            file_.write(
                "<H3>This is a developmental page including the atom "
                "type for the atoms in the PQR file.</H3><P>\n"
            )
            file_.write("<TABLE CELLSPACING=2 CELLPADDING=2 BORDER=1>\n")
            file_.write(
                "<tr><th>Atom Number</th><th>Atom Name</th><th>Residue "
                "Name</th><th>Chain ID</th><th>AMBER Atom Type</th><th>"
                "CHARMM Atom Type</th></tr>\n"
            )
            for atom in self.atoms:
                if isinstance(atom.residue, (aa.Amino, aa.WAT, na.Nucleic)):
                    resname = atom.residue.ffname
                else:
                    resname = atom.residue.name
                ambergroup = amberff.get_group(resname, atom.name)
                charmmgroup = charmmff.get_group(resname, atom.name)
                file_.write(
                    f"<tr><td>{atom.serial}</td>"
                    f"<td>{atom.name}</td>"
                    f"<td>{resname}</td>"
                    f"<td>{atom.chain_id}</td>"
                    f"<td>{ambergroup}</td>"
                    f"<td>{charmmgroup}</td></tr>\n"
                )
            file_.write("</table>\n")
            file_.write("</BODY></HTML>\n")
        # Return the original numbers back
        for atom in self.atoms:
            atom.serial = numcache[atom]

[docs]    def reserialize(self):
        """Generate new serial numbers for atoms in the biomolecule."""
        for count, atom in enumerate(self.atoms, start=1):
            atom.serial = count

    @property
    def atoms(self):
        """Return all Atom objects in list format.

        :return:  all atom objects
        :rtype:  [Atom]
        """
        atomlist = []
        for chain in self.chains:
            for atom in chain.atoms:
                atomlist.append(atom)
        return atomlist

    @property
    def charge(self):
        """Get the total charge on the biomolecule

        .. todo::
           Since the misslist is used to identify incorrect charge
           assignments, this routine does not list the 3 and 5 termini of
           nucleic acid chains as having non-integer charge even though they
           are (correctly) non-integer.

        :return:  (list of residues with non-integer charges,
            the total charge on the biomolecule)
        :rtype:  (list, float)
        """
        charge = 0.0
        misslist = []
        for chain in self.chains:
            for residue in chain.residues:
                rescharge = residue.charge
                charge += rescharge
                if isinstance(residue, na.Nucleic) and (
                    residue.is3term or residue.is5term
                ):
                    continue
                if float(f"{int(rescharge)}") != rescharge:
                    misslist.append(residue)
        return misslist, charge

    def __str__(self):
        output = [chain.get_summary() for chain in self.chains]
        return " ".join(output)