#!/usr/bin/env python3
"""Module containing the BoxResidues class and the command line interface."""
import warnings
from pathlib import PurePath
from typing import Optional
import numpy as np
from Bio import BiopythonDeprecationWarning
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools import file_utils as fu
from biobb_common.tools.file_utils import launchlogger
from biobb_vs.utils.common import (
_from_string_to_list,
check_input_path,
check_output_path,
get_box_coordinates,
)
with warnings.catch_warnings():
warnings.simplefilter("ignore", BiopythonDeprecationWarning)
# try:
# import Bio.SubsMat.MatrixInfo
# except ImportError:
import Bio.Align.substitution_matrices
import Bio.pairwise2
import Bio.PDB
class BoxResidues(BiobbObject):
    """
    | biobb_vs BoxResidues
    | This class sets the center and the size of a rectangular parallelepiped box around a set of residues.
    | Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB. The residue identifiers that compose the selection (i.e. binding site) are provided by a property list.

    Args:
        input_pdb_path (str): PDB protein structure for which the box will be build. Its size and center will be set around the 'resid_list' property once mapped against this PDB. File type: input. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/utils/input_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str): PDB including the annotation of the box center and size as REMARKs. File type: output. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/utils/ref_output_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **resid_list** (*list*) - (None) List with all the residue numbers to form a cavity or binding site. Mandatory property.
            * **offset** (*float*) - (2.0) [0.1~1000|0.1] Extra distance (Angstroms) between the last residue atom and the box boundary.
            * **box_coordinates** (*bool*) - (False) Add box coordinates as 8 ATOM records.
            * **residue_offset** (*int*) - (0) [0~1000|1] Residue id offset.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_vs.utils.box_residues import box_residues
            prop = {
                'resid_list': [718, 743, 745, 762, 766, 796, 790, 791, 793, 794, 788],
                'offset': 2,
                'box_coordinates': True
            }
            box_residues(input_pdb_path='/path/to/myStructure.pdb',
                         output_pdb_path='/path/to/newBox.pdb',
                         properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_pdb_path: str,
        output_pdb_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        """Store the I/O paths and the box-specific properties.

        Args:
            input_pdb_path: Path of the PDB structure to read.
            output_pdb_path: Path of the PDB file that will receive the box
                annotation.
            properties: Tool parameters (see the class docstring). ``None``
                is treated as an empty dictionary.
            **kwargs: Accepted for interface compatibility; only captured
                through the ``locals()`` snapshot below.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. It must be taken before any
        # additional local variable is created in this method.
        # NOTE(review): presumably consumed by check_arguments() in the
        # parent class - confirm against biobb_common.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_pdb_path": output_pdb_path},
        }

        # Properties specific for BB
        self.resid_list = _from_string_to_list(properties.get("resid_list", []))
        self.offset = float(properties.get("offset", 2.0))
        # Documented as a boolean flag and only ever used in a truth test, so
        # it is stored as a bool (it was previously coerced with float()).
        self.box_coordinates = bool(properties.get("box_coordinates", False))
        self.residue_offset = properties.get("residue_offset", 0)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters.

        The ``out_log`` and ``err_log`` arguments are kept for interface
        compatibility; the checks below log through ``self.out_log``.
        """
        class_name = self.__class__.__name__
        self.io_dict["in"]["input_pdb_path"] = check_input_path(
            self.io_dict["in"]["input_pdb_path"],
            "input_pdb_path",
            self.out_log,
            class_name,
        )
        self.io_dict["out"]["output_pdb_path"] = check_output_path(
            self.io_dict["out"]["output_pdb_path"],
            "output_pdb_path",
            False,
            self.out_log,
            class_name,
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`BoxResidues <utils.box_residues.BoxResidues>` utils.box_residues.BoxResidues object.

        Returns:
            ``0`` when the box annotation has been written, or when
            ``check_restart()`` reports that execution can be skipped.

        Raises:
            SystemExit: If none of the residues in ``resid_list`` can be
                found in the input structure.
        """
        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        input_pdb_path = self.io_dict["in"]["input_pdb_path"]
        output_pdb_path = self.io_dict["out"]["output_pdb_path"]

        # Parse structure
        fu.log(
            "Loading input PDB structure %s" % (input_pdb_path),
            self.out_log,
            self.global_log,
        )
        structure_name = PurePath(input_pdb_path).name
        parser = Bio.PDB.PDBParser(QUIET=True)
        structure = parser.get_structure(structure_name, input_pdb_path)
        # Work on the entry with id 0 when the parsed structure is not empty;
        # otherwise keep the (empty) structure so the "no match" branch
        # below reports the problem.
        model = structure[0] if len(structure) else structure

        # Mapping residue structure into input structure
        fu.log(
            "Mapping residue structure into input structure",
            self.out_log,
            self.global_log,
        )

        # Residue ids to select, in Biopython's (hetero flag, number,
        # insertion code) form. The offset is only added when it is non-zero,
        # exactly as before, so the listed values are otherwise untouched.
        wanted_ids = [
            (" ", resid + self.residue_offset if self.residue_offset else resid, " ")
            for resid in self.resid_list
        ]
        # Set for O(1) membership tests; the list above is kept because its
        # length (duplicates included) is what the match report compares to.
        wanted_id_set = set(wanted_ids)

        selection_res_list = [
            residue
            for chain in model
            for residue in chain
            if residue.get_id() in wanted_id_set
        ]
        listed_residues = ", ".join(str(v) for v in self.resid_list)

        if not selection_res_list:
            error_msg = "%s: Cannot match any of the residues listed in [%s] into %s" % (
                self.__class__.__name__,
                listed_residues,
                input_pdb_path,
            )
            fu.log(error_msg, self.out_log)
            raise SystemExit(error_msg)
        elif len(selection_res_list) != len(wanted_ids):
            fu.log(
                "Cannot match all the residues listed in %s into %s. Found %s out of %s"
                % (
                    listed_residues,
                    input_pdb_path,
                    len(selection_res_list),
                    len(wanted_ids),
                ),
                self.out_log,
            )
        else:
            fu.log(
                "Selection of residues successfully matched",
                self.out_log,
                self.global_log,
            )

        # Coordinates of every atom in the selection, gathered once and
        # reused for both the center and the size computations.
        atom_coords = [
            atom.coord for residue in selection_res_list for atom in residue.get_atoms()
        ]

        # Compute binding site box size
        # compute box center (arithmetic mean of the selected atom coordinates)
        selection_box_center = sum(atom_coords) / len(atom_coords)
        fu.log(
            "Binding site center (Angstroms): %10.3f%10.3f%10.3f"
            % (
                selection_box_center[0],
                selection_box_center[1],
                selection_box_center[2],
            ),
            self.out_log,
            self.global_log,
        )

        # compute box size: per-axis distance from the center to the largest
        # coordinate, i.e. a half-length (the volume below multiplies by 2**3).
        # NOTE(review): only the maximum side is measured, so atoms lying
        # farther from the center on the minimum side may fall outside the
        # box - confirm this is intended.
        selection_coords_max = np.amax(atom_coords, axis=0)
        selection_box_size = selection_coords_max - selection_box_center
        if self.offset:
            selection_box_size = [c + self.offset for c in selection_box_size]
        fu.log(
            "Binding site size (Angstroms):   %10.3f%10.3f%10.3f"
            % (selection_box_size[0], selection_box_size[1], selection_box_size[2]),
            self.out_log,
            self.global_log,
        )

        # compute volume
        vol = np.prod(selection_box_size) * 2**3
        fu.log("Volume (cubic Angstroms): %.0f" % (vol), self.out_log, self.global_log)

        # add box details as PDB remarks
        remarks = "REMARK BOX CENTER:%10.3f%10.3f%10.3f" % (
            selection_box_center[0],
            selection_box_center[1],
            selection_box_center[2],
        )
        remarks += " SIZE:%10.3f%10.3f%10.3f" % (
            selection_box_size[0],
            selection_box_size[1],
            selection_box_size[2],
        )

        selection_box_coords_txt = ""
        # add (optional) box coordinates as 8 ATOM records
        if self.box_coordinates:
            fu.log("Adding box coordinates", self.out_log, self.global_log)
            selection_box_coords_txt = get_box_coordinates(
                selection_box_center, selection_box_size
            )

        # Mode "w" already truncates and positions at the start of the file,
        # so no explicit seek is needed.
        with open(output_pdb_path, "w") as f:
            f.write(remarks.rstrip("\r\n") + "\n" + selection_box_coords_txt)

        fu.log(
            "Saving output PDB file (with box setting annotations): %s"
            % (output_pdb_path),
            self.out_log,
            self.global_log,
        )

        # Copy files to host
        self.copy_to_host()
        self.remove_tmp_files()

        return 0
def box_residues(
    input_pdb_path: str,
    output_pdb_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Create the :class:`BoxResidues <utils.box_residues.BoxResidues>` class and
    execute the :meth:`launch() <utils.box_residues.BoxResidues.launch>` method."""
    # The extra keyword arguments are forwarded as one keyword literally
    # named ``kwargs`` (not unpacked), which is what expanding ``locals()``
    # produces for this signature.
    building_block = BoxResidues(
        input_pdb_path=input_pdb_path,
        output_pdb_path=output_pdb_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()
# Give the functional wrapper the same documentation as the class.
box_residues.__doc__ = BoxResidues.__doc__

# Command line entry point built by the class's get_main() from the wrapper
# function and a one-line description of the tool.
main = BoxResidues.get_main(
    box_residues,
    "Sets the center and the size of a rectangular parallelepiped box "
    "around a selection of residues found in a given PDB.",
)

if __name__ == "__main__":
    main()