forlilab · maylinnp · May 22, 2025 · Mar 14, 2025 · Mar 14, 2025 · Mar 14, 2025
diff --git a/README.md b/README.md
@@ -29,6 +29,16 @@ at [Scripps Research](https://www.scripps.edu/).
 ### Documentation
 In-depth documentation can be found on [ReadTheDocs](https://ringtail.readthedocs.io).
 
+
+
+### Version 2.2.0: enhancements and bug fixes
+- New method added to enable exporting receptor pdb with flexible residues given ligand (currently for API only)
+- During results writing: check to ensure that files are provided with the `file` option and directories with the `file_path` option
+- Multiprocess removed from clustering methods, speeding up the process significantly
+- Minor sqlite query updates for enhanced query speed, such as replacing `LIKE` with `=` where possible when matching text
+- Initializing ringtail with full path to a database could result in issues exporting SDFs to specified folder, this has been fixed
+- Clustering without filtering could result in error, this has been fixed
+
 ### Version 2.1.2 bug fixes
 - Removing of union operand that made Ringtail incompatible with python=3.9
 - Pymol now displays receptor if present in database

diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -241,6 +241,27 @@ It is also possible to write a database query and export the results of the quer
     query_string = "SELECT docking_score, leff, Pose_ID, LigName FROM Results"
     rtc.export_csv(requested_data = query_string, csv_name = "query_results.csv", table = False)
 
+Exporting flexible residues (as PDB) of a receptor given a docked ligand
+========================================================================
+When processing docking data for a receptor docked with flexible residues, it might be of interest to produce a PDB of the flexible residues for a given docked ``ligand``. This is currently possible using the following method (only through API).
+The user needs to provide a meeko.Polymer object, and the ligand has to be part of a filtered bookmark ``bookmark_name``. Finally, a file output name must be provided, and the extension ``.pdb`` will be added if not provided. 
+The meeko.Polymer object is available with meeko>=0.6.1. The polymer is a deserialized version of the receptor/polymer json which can be produced by the script ``mk_prepare_receptor.py`` with the flag ``--write_json`` (see the `meeko documentation <https://meeko.readthedocs.io/en/release-doc/rec_overview.html>`_ for more details on how to prepare the receptor).
+
+.. code-block:: python 
+
+    rtc.write_flexres_pdb(receptor_polymer = polymer, ligname = "best_ligand", filename = "flexres_receptor", bookmark_name = "eworst6")
+
+    #results in creation of flexres_receptor.pdb
+
+The polymer used above can be prepared with the following code snippet, given ``receptor.json``:
+
+.. code-block:: python 
+
+    with open("receptor.json") as f:
+        json_string = f.read()
+    polymer = meeko.Polymer.from_json(json_string)
+
+
 Creating a new database from a bookmark
 =======================================
 A bookmark may also be exported as a separate SQLite dabase with the ``export_bookmark_db`` method. This will create a database of name ``<current_db_name>_<bookmark_name>.db``. This is currently only possible if using SQLite.

diff --git a/docs/source/changes.rst b/docs/source/changes.rst
@@ -3,6 +3,21 @@
 Changes in Ringtail
 ######################
 
+
+Changes in 2.2.0: new API method for flex res receptor export and bug fixes
+**************************************************************************
+Enhancements
+============
+* New method added to enable exporting receptor pdb with flexible residues given ligand (currently for API only)
+* During results writing: check to ensure that files are provided with the ``file`` option and directories with the ``file_path`` option
+* Multiprocess removed from clustering methods, speeding up the process significantly
+* Minor SQLite query updates for enhanced query speed, such as replacing ``LIKE`` with ``=`` where possible when matching text
+
+Bug fixes
+==========
+* Initializing ringtail with full path to a database could result in issues exporting SDFs to specified folder, this has been fixed
+* Clustering without filtering could result in error, this has been fixed
+
 Changes in 2.1.2: bug fixes
 ****************************
 * Removing of union operand that made Ringtail incompatible with python=3.9

diff --git a/ringtail/mpmanager.py b/ringtail/mpmanager.py
@@ -12,7 +12,7 @@
 from .mpreaderwriter import DockingFileReader
 from .mpreaderwriter import Writer
 from .logutils import LOGGER
-from .exceptions import MultiprocessingError, RTCoreError
+from .exceptions import MultiprocessingError, RTCoreError, OptionError
 import traceback
 from datetime import datetime
 
@@ -157,22 +157,32 @@ def _process_data_sources(self):
             if self.file_sources.file != (None and [[]]):
                 for file_list in self.file_sources.file:
                     for file in file_list:
-                        if (
-                            fnmatch.fnmatch(file, self.file_pattern)
-                            and file != self.receptor_file
-                        ):
-                            self._add_to_queue(file)
+                        if os.path.isfile(file):
+                            if (
+                                fnmatch.fnmatch(file, self.file_pattern)
+                                and file != self.receptor_file
+                            ):
+                                self._add_to_queue(file)
+                        else:
+                            raise OptionError(
+                                f"The file {file} is not a a file (or does not exist), and will not be processed."
+                            )
 
             # add files from file path(s)
             if self.file_sources.file_path != (None and [[]]):
                 for path_list in self.file_sources.file_path:
                     for path in path_list:
                         # scan for ligand dlgs
-                        for files in self._scan_dir(
-                            path, self.file_pattern, recursive=True
-                        ):
-                            for file in files:
-                                self._add_to_queue(file)
+                        if os.path.isdir(path):
+                            for files in self._scan_dir(
+                                path, self.file_pattern, recursive=True
+                            ):
+                                for file in files:
+                                    self._add_to_queue(file)
+                        else:
+                            raise OptionError(
+                                f"The path {path} is not a directory (or does note exist), and will not be processed."
+                            )
 
             # add files from file list(s)
             if self.file_sources.file_list != (None and [[]]):

diff --git a/ringtail/ringtailcore.py b/ringtail/ringtailcore.py
@@ -7,6 +7,7 @@
 import matplotlib.pyplot as plt
 import json
 from meeko import RDKitMolCreate
+from meeko.export_flexres import pdb_updated_flexres_from_rdkit
 from .storagemanager import StorageManager
 from .resultsmanager import ResultsManager
 from .receptormanager import ReceptorManager
@@ -1475,6 +1476,69 @@ def filter(
 
         return ligands_passed
 
+    def write_flexres_pdb(
+        self, receptor_polymer, ligname: str, filename: str, bookmark_name: str = None
+    ):
+        """
+        Writes a receptor pdb with flexible residues based on the ligand provided
+
+        Args:
+            receptor_polymer (Polymer): version of receptor produced by meeko
+            ligname (str): ligand name for which the receptor flexible residue info should be collected
+            filename (str): name of the output pdb, extension is optional, will default to '.pdb'
+            bookmark_name (str, optional): will use last used bookmark if not specified, will not work in a db without any filtering performed
+        """
+        # make flexres rdkit mols for ligand-receptor docking
+        if bookmark_name is not None:
+            self.set_storageman_attributes(bookmark_name=bookmark_name)
+
+        with self.storageman:
+            self.storageman.create_temp_table_from_bookmark()
+            ligname, ligand_smile, atom_index_map, hydrogen_parents = (
+                self.storageman.fetch_single_ligand_output_info(ligname)
+            )
+            flexible_residues, flexres_atomnames = self.storageman.fetch_flexres_info()
+            if flexible_residues is None:
+                flexible_residues, flexres_atomnames = [], []
+            elif flexible_residues != []:  # converts string to list
+                flexible_residues = json.loads(flexible_residues)
+                flexres_atomnames = json.loads(flexres_atomnames)
+
+            ligand_mol, flexres_mols, _ = self._create_rdkit_mol(
+                ligname,
+                ligand_smile,
+                atom_index_map,
+                hydrogen_parents,
+                flexible_residues,
+                flexres_atomnames,
+            )
+            if filename:
+                # if providing filename, make sure it has .pdb extension
+                root, ext = os.path.splitext(filename)
+                if not ext:
+                    ext = ".pdb"
+                path = root + ext
+            else:
+                # name it after the receptor
+                receptor_name, _ = self.storageman.fetch_receptor_objects()[0]
+                path = receptor_name + ".pdb"
+
+        flexmoldict = {}
+        # string in list of strings
+        for index, flexres in enumerate(flexible_residues):
+            residue = flexres.split(":")[1]
+            chain = residue[0]
+            resnum = residue[1:]
+            # res id is chain:resnum
+            flexmoldict[f"{chain}:{resnum}"] = flexres_mols[index]
+
+        pdb_str = pdb_updated_flexres_from_rdkit(receptor_polymer, flexmoldict)
+        # write pdb string to file
+        with open(path, "w") as file:
+            file.write(pdb_str)
+
+        return ligand_mol, flexmoldict
+
     def write_molecule_sdfs(
         self,
         sdf_path: str = None,
@@ -1519,7 +1583,7 @@ def write_molecule_sdfs(
             # determine filename
             if all_in_one:
                 # will write one SDF file for all molecules in bookmark (_None if no bookmark present)
-                db_file_name = os.path.splitext(self.db_file)[0]
+                db_file_name = os.path.splitext(os.path.basename(self.db_file))[0]
                 sdf_file_name = ("{0}_{1}.sdf").format(
                     db_file_name, str(self.storageman.bookmark_name)
                 )

diff --git a/ringtail/storagemanager.py b/ringtail/storagemanager.py
@@ -28,8 +28,6 @@
 )
 from .exceptions import DatabaseQueryError, DatabaseViewCreationError, OptionError
 
-import multiprocess
-
 
 class StorageManager:
 
@@ -39,7 +37,7 @@ class StorageManager:
     _db_schema_code_compatibility = {
         "1.0.0": ["1.0.0"],
         "1.1.0": ["1.1.0"],
-        "2.0.0": ["2.0.0", "2.1.0", "2.1.1", "2.1.2"],
+        "2.0.0": ["2.0.0", "2.1.0", "2.1.1", "2.1.2", "2.2.0"],
     }
 
     """Base class for a generic virtual screening database object.
@@ -1958,7 +1956,7 @@ def fetch_single_ligand_output_info(self, ligname) -> str:
         try:
             cur = self.conn.cursor()
             cur.execute(
-                f"SELECT LigName, ligand_smile, atom_index_map, hydrogen_parents FROM Ligands WHERE LigName LIKE '{ligname}'"
+                f"SELECT LigName, ligand_smile, atom_index_map, hydrogen_parents FROM Ligands WHERE LigName = '{ligname}'"
             )
             info = cur.fetchone()
             cur.close()
@@ -2540,15 +2538,15 @@ def _generate_result_filtering_query(self, filters_dict):
 
         # process filter values to lists and dicts that are easily incorporated in sql queries
         processed_filters = self._process_filters_for_query(filters_dict)
+        # check if clustering
+        clustering = bool(self.mfpt_cluster or self.interaction_cluster)
 
         # raise error if no filters are present and no clusterings
         if not processed_filters and not clustering:
             raise DatabaseQueryError(
                 "Ringtail query strings are empty, please check filter options."
             )
 
-        # check if clustering
-        clustering = bool(self.mfpt_cluster or self.interaction_cluster)
         # if clustering without filtering
         if clustering:
             # allows for clustering without filtering
@@ -2695,23 +2693,11 @@ def _clusterFps(fps, cutoff):
                 cutoff distance (float)
             """
 
-            # first generate the distance matrix:
             dists = []
             nfps = len(fps)
-            inputs = []
-
-            def gen(fps):
-                for i in range(1, len(fps)):
-                    yield (i, fps)
-
-            def mp_wrapper(input_tpl):
-                i, fps = input_tpl
-                return DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
-
-            with multiprocess.Pool() as p:
-                inputs = gen(fps)
-                for sims in p.imap(mp_wrapper, inputs):
-                    dists.extend([1 - x for x in sims])
+            for i in range(1, nfps):
+                sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
+                dists.extend([1 - x for x in sims])
 
             # now cluster the data:
             cs = Butina.ClusterData(dists, nfps, cutoff, isDistData=True)

diff --git a/setup.py b/setup.py
@@ -22,7 +22,7 @@ def find_files(directory):
 
 setup(
     name="ringtail",
-    version="2.1.2",
+    version="2.2.0",
     author="Forli Lab",
     author_email="forli@scripps.edu",
     url="https://github.com/forlilab/Ringtail",

diff --git a/test/.DS_Store b/test/.DS_Store
diff --git a/test/test_data/.DS_Store b/test/test_data/.DS_Store