proteusPy.DisulfideList

This module is part of the proteusPy package, a Python package for the analysis and modeling of protein structures, with an emphasis on disulfide bonds. This work is based on the original C/C++ implementation by Eric G. Suchanek.

The module provides the implmentation and interface for the DisulfideList object, used extensively by Disulfide class.

Author: Eric G. Suchanek, PhD Last revision: 3/18/2024 -egs-

   1"""
   2This module is part of the proteusPy package, a Python package for 
   3the analysis and modeling of protein structures, with an emphasis on disulfide bonds.
   4This work is based on the original C/C++ implementation by Eric G. Suchanek. \n
   5
   6The module provides the implmentation and interface for the [DisulfideList](#DisulfideList)
   7object, used extensively by Disulfide class.
   8
   9Author: Eric G. Suchanek, PhD
  10Last revision: 3/18/2024 -egs-
  11"""
  12
  13try:
  14    # Check if running in Jupyter
  15    shell = get_ipython().__class__.__name__
  16    if shell == "ZMQInteractiveShell":
  17        from tqdm.notebook import tqdm
  18    else:
  19        from tqdm import tqdm
  20except NameError:
  21    from tqdm import tqdm
  22
  23from collections import UserList
  24from pathlib import Path
  25
  26import numpy as np
  27import pandas
  28import plotly.express as px
  29import plotly.graph_objects as go
  30import pyvista as pv
  31from Bio.PDB import PDBParser
  32from plotly.subplots import make_subplots
  33
  34import proteusPy
  35from proteusPy import Disulfide
  36from proteusPy.atoms import *
  37from proteusPy.ProteusGlobals import MODEL_DIR, PBAR_COLS, WINSIZE
  38from proteusPy.utility import get_jet_colormap, grid_dimensions
  39
  40# Set the figure sizes and axis limits.
  41DPI = 220
  42WIDTH = 6.0
  43HEIGHT = 6.0
  44TORMIN = -179.0
  45TORMAX = 180.0
  46
  47
  48Torsion_DF_Cols = [
  49    "source",
  50    "ss_id",
  51    "proximal",
  52    "distal",
  53    "chi1",
  54    "chi2",
  55    "chi3",
  56    "chi4",
  57    "chi5",
  58    "energy",
  59    "ca_distance",
  60    "cb_distance",
  61    "phi_prox",
  62    "psi_prox",
  63    "phi_dist",
  64    "psi_dist",
  65    "torsion_length",
  66    "rho",
  67]
  68
  69Distance_DF_Cols = [
  70    "source",
  71    "ss_id",
  72    "proximal",
  73    "distal",
  74    "energy",
  75    "ca_distance",
  76    "cb_distance",
  77]
  78
  79
  80class DisulfideList(UserList):
  81    """
  82    The class provides a sortable list for Disulfide objects.
  83    Indexing and slicing are supported, as well as typical list operations like
  84    ``.insert()``, ``.append()`` and ``.extend().`` The DisulfideList object must be initialized
  85    with an iterable (tuple, list) and a name. Sorting is keyed by torsional energy.
  86
  87    The class can also render Disulfides to a pyVista window using the
  88    [display()](#DisulfideList.display) and [display_overlay()](#DisulfideList.display_overlay)methods.
  89    See below for examples.\n
  90
  91    Examples:
  92    >>> from proteusPy import Disulfide, DisulfideLoader, DisulfideList, Load_PDB_SS
  93
  94    Instantiate some variables. Note: the list is initialized with an iterable and a name (optional)
  95
  96    >>> SS = Disulfide('tmp')
  97
  98    The list is initialized with an iterable, a name and resolution. Name and resolution
  99    are optional.
 100    >>> SSlist = DisulfideList([],'ss', -1.0)
 101
 102    Load the database.
 103    >>> PDB_SS = Load_PDB_SS(verbose=False, subset=True)
 104
 105    Get the first disulfide via indexing.
 106    >>> SS = PDB_SS[0]
 107
 108    # assert str(SS) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>"
 109
 110    >>> SS4yys = PDB_SS['4yys']
 111
 112    # assert str(SS4yys) == "[<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_56A_98A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_156A_207A, Source: 4yys, Resolution: 1.35 Å>]"
 113
 114    Make some empty disulfides.
 115    >>> ss1 = Disulfide('ss1')
 116    >>> ss2 = Disulfide('ss2')
 117
 118    Make a DisulfideList containing ss1, named 'tmp'
 119    >>> sslist = DisulfideList([ss1], 'tmp')
 120    >>> sslist.append(ss2)
 121
 122    Extract the first disulfide
 123    >>> ss1 = PDB_SS[0]
 124
 125    # assert str(ss1.pprint_all()) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å\n Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))>\nProximal Coordinates:\n   N: <Vector -2.36, -20.48, 5.21>\n   Cα: <Vector -2.10, -19.89, 3.90>\n   C: <Vector -1.12, -18.78, 4.12>\n   O: <Vector -1.30, -17.96, 5.03>\n   Cβ: <Vector -3.38, -19.31, 3.32>\n   Sγ: <Vector -3.24, -18.40, 1.76>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\n Distal Coordinates:\n   N: <Vector -0.60, -18.71, -1.62>\n   Cα: <Vector -0.48, -19.10, -0.22>\n   C: <Vector 0.92, -19.52, 0.18>\n   O: <Vector 1.10, -20.09, 1.25>\n   Cβ: <Vector -1.48, -20.23, 0.08>\n   Sγ: <Vector -3.22, -19.69, 0.18>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n<BLANKLINE>\n Proximal Internal Coords:\n   N: <Vector -0.41, 1.40, -0.00>\n   Cα: <Vector 0.00, 0.00, 0.00>\n   C: <Vector 1.50, 0.00, 0.00>\n   O: <Vector 2.12, 0.71, -0.80>\n   Cβ: <Vector -0.50, -0.70, -1.25>\n   Sγ: <Vector 0.04, -2.41, -1.50>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\nDistal Internal Coords:\n   N: <Vector 1.04, -5.63, 1.17>\n   Cα: <Vector 1.04, -4.18, 1.31>\n   C: <Vector 1.72, -3.68, 2.57>\n   O: <Vector 1.57, -2.51, 2.92>\n   Cβ: <Vector -0.41, -3.66, 1.24>\n   Sγ: <Vector -1.14, -3.69, -0.43>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n Χ1-Χ5: 174.63°, 82.52°, -83.32°, -62.52° -73.83°, 138.89°, 1.70 kcal/mol\n Cα Distance: 4.50 Å\n Torsion length: 231.53 deg>"
 126
 127    Get a list of disulfides via slicing
 128    >>> subset = DisulfideList(PDB_SS[0:10],'subset')
 129
 130    Display the subset disulfides overlaid onto the same coordinate frame,
 131    (proximal N, Ca, C').
 132
 133    The disulfides are colored individually to facilitate inspection.
 134
 135    >>> subset.display_overlay()
 136    """
 137
 138    def __init__(self, iterable, id: str, res=-1.0, quiet=True):
 139        """
 140        Initialize the DisulfideList
 141
 142        :param iterable: an iterable e.g. []
 143        :type iterable: iterable
 144        :param id: Name for the list
 145        :type id: str
 146
 147        Example:
 148        >>> from proteusPy import DisulfideList, Disulfide
 149
 150        Initialize some empty disulfides.
 151        >>> ss1 = Disulfide('ss1')
 152        >>> ss2 = Disulfide('ss2')
 153        >>> ss3 = Disulfide('ss3')
 154
 155        Make a list containing the disulfides.
 156        >>> sslist = DisulfideList([ss1, ss2], 'sslist')
 157        >>> sslist
 158        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
 159        >>> sslist.append(ss3)
 160        >>> sslist
 161        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
 162        """
 163
 164        super().__init__(self.validate_ss(item) for item in iterable)
 165
 166        self.pdb_id = id
 167        self.quiet = quiet
 168        total = 0
 169        count = 0
 170
 171        if res == -1:
 172            for ss in iterable:
 173                if ss.resolution is not None:
 174                    total += ss.resolution
 175                    count += 1
 176            if count != 0:
 177                self.res = total / count
 178            else:
 179                self.res = -1.0
 180        else:
 181            self.res = res
 182
 183    def __getitem__(self, item):
 184        """
 185        Retrieve a disulfide from the list. Internal only.
 186
 187        :param item: Index or slice
 188        :return: Sublist
 189        """
 190        if isinstance(item, slice):
 191            indices = range(*item.indices(len(self.data)))
 192            name = self.data[0].pdb_id
 193            sublist = [self.data[i] for i in indices]
 194            return DisulfideList(sublist, name)
 195        return UserList.__getitem__(self, item)
 196
 197    def __setitem__(self, index, item):
 198        self.data[index] = self.validate_ss(item)
 199
 200    # Rendering engine calculates and instantiates all bond
 201    # cylinders and atomic sphere meshes. Called by all high level routines
 202
 203    def _render(self, style, panelsize=256) -> pv.Plotter:
 204        """
 205        Display a window showing the list of disulfides in the given style.
 206        :param style: one of 'cpk', 'bs', 'sb', 'plain', 'cov', 'pd'
 207        :return: Window in the relevant style
 208        """
 209        ssList = self.data
 210        name = self.id
 211        tot_ss = len(ssList)  # number off ssbonds
 212        rows, cols = grid_dimensions(tot_ss)
 213        winsize = (panelsize * cols, panelsize * rows)
 214
 215        pl = pv.Plotter(window_size=winsize, shape=(rows, cols))
 216        i = 0
 217
 218        for r in range(rows):
 219            for c in range(cols):
 220                pl.subplot(r, c)
 221                if i < tot_ss:
 222                    # ss = Disulfide()
 223                    ss = ssList[i]
 224                    src = ss.pdb_id
 225                    enrg = ss.energy
 226                    title = f"{src} {ss.proximal}{ss.proximal_chain}-{ss.distal}{ss.distal_chain}: E: {enrg:.2f}, Cα: {ss.ca_distance:.2f} Å, Tors: {ss.torsion_length:.2f}°"
 227                    pl.add_title(title=title, font_size=FONTSIZE)
 228                    ss._render(
 229                        pl,
 230                        style=style,
 231                        bondcolor=BOND_COLOR,
 232                        bs_scale=BS_SCALE,
 233                        spec=SPECULARITY,
 234                        specpow=SPEC_POWER,
 235                    )
 236                i += 1
 237        return pl
 238
 239    @property
 240    def Average_Distance(self):
 241        """
 242        Return the Average distance (Å) between the atoms in the list.
 243
 244        :return: Average distance (Å) between all atoms in the list
 245
 246        """
 247        sslist = self.data
 248        tot = len(sslist)
 249        cnt = 1
 250
 251        total = 0.0
 252        for ss1 in sslist:
 253            for ss2 in sslist:
 254                if ss2 == ss1:
 255                    continue
 256                total += ss1.Distance_RMS(ss2)
 257                cnt += 1
 258
 259        return total / cnt
 260
 261    @property
 262    def Average_Energy(self):
 263        """
 264        Return the Average energy (kcal/mol) for the Disulfides in the list.
 265
 266        :return: Average energy (kcal/mol) between all atoms in the list
 267
 268        """
 269        sslist = self.data
 270        tot = len(sslist)
 271
 272        total = 0.0
 273        for ss1 in sslist:
 274            total += ss1.energy
 275
 276        return total / tot
 277
 278    @property
 279    def Average_Conformation(self):
 280        """
 281        Return the Average conformation for the Disulfides in the list.
 282
 283        :return: Average conformation: [x1, x2, x3, x4, x5]
 284        """
 285
 286        sslist = self.data
 287        tot = len(sslist)
 288
 289        tors = self.build_torsion_df()
 290
 291        res = np.zeros(5)
 292
 293        for ss, i in zip(sslist, range(tot)):
 294            res += ss.torsion_array
 295
 296        return res / tot
 297
 298    def append(self, item):
 299        """
 300        Append the list with item
 301
 302        :param item: Disulfide to add
 303        :type item: Disulfide
 304        """
 305        self.data.append(self.validate_ss(item))
 306
 307    @property
 308    def Average_Resolution(self) -> float:
 309        """
 310        Compute and return the average structure resolution for the given list.
 311
 312        :return: Average resolution (A)
 313        """
 314        res = 0.0
 315        cnt = 1
 316
 317        for ss in self.data:
 318            _res = ss.resolution
 319            if _res is not None and _res != -1.0:
 320                res += _res
 321                cnt += 1
 322        return res / cnt if cnt else -1.0
 323
 324    @property
 325    def Average_Torsion_Distance(self):
 326        """
 327        Return the average distance in torsion space (degrees), between all pairs in the
 328        DisulfideList
 329
 330        :return: Torsion Distance (degrees)
 331        """
 332        sslist = self.data
 333        tot = len(sslist)
 334        total = 0
 335        cnt = 1
 336
 337        for ss1 in sslist:
 338            for ss2 in sslist:
 339                if ss2 == ss1:
 340                    continue
 341                total += ss1.Torsion_Distance(ss2)
 342                cnt += 1
 343
 344        return total / cnt
 345
 346    def build_distance_df(self) -> pandas.DataFrame:
 347        """
 348        Create a dataframe containing the input DisulfideList Cα-Cα distance, energy.
 349        This can take several minutes for the entire database.
 350
 351        :return: DataFrame containing Ca distances
 352        :rtype: pandas.DataFrame
 353        """
 354
 355        # create a dataframe with the following columns for the disulfide
 356        # conformations extracted from the structure
 357
 358        SS_df = pandas.DataFrame(columns=Distance_DF_Cols)
 359        sslist = self.data
 360
 361        pbar = tqdm(sslist, ncols=PBAR_COLS)
 362        for ss in pbar:
 363            new_row = [
 364                ss.pdb_id,
 365                ss.name,
 366                ss.proximal,
 367                ss.distal,
 368                ss.energy,
 369                ss.ca_distance,
 370            ]
 371            # add the row to the end of the dataframe
 372            SS_df.loc[len(SS_df.index)] = new_row
 373
 374        return SS_df
 375
 376    def build_torsion_df(self) -> pandas.DataFrame:
 377        """
 378        Create a dataframe containing the input DisulfideList torsional parameters,
 379        Cα-Cα distance, energy, and phi-psi angles. This can take several minutes for the
 380        entire database.
 381
 382        :param SSList: DisulfideList - input list of Disulfides
 383        :return: pandas.Dataframe containing the torsions
 384        """
 385        # create a dataframe with the following columns for the disulfide
 386        # conformations extracted from the structure
 387
 388        SS_df = pandas.DataFrame(columns=Torsion_DF_Cols)
 389        sslist = self.data
 390        if self.quiet or len(sslist) < 100:
 391            pbar = sslist
 392        else:
 393            pbar = tqdm(sslist, ncols=PBAR_COLS, leave=False)
 394
 395        for ss in pbar:
 396            new_row = [
 397                ss.pdb_id,
 398                ss.name,
 399                ss.proximal,
 400                ss.distal,
 401                ss.chi1,
 402                ss.chi2,
 403                ss.chi3,
 404                ss.chi4,
 405                ss.chi5,
 406                ss.energy,
 407                ss.ca_distance,
 408                ss.cb_distance,
 409                ss.psiprox,
 410                ss.psiprox,
 411                ss.phidist,
 412                ss.psidist,
 413                ss.torsion_length,
 414                ss.rho,
 415            ]
 416            # add the row to the end of the dataframe
 417            SS_df.loc[len(SS_df.index)] = new_row
 418
 419        return SS_df
 420
 421    def by_chain(self, chain: str):
 422        """
 423        Return a DisulfideList from the input chain identifier.
 424
 425        :param chain: chain identifier, 'A', 'B, etc
 426        :return: DisulfideList containing disulfides within that chain.
 427        """
 428
 429        reslist = DisulfideList([], chain)
 430        sslist = self.data
 431
 432        for ss in sslist:
 433            pchain = ss.proximal_chain
 434            dchain = ss.distal_chain
 435            if pchain == dchain:
 436                if pchain == chain:
 437                    reslist.append(ss)
 438            else:
 439                print(f"Cross chain SS: {ss.repr_compact}:")
 440        return reslist
 441
 442    def calculate_torsion_statistics(self):
 443        df = self.build_torsion_df()
 444
 445        df_subset = df.iloc[:, 4:]
 446        df_stats = df_subset.describe()
 447
 448        # print(df_stats.head())
 449
 450        mean_vals = df_stats.loc["mean"].values
 451        std_vals = df_stats.loc["std"].values
 452
 453        tor_cols = ["chi1", "chi2", "chi3", "chi4", "chi5", "torsion_length"]
 454        dist_cols = ["ca_distance", "cb_distance", "energy"]
 455        tor_stats = {}
 456        dist_stats = {}
 457
 458        for col in tor_cols:
 459            tor_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
 460
 461        for col in dist_cols:
 462            dist_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
 463
 464        tor_stats = pandas.DataFrame(tor_stats, columns=tor_cols)
 465        dist_stats = pandas.DataFrame(dist_stats, columns=dist_cols)
 466
 467        return tor_stats, dist_stats
 468
 469    def display(self, style="sb", light=True, panelsize=512):
 470        """
 471        Display the Disulfide list in the specific rendering style.
 472
 473        :param single: Display the bond in a single panel in the specific style.
 474        :param style:  Rendering style: One of:\n
 475            - 'sb' - split bonds
 476            - 'bs' - ball and stick
 477            - 'cpk' - CPK style
 478            - 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
 479            - 'plain' - boring single color
 480        :light: If True, light background, if False, dark
 481        """
 482        id = self.pdb_id
 483        ssbonds = self.data
 484        tot_ss = len(ssbonds)  # number off ssbonds
 485        avg_enrg = self.Average_Energy
 486        avg_dist = self.Average_Distance
 487        resolution = self.resolution
 488
 489        if light:
 490            pv.set_plot_theme("document")
 491        else:
 492            pv.set_plot_theme("dark")
 493
 494        title = f"<{id}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
 495
 496        pl = pv.Plotter()
 497        pl = self._render(style, panelsize)
 498        pl.enable_anti_aliasing("msaa")
 499        pl.add_title(title=title, font_size=FONTSIZE)
 500        pl.link_views()
 501        pl.reset_camera()
 502        pl.show()
 503
 504    def display_torsion_statistics(
 505        self,
 506        display=True,
 507        save=False,
 508        fname="ss_torsions.png",
 509        stats=False,
 510        light=True,
 511    ):
 512        """
 513        Display torsion and distance statistics for a given Disulfide list.
 514
 515        :param display: Whether to display the plot in the notebook. Default is True.
 516        :type display: bool
 517        :param save: Whether to save the plot as an image file. Default is False.
 518        :type save: bool
 519        :param fname: The name of the image file to save. Default is 'ss_torsions.png'.
 520        :type fname: str
 521        :param stats: Whether to return the DataFrame representing the statistics for `self`. Default is False.
 522        :type stats: bool
 523        :param light: Whether to use the 'plotly_light' or 'plotly_dark' template. Default is True.
 524        :type light: bool
 525        :return: None
 526        """
 527        title = f"{self.id}: {self.length} members"
 528
 529        df = self.torsion_df
 530        df_subset = df.iloc[:, 4:]
 531        df_stats = df_subset.describe()
 532
 533        # print(df_stats.head())
 534
 535        mean_vals = df_stats.loc["mean"].values
 536        std_vals = df_stats.loc["std"].values
 537
 538        fig = make_subplots(
 539            rows=2, cols=2, vertical_spacing=0.125, column_widths=[1, 1]
 540        )
 541        fig.update_layout(template="plotly" if light else "plotly_dark")
 542
 543        fig.update_layout(
 544            title={
 545                "text": title,
 546                "xanchor": "center",
 547                # 'y':.9,
 548                "x": 0.5,
 549                "yanchor": "top",
 550            },
 551            width=1024,
 552            height=1024,
 553        )
 554
 555        fig.add_trace(
 556            go.Bar(
 557                x=["X1", "X2", "X3", "X4", "X5"],
 558                y=mean_vals[:5],
 559                name="Torsion Angle,(°) ",
 560                error_y=dict(type="data", array=std_vals[:5], visible=True),
 561            ),
 562            row=1,
 563            col=1,
 564        )
 565
 566        fig.add_trace(
 567            go.Bar(
 568                x=["rho"],
 569                y=[mean_vals[13]],
 570                name="ρ, (°)",
 571                error_y=dict(type="data", array=[std_vals[13]], visible=True),
 572            ),
 573            row=1,
 574            col=1,
 575        )
 576
 577        # Update the layout of the subplot
 578        # Cα N, Cα, Cβ, C', Sγ Å °
 579
 580        fig.update_yaxes(
 581            title_text="Torsion Angle (°)", range=[-200, 200], row=1, col=1
 582        )
 583        fig.update_yaxes(range=[0, 320], row=2, col=2)
 584
 585        # Add another subplot for the mean values of energy
 586        fig.add_trace(
 587            go.Bar(
 588                x=["Strain Energy (kcal/mol)"],
 589                y=[mean_vals[5]],
 590                name="Energy (kcal/mol)",
 591                error_y=dict(
 592                    type="data",
 593                    array=[std_vals[5].tolist()],
 594                    width=0.25,
 595                    visible=True,
 596                ),
 597            ),
 598            row=1,
 599            col=2,
 600        )
 601        fig.update_traces(width=0.25, row=1, col=2)
 602
 603        # Update the layout of the subplot
 604        # fig.update_xaxes(title_text="Energy", row=1, col=2)
 605        fig.update_yaxes(
 606            title_text="kcal/mol", range=[0, 20], row=1, col=2
 607        )  # max possible DSE
 608
 609        # Add another subplot for the mean values of ca_distance
 610        fig.add_trace(
 611            go.Bar(
 612                x=["Cα Distance, (Å)", "Cβ Distance, (Å)"],
 613                y=[mean_vals[6], mean_vals[7]],
 614                name="Cβ Distance (Å)",
 615                error_y=dict(
 616                    type="data",
 617                    array=[std_vals[6].tolist(), std_vals[7].tolist()],
 618                    width=0.25,
 619                    visible=True,
 620                ),
 621            ),
 622            row=2,
 623            col=1,
 624        )
 625        # Update the layout of the subplot
 626        fig.update_yaxes(title_text="Distance (A)", range=[0, 10], row=2, col=1)  #
 627        fig.update_traces(width=0.25, row=2, col=1)
 628
 629        # Add a scatter subplot for torsion length column
 630        fig.add_trace(
 631            go.Bar(
 632                x=["Torsion Length, (Å)"],
 633                y=[mean_vals[12]],
 634                name="Torsion Length, (Å)",
 635                error_y=dict(
 636                    type="data", array=[std_vals[12]], width=0.25, visible=True
 637                ),
 638            ),
 639            row=2,
 640            col=2,
 641        )
 642        # Update the layout of the subplot
 643        fig.update_yaxes(title_text="Torsion Length", range=[0, 350], row=2, col=2)
 644        fig.update_traces(width=0.25, row=2, col=2)
 645
 646        # Update the error bars
 647        fig.update_traces(
 648            error_y_thickness=2,
 649            error_y_color="gray",
 650            texttemplate="%{y:.2f} ± %{error_y.array:.2f}",
 651            textposition="outside",
 652        )  # , row=1, col=1)
 653
 654        if display:
 655            fig.show()
 656        if save:
 657            fig.write_image(Path(fname))
 658
 659        if stats:
 660            return df_stats
 661
 662        return
 663
 664    @property
 665    def distance_df(self) -> pandas.DataFrame:
 666        """
 667        Build and return the distance dataframe for the input list.
 668        This can take considerable time for the entire list.
 669
 670        :return: Dataframe containing the Cα-Cα distances for the given list.
 671
 672        Example:
 673        >>> from proteusPy import Disulfide, Load_PDB_SS, DisulfideList
 674        >>> PDB_SS = Load_PDB_SS()
 675
 676        """
 677        return self.build_distance_df()
 678
 679    def display_overlay(
 680        self,
 681        screenshot=False,
 682        movie=False,
 683        verbose=True,
 684        fname="ss_overlay.png",
 685        light=True,
 686    ):
 687        """
 688        Display all disulfides in the list overlaid in stick mode against
 689        a common coordinate frames. This allows us to see all of the disulfides
 690        at one time in a single view. Colors vary smoothy between bonds.
 691
 692        :param screenshot: Save a screenshot, defaults to False
 693        :param movie: Save a movie, defaults to False
 694        :param verbose: Verbosity, defaults to True
 695        :param fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
 696        :param light: Background color, defaults to True for White. False for Dark.
 697        """
 698
 699        id = self.pdb_id
 700        ssbonds = self.data
 701        tot_ss = len(ssbonds)  # number off ssbonds
 702        avg_enrg = self.Average_Energy
 703        avg_dist = self.Average_Distance
 704        resolution = self.resolution
 705
 706        res = 100
 707
 708        if tot_ss > 100:
 709            res = 60
 710        if tot_ss > 200:
 711            res = 30
 712        if tot_ss > 300:
 713            res = 8
 714
 715        title = f"<{id}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
 716
 717        if light:
 718            pv.set_plot_theme("document")
 719        else:
 720            pv.set_plot_theme("dark")
 721
 722        if movie:
 723            pl = pv.Plotter(window_size=WINSIZE, off_screen=True)
 724        else:
 725            pl = pv.Plotter(window_size=WINSIZE, off_screen=False)
 726
 727        pl.add_title(title=title, font_size=FONTSIZE)
 728        pl.enable_anti_aliasing("msaa")
 729        # pl.add_camera_orientation_widget()
 730        pl.add_axes()
 731
 732        mycol = np.zeros(shape=(tot_ss, 3))
 733        mycol = get_jet_colormap(tot_ss)
 734
 735        # scale the overlay bond radii down so that we can see the individual elements better
 736        # maximum 90% reduction
 737
 738        brad = BOND_RADIUS if tot_ss < 10 else BOND_RADIUS * 0.75
 739        brad = brad if tot_ss < 25 else brad * 0.8
 740        brad = brad if tot_ss < 50 else brad * 0.8
 741        brad = brad if tot_ss < 100 else brad * 0.6
 742
 743        # print(f'Brad: {brad}')
 744        pbar = tqdm(range(tot_ss), ncols=PBAR_COLS)
 745
 746        for i, ss in zip(pbar, ssbonds):
 747            color = [int(mycol[i][0]), int(mycol[i][1]), int(mycol[i][2])]
 748            ss._render(
 749                pl,
 750                style="plain",
 751                bondcolor=color,
 752                translate=False,
 753                bond_radius=brad,
 754                res=res,
 755            )
 756
 757        pl.reset_camera()
 758
 759        if screenshot:
 760            pl.show(auto_close=False)  # allows for manipulation
 761            pl.screenshot(fname)
 762
 763        elif movie:
 764            if verbose:
 765                print(f" -> display_overlay(): Saving mp4 animation to: {fname}")
 766
 767            pl.open_movie(fname)
 768            path = pl.generate_orbital_path(n_points=360)
 769            pl.orbit_on_path(path, write_frames=True)
 770            pl.close()
 771
 772            if verbose:
 773                print(f" -> display_overlay(): Saved mp4 animation to: {fname}")
 774        else:
 775            pl.show()
 776
 777        return
 778
 779    def extend(self, other):
 780        """
 781        Extend the Disulfide list with other.
 782
 783        :param other: extension
 784        :type item: DisulfideList
 785        """
 786
 787        if isinstance(other, type(self)):
 788            self.data.extend(other)
 789        else:
 790            self.data.extend(self.validate_ss(item) for item in other)
 791
 792    def get_by_name(self, name):
 793        """
 794        Returns the Disulfide with the given name from the list.
 795        """
 796        for ss in self.data:
 797            if ss.name == name:
 798                return ss.copy()  # or ss.copy() !!!
 799        return None
 800
 801    def get_chains(self):
 802        """
 803        Return the chain IDs for chains within the given Disulfide.
 804        :return: Chain IDs for given Disulfide
 805        """
 806
 807        res_dict = {"xxx"}
 808        sslist = self.data
 809
 810        for ss in sslist:
 811            pchain = ss.proximal_chain
 812            dchain = ss.distal_chain
 813            res_dict.update(pchain)
 814            res_dict.update(dchain)
 815
 816        res_dict.remove("xxx")
 817
 818        return res_dict
 819
 820    def get_torsion_array(self):
 821        """
 822        Returns a 2D NumPy array representing the dihedral angles in the given disulfide list.
 823
 824        :return: A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row
 825                of the array represents the dihedral angles of a disulfide bond, in the following order:
 826                [X1_i, X2_i, X3_i, X4_i, X5_i], where i is the index of the disulfide bond in the list.
 827        """
 828        sslist = self.data
 829        tot = len(sslist)
 830        res = np.zeros(shape=(tot, 5))
 831
 832        for idx, ss in enumerate(sslist):
 833            row = ss.torsion_array
 834            res[idx, :] = row
 835
 836        return res
 837
 838    def has_chain(self, chain) -> bool:
 839        """
 840        Returns True if given chain contained in Disulfide, False otherwise.
 841        :return: Returns True if given chain contained in Disulfide, False otherwise.
 842        """
 843
 844        chns = {"xxx"}
 845        chns = self.get_chains()
 846        if chain in chns:
 847            return True
 848        else:
 849            return False
 850
 851    @property
 852    def id(self):
 853        """
 854        PDB ID of the list
 855        """
 856        return self.pdb_id
 857
 858    @id.setter
 859    def id(self, value):
 860        """
 861        Set the DisulfideList ID
 862
 863        Parameters
 864        ----------
 865        value : str
 866            List ID
 867        """
 868        self.pdb_id = value
 869
 870    @property
 871    def resolution(self):
 872        """
 873        Resolution of the parent sturcture (A)
 874        """
 875        return self.res
 876
 877    @resolution.setter
 878    def resolution(self, value):
 879        """
 880        Set the resolution of the list
 881
 882        :param value: Resolution (A)
 883        """
 884        self.res = value
 885
 886    def TorsionGraph(
 887        self, display=True, save=False, fname="ss_torsions.png", light=True
 888    ):
 889        # tor_stats, dist_stats = self.calculate_torsion_statistics()
 890        self.display_torsion_statistics(
 891            display=display, save=save, fname=fname, light=light
 892        )
 893
 894    def insert(self, index, item):
 895        """
 896        Insert a Disulfide into the list at the specified index
 897
 898        :param index: insertion point
 899        :type index: int
 900        :param item: Disulfide to insert
 901        :type item: Disulfide
 902        """
 903        self.data.insert(index, self.validate_ss(item))
 904
 905    @property
 906    def length(self):
 907        return len(self.data)
 908
 909    @property
 910    def min(self) -> Disulfide:
 911        """
 912        Return Disulfide from the list with the minimum energy
 913
 914        :return: Disulfide with the minimum energy.
 915        """
 916        sslist = sorted(self.data)
 917        return sslist[0]
 918
 919    @property
 920    def max(self) -> Disulfide:
 921        """
 922        Return Disulfide from the list with the maximum energy
 923
 924        :return: Disulfide with the maximum energy.
 925        """
 926        sslist = sorted(self.data)
 927        return sslist[-1]
 928
 929    def minmax_distance(self):
 930        """
 931        Return the Disulfides with the minimum and
 932        maximum Cα distances in the list.
 933
 934        :return: SSmin, SSmax
 935        """
 936
 937        _min = 99999.9
 938        _max = -99999.9
 939
 940        sslist = self.data
 941        ssmin = 0
 942        ssmax = 0
 943        idx = 0
 944
 945        pbar = tqdm(sslist, ncols=PBAR_COLS)
 946        for ss in pbar:
 947            dist = ss.ca_distance
 948
 949            if dist >= _max:
 950                ssmax = idx
 951                _max = dist
 952
 953            if dist <= _min:
 954                ssmin = idx
 955                _min = dist
 956            idx += 1
 957
 958        return sslist[ssmin], sslist[ssmax]
 959
 960    @property
 961    def minmax_energy(self):
 962        """
 963        Return the Disulfides with the minimum and maximum energies
 964        from the DisulfideList.
 965
 966        :return: Disulfide with the given ID
 967        """
 968        sslist = sorted(self.data)
 969        return sslist[0], sslist[-1]
 970
 971    def nearest_neighbors(
 972        self,
 973        chi1: float,
 974        chi2: float,
 975        chi3: float,
 976        chi4: float,
 977        chi5: float,
 978        cutoff: float,
 979    ):
 980        """
 981        Given a torsional array of chi1-chi5,
 982
 983        :param chi1: Chi1 (degrees)
 984        :param chi2: Chi2 (degrees)
 985        :param chi3: Chi3 (degrees)
 986        :param chi4: Chi4 (degrees)
 987        :param chi5: Chi5 (degrees)
 988        :param cutoff: Distance cutoff, degrees
 989        :return: DisulfideList of neighbors
 990        """
 991
 992        sslist = self.data
 993        modelss = proteusPy.Disulfide("model")
 994
 995        modelss.build_model(chi1, chi2, chi3, chi4, chi5)
 996        res = DisulfideList([], "neighbors")
 997        res = modelss.Torsion_neighbors(sslist, cutoff)
 998
 999        return res
1000
1001    def nearest_neighbors_ss(self, ss, cutoff: float):
1002        """
1003        Given an input Disulfide and overall torsional cutoff, return
1004        the list of Disulfides within the cutoff
1005
1006        :param ss: Disulfide to compare to
1007        :param chi5: Chi5 (degrees)
1008        :param cutoff: Distance cutoff, degrees
1009        :return: DisulfideList of neighbors
1010        """
1011
1012        chi1 = ss.chi1
1013        chi2 = ss.chi2
1014        chi3 = ss.chi3
1015        chi4 = ss.chi4
1016        chi5 = ss.chi5
1017
1018        sslist = self.data
1019        modelss = proteusPy.Disulfide("model")
1020
1021        modelss.build_model(chi1, chi2, chi3, chi4, chi5)
1022        res = DisulfideList([], "neighbors")
1023        res = modelss.Torsion_neighbors(sslist, cutoff)
1024
1025        return res
1026
1027    def pprint(self):
1028        """
1029        Pretty print self.
1030        """
1031        sslist = self.data
1032        for ss in sslist:
1033            ss.pprint()
1034
1035    def pprint_all(self):
1036        """
1037        Pretty print full disulfide descriptions in self.
1038        """
1039        sslist = self.data
1040        for ss in sslist:
1041            ss.pprint_all()
1042
1043    @property
1044    def torsion_df(self):
1045        return self.build_torsion_df()
1046
1047    @property
1048    def torsion_array(self):
1049        return self.get_torsion_array()
1050
1051    def validate_ss(self, value):
1052        return value
1053
1054    # class ends
1055
1056
1057# utility functions
1058from proteusPy.DisulfideExceptions import DisulfideConstructionWarning
1059
1060
1061def load_disulfides_from_id(
1062    struct_name: str,
1063    pdb_dir=MODEL_DIR,
1064    model_numb=0,
1065    verbose=False,
1066    quiet=False,
1067    dbg=False,
1068) -> DisulfideList:
1069    """
1070    Loads the Disulfides by PDB ID and returns a ```DisulfideList``` of Disulfide objects.
1071    Assumes the file is downloaded in the pdb_dir path.
1072
1073    *NB:* Requires EGS-Modified BIO.parse_pdb_header.py from https://github.com/suchanek/biopython
1074
1075    :param struct_name: the name of the PDB entry.
1076    :param pdb_dir: path to the PDB files, defaults to MODEL_DIR - this is: PDB_DIR/good and are
1077    the pre-parsed PDB files that have been scanned by the DisulfideDownloader program.
1078    :param model_numb: model number to use, defaults to 0 for single structure files.
1079    :param verbose: print info while parsing
1080    :return: a list of Disulfide objects initialized from the file.
1081
1082    Example:
1083
1084    PDB_DIR defaults to os.getenv('PDB').
1085    To load the Disulfides from the PDB ID 5rsa we'd use the following:
1086
1087    >>> from proteusPy import DisulfideList, load_disulfides_from_id
1088    >>> SSlist = DisulfideList([],'5rsa')
1089    >>> SSlist = load_disulfides_from_id('5rsa', verbose=False)
1090    >>> SSlist
1091    [<Disulfide 5rsa_26A_84A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_40A_95A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_58A_110A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_65A_72A, Source: 5rsa, Resolution: 2.0 Å>]
1092    """
1093    import copy
1094    import warnings
1095
1096    from proteusPy.Disulfide import Disulfide, parse_ssbond_header_rec
1097
1098    i = 1
1099    proximal = distal = -1
1100    resolution = -1.0
1101
1102    _chaina = None
1103    _chainb = None
1104
1105    parser = PDBParser(PERMISSIVE=True)
1106
1107    # Biopython uses the Structure -> Model -> Chain hierarchy to organize
1108    # structures. All are iterable.
1109
1110    structure = parser.get_structure(struct_name, file=f"{pdb_dir}pdb{struct_name}.ent")
1111    model = structure[model_numb]
1112
1113    if verbose:
1114        print(f"-> load_disulfide_from_id() - Parsing structure: {struct_name}:")
1115
1116    ssbond_dict = structure.header["ssbond"]  # NB: this requires the modified code
1117    resolution = structure.header["resolution"]
1118
1119    SSList = DisulfideList([], struct_name, resolution)
1120
1121    # list of tuples with (proximal distal chaina chainb)
1122    ssbonds = parse_ssbond_header_rec(ssbond_dict)
1123
1124    with warnings.catch_warnings():
1125        if quiet:
1126            warnings.filterwarnings("ignore")
1127        for pair in ssbonds:
1128            # in the form (proximal, distal, chain)
1129            proximal = pair[0]
1130            distal = pair[1]
1131            chain1_id = pair[2]
1132            chain2_id = pair[3]
1133
1134            if not proximal.isnumeric() or not distal.isnumeric():
1135                mess = f" -> load_disulfides_from_id(): Cannot parse SSBond record (non-numeric IDs):\
1136                {struct_name} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}, ignoring."
1137                warnings.warn(mess, DisulfideConstructionWarning)
1138                continue
1139            else:
1140                proximal = int(proximal)
1141                distal = int(distal)
1142
1143            if proximal == distal:
1144                mess = f" -> load_disulfides_from_id(): Cannot parse SSBond record (proximal == distal):\
1145                {struct_name} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}, ignoring."
1146                warnings.warn(mess, DisulfideConstructionWarning)
1147                continue
1148
1149            _chaina = model[chain1_id]
1150            _chainb = model[chain2_id]
1151
1152            if (_chaina is None) or (_chainb is None):
1153                mess = f" -> load_disulfides_from_id(): NULL chain(s): {struct_name}: {proximal} {chain1_id}\
1154                - {distal} {chain2_id}, ignoring!"
1155                warnings.warn(mess, DisulfideConstructionWarning)
1156                continue
1157
1158            if chain1_id != chain2_id:
1159                if verbose:
1160                    mess = f" -> load_disulfides_from_id(): Cross Chain SS for: Prox: {proximal} {chain1_id}\
1161                    Dist: {distal} {chain2_id}"
1162                    warnings.warn(mess, DisulfideConstructionWarning)
1163                pass  # was break
1164
1165            try:
1166                prox_res = _chaina[proximal]
1167                dist_res = _chainb[distal]
1168
1169            except KeyError:
1170                mess = f"Cannot parse SSBond record (KeyError): {struct_name} Prox:\
1171                {proximal} {chain1_id} Dist: {distal} {chain2_id}, ignoring!"
1172                warnings.warn(mess, DisulfideConstructionWarning)
1173                continue
1174
1175            # make a new Disulfide object, name them based on proximal and distal
1176            # initialize SS bond from the proximal, distal coordinates
1177
1178            if _chaina[proximal].is_disordered() or _chainb[distal].is_disordered():
1179                mess = f" -> load_disulfides_from_id(): Disordered chain(s): {struct_name}: {proximal} {chain1_id}\
1180                - {distal} {chain2_id}, ignoring!"
1181                warnings.warn(mess, DisulfideConstructionWarning)
1182                continue
1183            else:
1184                if verbose:
1185                    print(
1186                        f" -> load_disulfides_from_id(): SSBond: {i}: {struct_name}: {proximal} {chain1_id}\
1187                    - {distal} {chain2_id}"
1188                    )
1189                ssbond_name = f"{struct_name}_{proximal}{chain1_id}_{distal}{chain2_id}"
1190                new_ss = Disulfide(ssbond_name)
1191                new_ss.initialize_disulfide_from_chain(
1192                    _chaina, _chainb, proximal, distal, resolution, quiet=quiet
1193                )
1194                SSList.append(new_ss)
1195        i += 1
1196    return copy.deepcopy(SSList)
1197
1198
1199if __name__ == "__main__":
1200    import doctest
1201
1202    doctest.testmod()
1203
1204# end of file
1205# end of file
DPI = 220
WIDTH = 6.0
HEIGHT = 6.0
TORMIN = -179.0
TORMAX = 180.0
Torsion_DF_Cols = ['source', 'ss_id', 'proximal', 'distal', 'chi1', 'chi2', 'chi3', 'chi4', 'chi5', 'energy', 'ca_distance', 'cb_distance', 'phi_prox', 'psi_prox', 'phi_dist', 'psi_dist', 'torsion_length', 'rho']
Distance_DF_Cols = ['source', 'ss_id', 'proximal', 'distal', 'energy', 'ca_distance', 'cb_distance']
class DisulfideList(collections.UserList):
  81class DisulfideList(UserList):
  82    """
  83    The class provides a sortable list for Disulfide objects.
  84    Indexing and slicing are supported, as well as typical list operations like
  85    ``.insert()``, ``.append()`` and ``.extend().`` The DisulfideList object must be initialized
  86    with an iterable (tuple, list) and a name. Sorting is keyed by torsional energy.
  87
  88    The class can also render Disulfides to a pyVista window using the
  89    [display()](#DisulfideList.display) and [display_overlay()](#DisulfideList.display_overlay)methods.
  90    See below for examples.\n
  91
  92    Examples:
  93    >>> from proteusPy import Disulfide, DisulfideLoader, DisulfideList, Load_PDB_SS
  94
  95    Instantiate some variables. Note: the list is initialized with an iterable and a name (optional)
  96
  97    >>> SS = Disulfide('tmp')
  98
  99    The list is initialized with an iterable, a name and resolution. Name and resolution
 100    are optional.
 101    >>> SSlist = DisulfideList([],'ss', -1.0)
 102
 103    Load the database.
 104    >>> PDB_SS = Load_PDB_SS(verbose=False, subset=True)
 105
 106    Get the first disulfide via indexing.
 107    >>> SS = PDB_SS[0]
 108
 109    # assert str(SS) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>"
 110
 111    >>> SS4yys = PDB_SS['4yys']
 112
 113    # assert str(SS4yys) == "[<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_56A_98A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_156A_207A, Source: 4yys, Resolution: 1.35 Å>]"
 114
 115    Make some empty disulfides.
 116    >>> ss1 = Disulfide('ss1')
 117    >>> ss2 = Disulfide('ss2')
 118
 119    Make a DisulfideList containing ss1, named 'tmp'
 120    >>> sslist = DisulfideList([ss1], 'tmp')
 121    >>> sslist.append(ss2)
 122
 123    Extract the first disulfide
 124    >>> ss1 = PDB_SS[0]
 125
 126    # assert str(ss1.pprint_all()) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å\n Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))>\nProximal Coordinates:\n   N: <Vector -2.36, -20.48, 5.21>\n   Cα: <Vector -2.10, -19.89, 3.90>\n   C: <Vector -1.12, -18.78, 4.12>\n   O: <Vector -1.30, -17.96, 5.03>\n   Cβ: <Vector -3.38, -19.31, 3.32>\n   Sγ: <Vector -3.24, -18.40, 1.76>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\n Distal Coordinates:\n   N: <Vector -0.60, -18.71, -1.62>\n   Cα: <Vector -0.48, -19.10, -0.22>\n   C: <Vector 0.92, -19.52, 0.18>\n   O: <Vector 1.10, -20.09, 1.25>\n   Cβ: <Vector -1.48, -20.23, 0.08>\n   Sγ: <Vector -3.22, -19.69, 0.18>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n<BLANKLINE>\n Proximal Internal Coords:\n   N: <Vector -0.41, 1.40, -0.00>\n   Cα: <Vector 0.00, 0.00, 0.00>\n   C: <Vector 1.50, 0.00, 0.00>\n   O: <Vector 2.12, 0.71, -0.80>\n   Cβ: <Vector -0.50, -0.70, -1.25>\n   Sγ: <Vector 0.04, -2.41, -1.50>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\nDistal Internal Coords:\n   N: <Vector 1.04, -5.63, 1.17>\n   Cα: <Vector 1.04, -4.18, 1.31>\n   C: <Vector 1.72, -3.68, 2.57>\n   O: <Vector 1.57, -2.51, 2.92>\n   Cβ: <Vector -0.41, -3.66, 1.24>\n   Sγ: <Vector -1.14, -3.69, -0.43>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n Χ1-Χ5: 174.63°, 82.52°, -83.32°, -62.52° -73.83°, 138.89°, 1.70 kcal/mol\n Cα Distance: 4.50 Å\n Torsion length: 231.53 deg>"
 127
 128    Get a list of disulfides via slicing
 129    >>> subset = DisulfideList(PDB_SS[0:10],'subset')
 130
 131    Display the subset disulfides overlaid onto the same coordinate frame,
 132    (proximal N, Ca, C').
 133
 134    The disulfides are colored individually to facilitate inspection.
 135
 136    >>> subset.display_overlay()
 137    """
 138
 139    def __init__(self, iterable, id: str, res=-1.0, quiet=True):
 140        """
 141        Initialize the DisulfideList
 142
 143        :param iterable: an iterable e.g. []
 144        :type iterable: iterable
 145        :param id: Name for the list
 146        :type id: str
 147
 148        Example:
 149        >>> from proteusPy import DisulfideList, Disulfide
 150
 151        Initialize some empty disulfides.
 152        >>> ss1 = Disulfide('ss1')
 153        >>> ss2 = Disulfide('ss2')
 154        >>> ss3 = Disulfide('ss3')
 155
 156        Make a list containing the disulfides.
 157        >>> sslist = DisulfideList([ss1, ss2], 'sslist')
 158        >>> sslist
 159        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
 160        >>> sslist.append(ss3)
 161        >>> sslist
 162        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
 163        """
 164
 165        super().__init__(self.validate_ss(item) for item in iterable)
 166
 167        self.pdb_id = id
 168        self.quiet = quiet
 169        total = 0
 170        count = 0
 171
 172        if res == -1:
 173            for ss in iterable:
 174                if ss.resolution is not None:
 175                    total += ss.resolution
 176                    count += 1
 177            if count != 0:
 178                self.res = total / count
 179            else:
 180                self.res = -1.0
 181        else:
 182            self.res = res
 183
 184    def __getitem__(self, item):
 185        """
 186        Retrieve a disulfide from the list. Internal only.
 187
 188        :param item: Index or slice
 189        :return: Sublist
 190        """
 191        if isinstance(item, slice):
 192            indices = range(*item.indices(len(self.data)))
 193            name = self.data[0].pdb_id
 194            sublist = [self.data[i] for i in indices]
 195            return DisulfideList(sublist, name)
 196        return UserList.__getitem__(self, item)
 197
 198    def __setitem__(self, index, item):
 199        self.data[index] = self.validate_ss(item)
 200
 201    # Rendering engine calculates and instantiates all bond
 202    # cylinders and atomic sphere meshes. Called by all high level routines
 203
 204    def _render(self, style, panelsize=256) -> pv.Plotter:
 205        """
 206        Display a window showing the list of disulfides in the given style.
 207        :param style: one of 'cpk', 'bs', 'sb', 'plain', 'cov', 'pd'
 208        :return: Window in the relevant style
 209        """
 210        ssList = self.data
 211        name = self.id
 212        tot_ss = len(ssList)  # number off ssbonds
 213        rows, cols = grid_dimensions(tot_ss)
 214        winsize = (panelsize * cols, panelsize * rows)
 215
 216        pl = pv.Plotter(window_size=winsize, shape=(rows, cols))
 217        i = 0
 218
 219        for r in range(rows):
 220            for c in range(cols):
 221                pl.subplot(r, c)
 222                if i < tot_ss:
 223                    # ss = Disulfide()
 224                    ss = ssList[i]
 225                    src = ss.pdb_id
 226                    enrg = ss.energy
 227                    title = f"{src} {ss.proximal}{ss.proximal_chain}-{ss.distal}{ss.distal_chain}: E: {enrg:.2f}, Cα: {ss.ca_distance:.2f} Å, Tors: {ss.torsion_length:.2f}°"
 228                    pl.add_title(title=title, font_size=FONTSIZE)
 229                    ss._render(
 230                        pl,
 231                        style=style,
 232                        bondcolor=BOND_COLOR,
 233                        bs_scale=BS_SCALE,
 234                        spec=SPECULARITY,
 235                        specpow=SPEC_POWER,
 236                    )
 237                i += 1
 238        return pl
 239
 240    @property
 241    def Average_Distance(self):
 242        """
 243        Return the Average distance (Å) between the atoms in the list.
 244
 245        :return: Average distance (Å) between all atoms in the list
 246
 247        """
 248        sslist = self.data
 249        tot = len(sslist)
 250        cnt = 1
 251
 252        total = 0.0
 253        for ss1 in sslist:
 254            for ss2 in sslist:
 255                if ss2 == ss1:
 256                    continue
 257                total += ss1.Distance_RMS(ss2)
 258                cnt += 1
 259
 260        return total / cnt
 261
 262    @property
 263    def Average_Energy(self):
 264        """
 265        Return the Average energy (kcal/mol) for the Disulfides in the list.
 266
 267        :return: Average energy (kcal/mol) between all atoms in the list
 268
 269        """
 270        sslist = self.data
 271        tot = len(sslist)
 272
 273        total = 0.0
 274        for ss1 in sslist:
 275            total += ss1.energy
 276
 277        return total / tot
 278
 279    @property
 280    def Average_Conformation(self):
 281        """
 282        Return the Average conformation for the Disulfides in the list.
 283
 284        :return: Average conformation: [x1, x2, x3, x4, x5]
 285        """
 286
 287        sslist = self.data
 288        tot = len(sslist)
 289
 290        tors = self.build_torsion_df()
 291
 292        res = np.zeros(5)
 293
 294        for ss, i in zip(sslist, range(tot)):
 295            res += ss.torsion_array
 296
 297        return res / tot
 298
 299    def append(self, item):
 300        """
 301        Append the list with item
 302
 303        :param item: Disulfide to add
 304        :type item: Disulfide
 305        """
 306        self.data.append(self.validate_ss(item))
 307
 308    @property
 309    def Average_Resolution(self) -> float:
 310        """
 311        Compute and return the average structure resolution for the given list.
 312
 313        :return: Average resolution (A)
 314        """
 315        res = 0.0
 316        cnt = 1
 317
 318        for ss in self.data:
 319            _res = ss.resolution
 320            if _res is not None and _res != -1.0:
 321                res += _res
 322                cnt += 1
 323        return res / cnt if cnt else -1.0
 324
 325    @property
 326    def Average_Torsion_Distance(self):
 327        """
 328        Return the average distance in torsion space (degrees), between all pairs in the
 329        DisulfideList
 330
 331        :return: Torsion Distance (degrees)
 332        """
 333        sslist = self.data
 334        tot = len(sslist)
 335        total = 0
 336        cnt = 1
 337
 338        for ss1 in sslist:
 339            for ss2 in sslist:
 340                if ss2 == ss1:
 341                    continue
 342                total += ss1.Torsion_Distance(ss2)
 343                cnt += 1
 344
 345        return total / cnt
 346
 347    def build_distance_df(self) -> pandas.DataFrame:
 348        """
 349        Create a dataframe containing the input DisulfideList Cα-Cα distance, energy.
 350        This can take several minutes for the entire database.
 351
 352        :return: DataFrame containing Ca distances
 353        :rtype: pandas.DataFrame
 354        """
 355
 356        # create a dataframe with the following columns for the disulfide
 357        # conformations extracted from the structure
 358
 359        SS_df = pandas.DataFrame(columns=Distance_DF_Cols)
 360        sslist = self.data
 361
 362        pbar = tqdm(sslist, ncols=PBAR_COLS)
 363        for ss in pbar:
 364            new_row = [
 365                ss.pdb_id,
 366                ss.name,
 367                ss.proximal,
 368                ss.distal,
 369                ss.energy,
 370                ss.ca_distance,
 371            ]
 372            # add the row to the end of the dataframe
 373            SS_df.loc[len(SS_df.index)] = new_row
 374
 375        return SS_df
 376
 377    def build_torsion_df(self) -> pandas.DataFrame:
 378        """
 379        Create a dataframe containing the input DisulfideList torsional parameters,
 380        Cα-Cα distance, energy, and phi-psi angles. This can take several minutes for the
 381        entire database.
 382
 383        :param SSList: DisulfideList - input list of Disulfides
 384        :return: pandas.Dataframe containing the torsions
 385        """
 386        # create a dataframe with the following columns for the disulfide
 387        # conformations extracted from the structure
 388
 389        SS_df = pandas.DataFrame(columns=Torsion_DF_Cols)
 390        sslist = self.data
 391        if self.quiet or len(sslist) < 100:
 392            pbar = sslist
 393        else:
 394            pbar = tqdm(sslist, ncols=PBAR_COLS, leave=False)
 395
 396        for ss in pbar:
 397            new_row = [
 398                ss.pdb_id,
 399                ss.name,
 400                ss.proximal,
 401                ss.distal,
 402                ss.chi1,
 403                ss.chi2,
 404                ss.chi3,
 405                ss.chi4,
 406                ss.chi5,
 407                ss.energy,
 408                ss.ca_distance,
 409                ss.cb_distance,
 410                ss.psiprox,
 411                ss.psiprox,
 412                ss.phidist,
 413                ss.psidist,
 414                ss.torsion_length,
 415                ss.rho,
 416            ]
 417            # add the row to the end of the dataframe
 418            SS_df.loc[len(SS_df.index)] = new_row
 419
 420        return SS_df
 421
 422    def by_chain(self, chain: str):
 423        """
 424        Return a DisulfideList from the input chain identifier.
 425
 426        :param chain: chain identifier, 'A', 'B, etc
 427        :return: DisulfideList containing disulfides within that chain.
 428        """
 429
 430        reslist = DisulfideList([], chain)
 431        sslist = self.data
 432
 433        for ss in sslist:
 434            pchain = ss.proximal_chain
 435            dchain = ss.distal_chain
 436            if pchain == dchain:
 437                if pchain == chain:
 438                    reslist.append(ss)
 439            else:
 440                print(f"Cross chain SS: {ss.repr_compact}:")
 441        return reslist
 442
 443    def calculate_torsion_statistics(self):
 444        df = self.build_torsion_df()
 445
 446        df_subset = df.iloc[:, 4:]
 447        df_stats = df_subset.describe()
 448
 449        # print(df_stats.head())
 450
 451        mean_vals = df_stats.loc["mean"].values
 452        std_vals = df_stats.loc["std"].values
 453
 454        tor_cols = ["chi1", "chi2", "chi3", "chi4", "chi5", "torsion_length"]
 455        dist_cols = ["ca_distance", "cb_distance", "energy"]
 456        tor_stats = {}
 457        dist_stats = {}
 458
 459        for col in tor_cols:
 460            tor_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
 461
 462        for col in dist_cols:
 463            dist_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
 464
 465        tor_stats = pandas.DataFrame(tor_stats, columns=tor_cols)
 466        dist_stats = pandas.DataFrame(dist_stats, columns=dist_cols)
 467
 468        return tor_stats, dist_stats
 469
 470    def display(self, style="sb", light=True, panelsize=512):
 471        """
 472        Display the Disulfide list in the specific rendering style.
 473
 474        :param single: Display the bond in a single panel in the specific style.
 475        :param style:  Rendering style: One of:\n
 476            - 'sb' - split bonds
 477            - 'bs' - ball and stick
 478            - 'cpk' - CPK style
 479            - 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
 480            - 'plain' - boring single color
 481        :light: If True, light background, if False, dark
 482        """
 483        id = self.pdb_id
 484        ssbonds = self.data
 485        tot_ss = len(ssbonds)  # number off ssbonds
 486        avg_enrg = self.Average_Energy
 487        avg_dist = self.Average_Distance
 488        resolution = self.resolution
 489
 490        if light:
 491            pv.set_plot_theme("document")
 492        else:
 493            pv.set_plot_theme("dark")
 494
 495        title = f"<{id}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
 496
 497        pl = pv.Plotter()
 498        pl = self._render(style, panelsize)
 499        pl.enable_anti_aliasing("msaa")
 500        pl.add_title(title=title, font_size=FONTSIZE)
 501        pl.link_views()
 502        pl.reset_camera()
 503        pl.show()
 504
 505    def display_torsion_statistics(
 506        self,
 507        display=True,
 508        save=False,
 509        fname="ss_torsions.png",
 510        stats=False,
 511        light=True,
 512    ):
 513        """
 514        Display torsion and distance statistics for a given Disulfide list.
 515
 516        :param display: Whether to display the plot in the notebook. Default is True.
 517        :type display: bool
 518        :param save: Whether to save the plot as an image file. Default is False.
 519        :type save: bool
 520        :param fname: The name of the image file to save. Default is 'ss_torsions.png'.
 521        :type fname: str
 522        :param stats: Whether to return the DataFrame representing the statistics for `self`. Default is False.
 523        :type stats: bool
 524        :param light: Whether to use the 'plotly_light' or 'plotly_dark' template. Default is True.
 525        :type light: bool
 526        :return: None
 527        """
 528        title = f"{self.id}: {self.length} members"
 529
 530        df = self.torsion_df
 531        df_subset = df.iloc[:, 4:]
 532        df_stats = df_subset.describe()
 533
 534        # print(df_stats.head())
 535
 536        mean_vals = df_stats.loc["mean"].values
 537        std_vals = df_stats.loc["std"].values
 538
 539        fig = make_subplots(
 540            rows=2, cols=2, vertical_spacing=0.125, column_widths=[1, 1]
 541        )
 542        fig.update_layout(template="plotly" if light else "plotly_dark")
 543
 544        fig.update_layout(
 545            title={
 546                "text": title,
 547                "xanchor": "center",
 548                # 'y':.9,
 549                "x": 0.5,
 550                "yanchor": "top",
 551            },
 552            width=1024,
 553            height=1024,
 554        )
 555
 556        fig.add_trace(
 557            go.Bar(
 558                x=["X1", "X2", "X3", "X4", "X5"],
 559                y=mean_vals[:5],
 560                name="Torsion Angle,(°) ",
 561                error_y=dict(type="data", array=std_vals[:5], visible=True),
 562            ),
 563            row=1,
 564            col=1,
 565        )
 566
 567        fig.add_trace(
 568            go.Bar(
 569                x=["rho"],
 570                y=[mean_vals[13]],
 571                name="ρ, (°)",
 572                error_y=dict(type="data", array=[std_vals[13]], visible=True),
 573            ),
 574            row=1,
 575            col=1,
 576        )
 577
 578        # Update the layout of the subplot
 579        # Cα N, Cα, Cβ, C', Sγ Å °
 580
 581        fig.update_yaxes(
 582            title_text="Torsion Angle (°)", range=[-200, 200], row=1, col=1
 583        )
 584        fig.update_yaxes(range=[0, 320], row=2, col=2)
 585
 586        # Add another subplot for the mean values of energy
 587        fig.add_trace(
 588            go.Bar(
 589                x=["Strain Energy (kcal/mol)"],
 590                y=[mean_vals[5]],
 591                name="Energy (kcal/mol)",
 592                error_y=dict(
 593                    type="data",
 594                    array=[std_vals[5].tolist()],
 595                    width=0.25,
 596                    visible=True,
 597                ),
 598            ),
 599            row=1,
 600            col=2,
 601        )
 602        fig.update_traces(width=0.25, row=1, col=2)
 603
 604        # Update the layout of the subplot
 605        # fig.update_xaxes(title_text="Energy", row=1, col=2)
 606        fig.update_yaxes(
 607            title_text="kcal/mol", range=[0, 20], row=1, col=2
 608        )  # max possible DSE
 609
 610        # Add another subplot for the mean values of ca_distance
 611        fig.add_trace(
 612            go.Bar(
 613                x=["Cα Distance, (Å)", "Cβ Distance, (Å)"],
 614                y=[mean_vals[6], mean_vals[7]],
 615                name="Cβ Distance (Å)",
 616                error_y=dict(
 617                    type="data",
 618                    array=[std_vals[6].tolist(), std_vals[7].tolist()],
 619                    width=0.25,
 620                    visible=True,
 621                ),
 622            ),
 623            row=2,
 624            col=1,
 625        )
 626        # Update the layout of the subplot
 627        fig.update_yaxes(title_text="Distance (A)", range=[0, 10], row=2, col=1)  #
 628        fig.update_traces(width=0.25, row=2, col=1)
 629
 630        # Add a scatter subplot for torsion length column
 631        fig.add_trace(
 632            go.Bar(
 633                x=["Torsion Length, (Å)"],
 634                y=[mean_vals[12]],
 635                name="Torsion Length, (Å)",
 636                error_y=dict(
 637                    type="data", array=[std_vals[12]], width=0.25, visible=True
 638                ),
 639            ),
 640            row=2,
 641            col=2,
 642        )
 643        # Update the layout of the subplot
 644        fig.update_yaxes(title_text="Torsion Length", range=[0, 350], row=2, col=2)
 645        fig.update_traces(width=0.25, row=2, col=2)
 646
 647        # Update the error bars
 648        fig.update_traces(
 649            error_y_thickness=2,
 650            error_y_color="gray",
 651            texttemplate="%{y:.2f} ± %{error_y.array:.2f}",
 652            textposition="outside",
 653        )  # , row=1, col=1)
 654
 655        if display:
 656            fig.show()
 657        if save:
 658            fig.write_image(Path(fname))
 659
 660        if stats:
 661            return df_stats
 662
 663        return
 664
 665    @property
 666    def distance_df(self) -> pandas.DataFrame:
 667        """
 668        Build and return the distance dataframe for the input list.
 669        This can take considerable time for the entire list.
 670
 671        :return: Dataframe containing the Cα-Cα distances for the given list.
 672
 673        Example:
 674        >>> from proteusPy import Disulfide, Load_PDB_SS, DisulfideList
 675        >>> PDB_SS = Load_PDB_SS()
 676
 677        """
 678        return self.build_distance_df()
 679
 680    def display_overlay(
 681        self,
 682        screenshot=False,
 683        movie=False,
 684        verbose=True,
 685        fname="ss_overlay.png",
 686        light=True,
 687    ):
 688        """
 689        Display all disulfides in the list overlaid in stick mode against
 690        a common coordinate frames. This allows us to see all of the disulfides
 691        at one time in a single view. Colors vary smoothy between bonds.
 692
 693        :param screenshot: Save a screenshot, defaults to False
 694        :param movie: Save a movie, defaults to False
 695        :param verbose: Verbosity, defaults to True
 696        :param fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
 697        :param light: Background color, defaults to True for White. False for Dark.
 698        """
 699
 700        id = self.pdb_id
 701        ssbonds = self.data
 702        tot_ss = len(ssbonds)  # number off ssbonds
 703        avg_enrg = self.Average_Energy
 704        avg_dist = self.Average_Distance
 705        resolution = self.resolution
 706
 707        res = 100
 708
 709        if tot_ss > 100:
 710            res = 60
 711        if tot_ss > 200:
 712            res = 30
 713        if tot_ss > 300:
 714            res = 8
 715
 716        title = f"<{id}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
 717
 718        if light:
 719            pv.set_plot_theme("document")
 720        else:
 721            pv.set_plot_theme("dark")
 722
 723        if movie:
 724            pl = pv.Plotter(window_size=WINSIZE, off_screen=True)
 725        else:
 726            pl = pv.Plotter(window_size=WINSIZE, off_screen=False)
 727
 728        pl.add_title(title=title, font_size=FONTSIZE)
 729        pl.enable_anti_aliasing("msaa")
 730        # pl.add_camera_orientation_widget()
 731        pl.add_axes()
 732
 733        mycol = np.zeros(shape=(tot_ss, 3))
 734        mycol = get_jet_colormap(tot_ss)
 735
 736        # scale the overlay bond radii down so that we can see the individual elements better
 737        # maximum 90% reduction
 738
 739        brad = BOND_RADIUS if tot_ss < 10 else BOND_RADIUS * 0.75
 740        brad = brad if tot_ss < 25 else brad * 0.8
 741        brad = brad if tot_ss < 50 else brad * 0.8
 742        brad = brad if tot_ss < 100 else brad * 0.6
 743
 744        # print(f'Brad: {brad}')
 745        pbar = tqdm(range(tot_ss), ncols=PBAR_COLS)
 746
 747        for i, ss in zip(pbar, ssbonds):
 748            color = [int(mycol[i][0]), int(mycol[i][1]), int(mycol[i][2])]
 749            ss._render(
 750                pl,
 751                style="plain",
 752                bondcolor=color,
 753                translate=False,
 754                bond_radius=brad,
 755                res=res,
 756            )
 757
 758        pl.reset_camera()
 759
 760        if screenshot:
 761            pl.show(auto_close=False)  # allows for manipulation
 762            pl.screenshot(fname)
 763
 764        elif movie:
 765            if verbose:
 766                print(f" -> display_overlay(): Saving mp4 animation to: {fname}")
 767
 768            pl.open_movie(fname)
 769            path = pl.generate_orbital_path(n_points=360)
 770            pl.orbit_on_path(path, write_frames=True)
 771            pl.close()
 772
 773            if verbose:
 774                print(f" -> display_overlay(): Saved mp4 animation to: {fname}")
 775        else:
 776            pl.show()
 777
 778        return
 779
 780    def extend(self, other):
 781        """
 782        Extend the Disulfide list with other.
 783
 784        :param other: extension
 785        :type item: DisulfideList
 786        """
 787
 788        if isinstance(other, type(self)):
 789            self.data.extend(other)
 790        else:
 791            self.data.extend(self.validate_ss(item) for item in other)
 792
 793    def get_by_name(self, name):
 794        """
 795        Returns the Disulfide with the given name from the list.
 796        """
 797        for ss in self.data:
 798            if ss.name == name:
 799                return ss.copy()  # or ss.copy() !!!
 800        return None
 801
 802    def get_chains(self):
 803        """
 804        Return the chain IDs for chains within the given Disulfide.
 805        :return: Chain IDs for given Disulfide
 806        """
 807
 808        res_dict = {"xxx"}
 809        sslist = self.data
 810
 811        for ss in sslist:
 812            pchain = ss.proximal_chain
 813            dchain = ss.distal_chain
 814            res_dict.update(pchain)
 815            res_dict.update(dchain)
 816
 817        res_dict.remove("xxx")
 818
 819        return res_dict
 820
 821    def get_torsion_array(self):
 822        """
 823        Returns a 2D NumPy array representing the dihedral angles in the given disulfide list.
 824
 825        :return: A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row
 826                of the array represents the dihedral angles of a disulfide bond, in the following order:
 827                [X1_i, X2_i, X3_i, X4_i, X5_i], where i is the index of the disulfide bond in the list.
 828        """
 829        sslist = self.data
 830        tot = len(sslist)
 831        res = np.zeros(shape=(tot, 5))
 832
 833        for idx, ss in enumerate(sslist):
 834            row = ss.torsion_array
 835            res[idx, :] = row
 836
 837        return res
 838
 839    def has_chain(self, chain) -> bool:
 840        """
 841        Returns True if given chain contained in Disulfide, False otherwise.
 842        :return: Returns True if given chain contained in Disulfide, False otherwise.
 843        """
 844
 845        chns = {"xxx"}
 846        chns = self.get_chains()
 847        if chain in chns:
 848            return True
 849        else:
 850            return False
 851
 852    @property
 853    def id(self):
 854        """
 855        PDB ID of the list
 856        """
 857        return self.pdb_id
 858
 859    @id.setter
 860    def id(self, value):
 861        """
 862        Set the DisulfideList ID
 863
 864        Parameters
 865        ----------
 866        value : str
 867            List ID
 868        """
 869        self.pdb_id = value
 870
 871    @property
 872    def resolution(self):
 873        """
 874        Resolution of the parent sturcture (A)
 875        """
 876        return self.res
 877
 878    @resolution.setter
 879    def resolution(self, value):
 880        """
 881        Set the resolution of the list
 882
 883        :param value: Resolution (A)
 884        """
 885        self.res = value
 886
 887    def TorsionGraph(
 888        self, display=True, save=False, fname="ss_torsions.png", light=True
 889    ):
 890        # tor_stats, dist_stats = self.calculate_torsion_statistics()
 891        self.display_torsion_statistics(
 892            display=display, save=save, fname=fname, light=light
 893        )
 894
 895    def insert(self, index, item):
 896        """
 897        Insert a Disulfide into the list at the specified index
 898
 899        :param index: insertion point
 900        :type index: int
 901        :param item: Disulfide to insert
 902        :type item: Disulfide
 903        """
 904        self.data.insert(index, self.validate_ss(item))
 905
 906    @property
 907    def length(self):
 908        return len(self.data)
 909
 910    @property
 911    def min(self) -> Disulfide:
 912        """
 913        Return Disulfide from the list with the minimum energy
 914
 915        :return: Disulfide with the minimum energy.
 916        """
 917        sslist = sorted(self.data)
 918        return sslist[0]
 919
 920    @property
 921    def max(self) -> Disulfide:
 922        """
 923        Return Disulfide from the list with the maximum energy
 924
 925        :return: Disulfide with the maximum energy.
 926        """
 927        sslist = sorted(self.data)
 928        return sslist[-1]
 929
 930    def minmax_distance(self):
 931        """
 932        Return the Disulfides with the minimum and
 933        maximum Cα distances in the list.
 934
 935        :return: SSmin, SSmax
 936        """
 937
 938        _min = 99999.9
 939        _max = -99999.9
 940
 941        sslist = self.data
 942        ssmin = 0
 943        ssmax = 0
 944        idx = 0
 945
 946        pbar = tqdm(sslist, ncols=PBAR_COLS)
 947        for ss in pbar:
 948            dist = ss.ca_distance
 949
 950            if dist >= _max:
 951                ssmax = idx
 952                _max = dist
 953
 954            if dist <= _min:
 955                ssmin = idx
 956                _min = dist
 957            idx += 1
 958
 959        return sslist[ssmin], sslist[ssmax]
 960
 961    @property
 962    def minmax_energy(self):
 963        """
 964        Return the Disulfides with the minimum and maximum energies
 965        from the DisulfideList.
 966
 967        :return: Disulfide with the given ID
 968        """
 969        sslist = sorted(self.data)
 970        return sslist[0], sslist[-1]
 971
 972    def nearest_neighbors(
 973        self,
 974        chi1: float,
 975        chi2: float,
 976        chi3: float,
 977        chi4: float,
 978        chi5: float,
 979        cutoff: float,
 980    ):
 981        """
 982        Given a torsional array of chi1-chi5,
 983
 984        :param chi1: Chi1 (degrees)
 985        :param chi2: Chi2 (degrees)
 986        :param chi3: Chi3 (degrees)
 987        :param chi4: Chi4 (degrees)
 988        :param chi5: Chi5 (degrees)
 989        :param cutoff: Distance cutoff, degrees
 990        :return: DisulfideList of neighbors
 991        """
 992
 993        sslist = self.data
 994        modelss = proteusPy.Disulfide("model")
 995
 996        modelss.build_model(chi1, chi2, chi3, chi4, chi5)
 997        res = DisulfideList([], "neighbors")
 998        res = modelss.Torsion_neighbors(sslist, cutoff)
 999
1000        return res
1001
1002    def nearest_neighbors_ss(self, ss, cutoff: float):
1003        """
1004        Given an input Disulfide and overall torsional cutoff, return
1005        the list of Disulfides within the cutoff
1006
1007        :param ss: Disulfide to compare to
1008        :param chi5: Chi5 (degrees)
1009        :param cutoff: Distance cutoff, degrees
1010        :return: DisulfideList of neighbors
1011        """
1012
1013        chi1 = ss.chi1
1014        chi2 = ss.chi2
1015        chi3 = ss.chi3
1016        chi4 = ss.chi4
1017        chi5 = ss.chi5
1018
1019        sslist = self.data
1020        modelss = proteusPy.Disulfide("model")
1021
1022        modelss.build_model(chi1, chi2, chi3, chi4, chi5)
1023        res = DisulfideList([], "neighbors")
1024        res = modelss.Torsion_neighbors(sslist, cutoff)
1025
1026        return res
1027
1028    def pprint(self):
1029        """
1030        Pretty print self.
1031        """
1032        sslist = self.data
1033        for ss in sslist:
1034            ss.pprint()
1035
1036    def pprint_all(self):
1037        """
1038        Pretty print full disulfide descriptions in self.
1039        """
1040        sslist = self.data
1041        for ss in sslist:
1042            ss.pprint_all()
1043
1044    @property
1045    def torsion_df(self):
1046        return self.build_torsion_df()
1047
1048    @property
1049    def torsion_array(self):
1050        return self.get_torsion_array()
1051
1052    def validate_ss(self, value):
1053        return value
1054
1055    # class ends

The class provides a sortable list for Disulfide objects. Indexing and slicing are supported, as well as typical list operations like .insert(), .append() and .extend(). The DisulfideList object must be initialized with an iterable (tuple, list) and a name. Sorting is keyed by torsional energy.

The class can also render Disulfides to a pyVista window using the
[display()](#DisulfideList.display) and [display_overlay()](#DisulfideList.display_overlay)methods.
See below for examples.


Examples:
>>> from proteusPy import Disulfide, DisulfideLoader, DisulfideList, Load_PDB_SS

Instantiate some variables. Note: the list is initialized with an iterable and a name (optional)

>>> SS = Disulfide('tmp')

The list is initialized with an iterable, a name and resolution. Name and resolution
are optional.
>>> SSlist = DisulfideList([],'ss', -1.0)

Load the database.
>>> PDB_SS = Load_PDB_SS(verbose=False, subset=True)

Get the first disulfide via indexing.
>>> SS = PDB_SS[0]

# assert str(SS) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>"

>>> SS4yys = PDB_SS['4yys']

# assert str(SS4yys) == "[<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_56A_98A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_156A_207A, Source: 4yys, Resolution: 1.35 Å>]"

Make some empty disulfides.
>>> ss1 = Disulfide('ss1')
>>> ss2 = Disulfide('ss2')

Make a DisulfideList containing ss1, named 'tmp'
>>> sslist = DisulfideList([ss1], 'tmp')
>>> sslist.append(ss2)

Extract the first disulfide
>>> ss1 = PDB_SS[0]

# assert str(ss1.pprint_all()) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å

Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))> Proximal Coordinates: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Distal Coordinates: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Proximal Internal Coords: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Distal Internal Coords: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Χ1-Χ5: 174.63°, 82.52°, -83.32°, -62.52° -73.83°, 138.89°, 1.70 kcal/mol Cα Distance: 4.50 Å Torsion length: 231.53 deg>"

Get a list of disulfides via slicing
>>> subset = DisulfideList(PDB_SS[0:10],'subset')

Display the subset disulfides overlaid onto the same coordinate frame,
(proximal N, Ca, C').

The disulfides are colored individually to facilitate inspection.

>>> subset.display_overlay()
DisulfideList(iterable, id: str, res=-1.0, quiet=True)
139    def __init__(self, iterable, id: str, res=-1.0, quiet=True):
140        """
141        Initialize the DisulfideList
142
143        :param iterable: an iterable e.g. []
144        :type iterable: iterable
145        :param id: Name for the list
146        :type id: str
147
148        Example:
149        >>> from proteusPy import DisulfideList, Disulfide
150
151        Initialize some empty disulfides.
152        >>> ss1 = Disulfide('ss1')
153        >>> ss2 = Disulfide('ss2')
154        >>> ss3 = Disulfide('ss3')
155
156        Make a list containing the disulfides.
157        >>> sslist = DisulfideList([ss1, ss2], 'sslist')
158        >>> sslist
159        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
160        >>> sslist.append(ss3)
161        >>> sslist
162        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
163        """
164
165        super().__init__(self.validate_ss(item) for item in iterable)
166
167        self.pdb_id = id
168        self.quiet = quiet
169        total = 0
170        count = 0
171
172        if res == -1:
173            for ss in iterable:
174                if ss.resolution is not None:
175                    total += ss.resolution
176                    count += 1
177            if count != 0:
178                self.res = total / count
179            else:
180                self.res = -1.0
181        else:
182            self.res = res

Initialize the DisulfideList

Parameters
  • iterable: an iterable e.g. []
  • id: Name for the list

Example:

>>> from proteusPy import DisulfideList, Disulfide

Initialize some empty disulfides.

>>> ss1 = Disulfide('ss1')
>>> ss2 = Disulfide('ss2')
>>> ss3 = Disulfide('ss3')

Make a list containing the disulfides.

>>> sslist = DisulfideList([ss1, ss2], 'sslist')
>>> sslist
[<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
>>> sslist.append(ss3)
>>> sslist
[<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
pdb_id
quiet
Average_Distance
240    @property
241    def Average_Distance(self):
242        """
243        Return the Average distance (Å) between the atoms in the list.
244
245        :return: Average distance (Å) between all atoms in the list
246
247        """
248        sslist = self.data
249        tot = len(sslist)
250        cnt = 1
251
252        total = 0.0
253        for ss1 in sslist:
254            for ss2 in sslist:
255                if ss2 == ss1:
256                    continue
257                total += ss1.Distance_RMS(ss2)
258                cnt += 1
259
260        return total / cnt

Return the Average distance (Å) between the atoms in the list.

Returns

Average distance (Å) between all atoms in the list

Average_Energy
262    @property
263    def Average_Energy(self):
264        """
265        Return the Average energy (kcal/mol) for the Disulfides in the list.
266
267        :return: Average energy (kcal/mol) between all atoms in the list
268
269        """
270        sslist = self.data
271        tot = len(sslist)
272
273        total = 0.0
274        for ss1 in sslist:
275            total += ss1.energy
276
277        return total / tot

Return the Average energy (kcal/mol) for the Disulfides in the list.

Returns

Average energy (kcal/mol) between all atoms in the list

Average_Conformation
279    @property
280    def Average_Conformation(self):
281        """
282        Return the Average conformation for the Disulfides in the list.
283
284        :return: Average conformation: [x1, x2, x3, x4, x5]
285        """
286
287        sslist = self.data
288        tot = len(sslist)
289
290        tors = self.build_torsion_df()
291
292        res = np.zeros(5)
293
294        for ss, i in zip(sslist, range(tot)):
295            res += ss.torsion_array
296
297        return res / tot

Return the Average conformation for the Disulfides in the list.

Returns

Average conformation: [x1, x2, x3, x4, x5]

def append(self, item):
299    def append(self, item):
300        """
301        Append the list with item
302
303        :param item: Disulfide to add
304        :type item: Disulfide
305        """
306        self.data.append(self.validate_ss(item))

Append the list with item

Parameters
  • item: Disulfide to add
Average_Resolution: float
308    @property
309    def Average_Resolution(self) -> float:
310        """
311        Compute and return the average structure resolution for the given list.
312
313        :return: Average resolution (A)
314        """
315        res = 0.0
316        cnt = 1
317
318        for ss in self.data:
319            _res = ss.resolution
320            if _res is not None and _res != -1.0:
321                res += _res
322                cnt += 1
323        return res / cnt if cnt else -1.0

Compute and return the average structure resolution for the given list.

Returns

Average resolution (A)

Average_Torsion_Distance
325    @property
326    def Average_Torsion_Distance(self):
327        """
328        Return the average distance in torsion space (degrees), between all pairs in the
329        DisulfideList
330
331        :return: Torsion Distance (degrees)
332        """
333        sslist = self.data
334        tot = len(sslist)
335        total = 0
336        cnt = 1
337
338        for ss1 in sslist:
339            for ss2 in sslist:
340                if ss2 == ss1:
341                    continue
342                total += ss1.Torsion_Distance(ss2)
343                cnt += 1
344
345        return total / cnt

Return the average distance in torsion space (degrees), between all pairs in the DisulfideList

Returns

Torsion Distance (degrees)

def build_distance_df(self) -> pandas.core.frame.DataFrame:
347    def build_distance_df(self) -> pandas.DataFrame:
348        """
349        Create a dataframe containing the input DisulfideList Cα-Cα distance, energy.
350        This can take several minutes for the entire database.
351
352        :return: DataFrame containing Ca distances
353        :rtype: pandas.DataFrame
354        """
355
356        # create a dataframe with the following columns for the disulfide
357        # conformations extracted from the structure
358
359        SS_df = pandas.DataFrame(columns=Distance_DF_Cols)
360        sslist = self.data
361
362        pbar = tqdm(sslist, ncols=PBAR_COLS)
363        for ss in pbar:
364            new_row = [
365                ss.pdb_id,
366                ss.name,
367                ss.proximal,
368                ss.distal,
369                ss.energy,
370                ss.ca_distance,
371            ]
372            # add the row to the end of the dataframe
373            SS_df.loc[len(SS_df.index)] = new_row
374
375        return SS_df

Create a dataframe containing the input DisulfideList Cα-Cα distance, energy. This can take several minutes for the entire database.

Returns

DataFrame containing Ca distances

def build_torsion_df(self) -> pandas.core.frame.DataFrame:
377    def build_torsion_df(self) -> pandas.DataFrame:
378        """
379        Create a dataframe containing the input DisulfideList torsional parameters,
380        Cα-Cα distance, energy, and phi-psi angles. This can take several minutes for the
381        entire database.
382
383        :param SSList: DisulfideList - input list of Disulfides
384        :return: pandas.Dataframe containing the torsions
385        """
386        # create a dataframe with the following columns for the disulfide
387        # conformations extracted from the structure
388
389        SS_df = pandas.DataFrame(columns=Torsion_DF_Cols)
390        sslist = self.data
391        if self.quiet or len(sslist) < 100:
392            pbar = sslist
393        else:
394            pbar = tqdm(sslist, ncols=PBAR_COLS, leave=False)
395
396        for ss in pbar:
397            new_row = [
398                ss.pdb_id,
399                ss.name,
400                ss.proximal,
401                ss.distal,
402                ss.chi1,
403                ss.chi2,
404                ss.chi3,
405                ss.chi4,
406                ss.chi5,
407                ss.energy,
408                ss.ca_distance,
409                ss.cb_distance,
410                ss.psiprox,
411                ss.psiprox,
412                ss.phidist,
413                ss.psidist,
414                ss.torsion_length,
415                ss.rho,
416            ]
417            # add the row to the end of the dataframe
418            SS_df.loc[len(SS_df.index)] = new_row
419
420        return SS_df

Create a dataframe containing the input DisulfideList torsional parameters, Cα-Cα distance, energy, and phi-psi angles. This can take several minutes for the entire database.

Parameters
  • SSList: DisulfideList - input list of Disulfides
Returns

pandas.Dataframe containing the torsions

def by_chain(self, chain: str):
422    def by_chain(self, chain: str):
423        """
424        Return a DisulfideList from the input chain identifier.
425
426        :param chain: chain identifier, 'A', 'B, etc
427        :return: DisulfideList containing disulfides within that chain.
428        """
429
430        reslist = DisulfideList([], chain)
431        sslist = self.data
432
433        for ss in sslist:
434            pchain = ss.proximal_chain
435            dchain = ss.distal_chain
436            if pchain == dchain:
437                if pchain == chain:
438                    reslist.append(ss)
439            else:
440                print(f"Cross chain SS: {ss.repr_compact}:")
441        return reslist

Return a DisulfideList from the input chain identifier.

Parameters
  • chain: chain identifier, 'A', 'B, etc
Returns

DisulfideList containing disulfides within that chain.

def calculate_torsion_statistics(self):
443    def calculate_torsion_statistics(self):
444        df = self.build_torsion_df()
445
446        df_subset = df.iloc[:, 4:]
447        df_stats = df_subset.describe()
448
449        # print(df_stats.head())
450
451        mean_vals = df_stats.loc["mean"].values
452        std_vals = df_stats.loc["std"].values
453
454        tor_cols = ["chi1", "chi2", "chi3", "chi4", "chi5", "torsion_length"]
455        dist_cols = ["ca_distance", "cb_distance", "energy"]
456        tor_stats = {}
457        dist_stats = {}
458
459        for col in tor_cols:
460            tor_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
461
462        for col in dist_cols:
463            dist_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
464
465        tor_stats = pandas.DataFrame(tor_stats, columns=tor_cols)
466        dist_stats = pandas.DataFrame(dist_stats, columns=dist_cols)
467
468        return tor_stats, dist_stats
def display(self, style='sb', light=True, panelsize=512):
470    def display(self, style="sb", light=True, panelsize=512):
471        """
472        Display the Disulfide list in the specific rendering style.
473
474        :param single: Display the bond in a single panel in the specific style.
475        :param style:  Rendering style: One of:\n
476            - 'sb' - split bonds
477            - 'bs' - ball and stick
478            - 'cpk' - CPK style
479            - 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
480            - 'plain' - boring single color
481        :light: If True, light background, if False, dark
482        """
483        id = self.pdb_id
484        ssbonds = self.data
485        tot_ss = len(ssbonds)  # number off ssbonds
486        avg_enrg = self.Average_Energy
487        avg_dist = self.Average_Distance
488        resolution = self.resolution
489
490        if light:
491            pv.set_plot_theme("document")
492        else:
493            pv.set_plot_theme("dark")
494
495        title = f"<{id}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
496
497        pl = pv.Plotter()
498        pl = self._render(style, panelsize)
499        pl.enable_anti_aliasing("msaa")
500        pl.add_title(title=title, font_size=FONTSIZE)
501        pl.link_views()
502        pl.reset_camera()
503        pl.show()

Display the Disulfide list in the specific rendering style.

Parameters
  • single: Display the bond in a single panel in the specific style.
  • style: Rendering style: One of:

    • 'sb' - split bonds
    • 'bs' - ball and stick
    • 'cpk' - CPK style
    • 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
    • 'plain' - boring single color :light: If True, light background, if False, dark
def display_torsion_statistics( self, display=True, save=False, fname='ss_torsions.png', stats=False, light=True):
505    def display_torsion_statistics(
506        self,
507        display=True,
508        save=False,
509        fname="ss_torsions.png",
510        stats=False,
511        light=True,
512    ):
513        """
514        Display torsion and distance statistics for a given Disulfide list.
515
516        :param display: Whether to display the plot in the notebook. Default is True.
517        :type display: bool
518        :param save: Whether to save the plot as an image file. Default is False.
519        :type save: bool
520        :param fname: The name of the image file to save. Default is 'ss_torsions.png'.
521        :type fname: str
522        :param stats: Whether to return the DataFrame representing the statistics for `self`. Default is False.
523        :type stats: bool
524        :param light: Whether to use the 'plotly_light' or 'plotly_dark' template. Default is True.
525        :type light: bool
526        :return: None
527        """
528        title = f"{self.id}: {self.length} members"
529
530        df = self.torsion_df
531        df_subset = df.iloc[:, 4:]
532        df_stats = df_subset.describe()
533
534        # print(df_stats.head())
535
536        mean_vals = df_stats.loc["mean"].values
537        std_vals = df_stats.loc["std"].values
538
539        fig = make_subplots(
540            rows=2, cols=2, vertical_spacing=0.125, column_widths=[1, 1]
541        )
542        fig.update_layout(template="plotly" if light else "plotly_dark")
543
544        fig.update_layout(
545            title={
546                "text": title,
547                "xanchor": "center",
548                # 'y':.9,
549                "x": 0.5,
550                "yanchor": "top",
551            },
552            width=1024,
553            height=1024,
554        )
555
556        fig.add_trace(
557            go.Bar(
558                x=["X1", "X2", "X3", "X4", "X5"],
559                y=mean_vals[:5],
560                name="Torsion Angle,(°) ",
561                error_y=dict(type="data", array=std_vals[:5], visible=True),
562            ),
563            row=1,
564            col=1,
565        )
566
567        fig.add_trace(
568            go.Bar(
569                x=["rho"],
570                y=[mean_vals[13]],
571                name="ρ, (°)",
572                error_y=dict(type="data", array=[std_vals[13]], visible=True),
573            ),
574            row=1,
575            col=1,
576        )
577
578        # Update the layout of the subplot
579        # Cα N, Cα, Cβ, C', Sγ Å °
580
581        fig.update_yaxes(
582            title_text="Torsion Angle (°)", range=[-200, 200], row=1, col=1
583        )
584        fig.update_yaxes(range=[0, 320], row=2, col=2)
585
586        # Add another subplot for the mean values of energy
587        fig.add_trace(
588            go.Bar(
589                x=["Strain Energy (kcal/mol)"],
590                y=[mean_vals[5]],
591                name="Energy (kcal/mol)",
592                error_y=dict(
593                    type="data",
594                    array=[std_vals[5].tolist()],
595                    width=0.25,
596                    visible=True,
597                ),
598            ),
599            row=1,
600            col=2,
601        )
602        fig.update_traces(width=0.25, row=1, col=2)
603
604        # Update the layout of the subplot
605        # fig.update_xaxes(title_text="Energy", row=1, col=2)
606        fig.update_yaxes(
607            title_text="kcal/mol", range=[0, 20], row=1, col=2
608        )  # max possible DSE
609
610        # Add another subplot for the mean values of ca_distance
611        fig.add_trace(
612            go.Bar(
613                x=["Cα Distance, (Å)", "Cβ Distance, (Å)"],
614                y=[mean_vals[6], mean_vals[7]],
615                name="Cβ Distance (Å)",
616                error_y=dict(
617                    type="data",
618                    array=[std_vals[6].tolist(), std_vals[7].tolist()],
619                    width=0.25,
620                    visible=True,
621                ),
622            ),
623            row=2,
624            col=1,
625        )
626        # Update the layout of the subplot
627        fig.update_yaxes(title_text="Distance (A)", range=[0, 10], row=2, col=1)  #
628        fig.update_traces(width=0.25, row=2, col=1)
629
630        # Add a scatter subplot for torsion length column
631        fig.add_trace(
632            go.Bar(
633                x=["Torsion Length, (Å)"],
634                y=[mean_vals[12]],
635                name="Torsion Length, (Å)",
636                error_y=dict(
637                    type="data", array=[std_vals[12]], width=0.25, visible=True
638                ),
639            ),
640            row=2,
641            col=2,
642        )
643        # Update the layout of the subplot
644        fig.update_yaxes(title_text="Torsion Length", range=[0, 350], row=2, col=2)
645        fig.update_traces(width=0.25, row=2, col=2)
646
647        # Update the error bars
648        fig.update_traces(
649            error_y_thickness=2,
650            error_y_color="gray",
651            texttemplate="%{y:.2f} ± %{error_y.array:.2f}",
652            textposition="outside",
653        )  # , row=1, col=1)
654
655        if display:
656            fig.show()
657        if save:
658            fig.write_image(Path(fname))
659
660        if stats:
661            return df_stats
662
663        return

Display torsion and distance statistics for a given Disulfide list.

Parameters
  • display: Whether to display the plot in the notebook. Default is True.
  • save: Whether to save the plot as an image file. Default is False.
  • fname: The name of the image file to save. Default is 'ss_torsions.png'.
  • stats: Whether to return the DataFrame representing the statistics for self. Default is False.
  • light: Whether to use the 'plotly_light' or 'plotly_dark' template. Default is True.
Returns

None

distance_df: pandas.core.frame.DataFrame
665    @property
666    def distance_df(self) -> pandas.DataFrame:
667        """
668        Build and return the distance dataframe for the input list.
669        This can take considerable time for the entire list.
670
671        :return: Dataframe containing the Cα-Cα distances for the given list.
672
673        Example:
674        >>> from proteusPy import Disulfide, Load_PDB_SS, DisulfideList
675        >>> PDB_SS = Load_PDB_SS()
676
677        """
678        return self.build_distance_df()

Build and return the distance dataframe for the input list. This can take considerable time for the entire list.

Returns

Dataframe containing the Cα-Cα distances for the given list.

Example:

>>> from proteusPy import Disulfide, Load_PDB_SS, DisulfideList
>>> PDB_SS = Load_PDB_SS()
def display_overlay( self, screenshot=False, movie=False, verbose=True, fname='ss_overlay.png', light=True):
680    def display_overlay(
681        self,
682        screenshot=False,
683        movie=False,
684        verbose=True,
685        fname="ss_overlay.png",
686        light=True,
687    ):
688        """
689        Display all disulfides in the list overlaid in stick mode against
690        a common coordinate frames. This allows us to see all of the disulfides
691        at one time in a single view. Colors vary smoothy between bonds.
692
693        :param screenshot: Save a screenshot, defaults to False
694        :param movie: Save a movie, defaults to False
695        :param verbose: Verbosity, defaults to True
696        :param fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
697        :param light: Background color, defaults to True for White. False for Dark.
698        """
699
700        id = self.pdb_id
701        ssbonds = self.data
702        tot_ss = len(ssbonds)  # number off ssbonds
703        avg_enrg = self.Average_Energy
704        avg_dist = self.Average_Distance
705        resolution = self.resolution
706
707        res = 100
708
709        if tot_ss > 100:
710            res = 60
711        if tot_ss > 200:
712            res = 30
713        if tot_ss > 300:
714            res = 8
715
716        title = f"<{id}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
717
718        if light:
719            pv.set_plot_theme("document")
720        else:
721            pv.set_plot_theme("dark")
722
723        if movie:
724            pl = pv.Plotter(window_size=WINSIZE, off_screen=True)
725        else:
726            pl = pv.Plotter(window_size=WINSIZE, off_screen=False)
727
728        pl.add_title(title=title, font_size=FONTSIZE)
729        pl.enable_anti_aliasing("msaa")
730        # pl.add_camera_orientation_widget()
731        pl.add_axes()
732
733        mycol = np.zeros(shape=(tot_ss, 3))
734        mycol = get_jet_colormap(tot_ss)
735
736        # scale the overlay bond radii down so that we can see the individual elements better
737        # maximum 90% reduction
738
739        brad = BOND_RADIUS if tot_ss < 10 else BOND_RADIUS * 0.75
740        brad = brad if tot_ss < 25 else brad * 0.8
741        brad = brad if tot_ss < 50 else brad * 0.8
742        brad = brad if tot_ss < 100 else brad * 0.6
743
744        # print(f'Brad: {brad}')
745        pbar = tqdm(range(tot_ss), ncols=PBAR_COLS)
746
747        for i, ss in zip(pbar, ssbonds):
748            color = [int(mycol[i][0]), int(mycol[i][1]), int(mycol[i][2])]
749            ss._render(
750                pl,
751                style="plain",
752                bondcolor=color,
753                translate=False,
754                bond_radius=brad,
755                res=res,
756            )
757
758        pl.reset_camera()
759
760        if screenshot:
761            pl.show(auto_close=False)  # allows for manipulation
762            pl.screenshot(fname)
763
764        elif movie:
765            if verbose:
766                print(f" -> display_overlay(): Saving mp4 animation to: {fname}")
767
768            pl.open_movie(fname)
769            path = pl.generate_orbital_path(n_points=360)
770            pl.orbit_on_path(path, write_frames=True)
771            pl.close()
772
773            if verbose:
774                print(f" -> display_overlay(): Saved mp4 animation to: {fname}")
775        else:
776            pl.show()
777
778        return

Display all disulfides in the list overlaid in stick mode against a common coordinate frames. This allows us to see all of the disulfides at one time in a single view. Colors vary smoothy between bonds.

Parameters
  • screenshot: Save a screenshot, defaults to False
  • movie: Save a movie, defaults to False
  • verbose: Verbosity, defaults to True
  • fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
  • light: Background color, defaults to True for White. False for Dark.
def extend(self, other):
780    def extend(self, other):
781        """
782        Extend the Disulfide list with other.
783
784        :param other: extension
785        :type item: DisulfideList
786        """
787
788        if isinstance(other, type(self)):
789            self.data.extend(other)
790        else:
791            self.data.extend(self.validate_ss(item) for item in other)

Extend the Disulfide list with other.

Parameters
  • other: extension
def get_by_name(self, name):
793    def get_by_name(self, name):
794        """
795        Returns the Disulfide with the given name from the list.
796        """
797        for ss in self.data:
798            if ss.name == name:
799                return ss.copy()  # or ss.copy() !!!
800        return None

Returns the Disulfide with the given name from the list.

def get_chains(self):
802    def get_chains(self):
803        """
804        Return the chain IDs for chains within the given Disulfide.
805        :return: Chain IDs for given Disulfide
806        """
807
808        res_dict = {"xxx"}
809        sslist = self.data
810
811        for ss in sslist:
812            pchain = ss.proximal_chain
813            dchain = ss.distal_chain
814            res_dict.update(pchain)
815            res_dict.update(dchain)
816
817        res_dict.remove("xxx")
818
819        return res_dict

Return the chain IDs for chains within the given Disulfide.

Returns

Chain IDs for given Disulfide

def get_torsion_array(self):
821    def get_torsion_array(self):
822        """
823        Returns a 2D NumPy array representing the dihedral angles in the given disulfide list.
824
825        :return: A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row
826                of the array represents the dihedral angles of a disulfide bond, in the following order:
827                [X1_i, X2_i, X3_i, X4_i, X5_i], where i is the index of the disulfide bond in the list.
828        """
829        sslist = self.data
830        tot = len(sslist)
831        res = np.zeros(shape=(tot, 5))
832
833        for idx, ss in enumerate(sslist):
834            row = ss.torsion_array
835            res[idx, :] = row
836
837        return res

Returns a 2D NumPy array representing the dihedral angles in the given disulfide list.

Returns

A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row of the array represents the dihedral angles of a disulfide bond, in the following order: [X1_i, X2_i, X3_i, X4_i, X5_i], where i is the index of the disulfide bond in the list.

def has_chain(self, chain) -> bool:
839    def has_chain(self, chain) -> bool:
840        """
841        Returns True if given chain contained in Disulfide, False otherwise.
842        :return: Returns True if given chain contained in Disulfide, False otherwise.
843        """
844
845        chns = {"xxx"}
846        chns = self.get_chains()
847        if chain in chns:
848            return True
849        else:
850            return False

Returns True if given chain contained in Disulfide, False otherwise.

Returns

Returns True if given chain contained in Disulfide, False otherwise.

id
852    @property
853    def id(self):
854        """
855        PDB ID of the list
856        """
857        return self.pdb_id

PDB ID of the list

resolution
871    @property
872    def resolution(self):
873        """
874        Resolution of the parent sturcture (A)
875        """
876        return self.res

Resolution of the parent sturcture (A)

def TorsionGraph(self, display=True, save=False, fname='ss_torsions.png', light=True):
887    def TorsionGraph(
888        self, display=True, save=False, fname="ss_torsions.png", light=True
889    ):
890        # tor_stats, dist_stats = self.calculate_torsion_statistics()
891        self.display_torsion_statistics(
892            display=display, save=save, fname=fname, light=light
893        )
def insert(self, index, item):
895    def insert(self, index, item):
896        """
897        Insert a Disulfide into the list at the specified index
898
899        :param index: insertion point
900        :type index: int
901        :param item: Disulfide to insert
902        :type item: Disulfide
903        """
904        self.data.insert(index, self.validate_ss(item))

Insert a Disulfide into the list at the specified index

Parameters
  • index: insertion point
  • item: Disulfide to insert
length
906    @property
907    def length(self):
908        return len(self.data)
min: <module 'proteusPy.Disulfide' from '/Users/egs/repos/proteusPy/proteusPy/Disulfide.py'>
910    @property
911    def min(self) -> Disulfide:
912        """
913        Return Disulfide from the list with the minimum energy
914
915        :return: Disulfide with the minimum energy.
916        """
917        sslist = sorted(self.data)
918        return sslist[0]

Return Disulfide from the list with the minimum energy

Returns

Disulfide with the minimum energy.

max: <module 'proteusPy.Disulfide' from '/Users/egs/repos/proteusPy/proteusPy/Disulfide.py'>
920    @property
921    def max(self) -> Disulfide:
922        """
923        Return Disulfide from the list with the maximum energy
924
925        :return: Disulfide with the maximum energy.
926        """
927        sslist = sorted(self.data)
928        return sslist[-1]

Return Disulfide from the list with the maximum energy

Returns

Disulfide with the maximum energy.

def minmax_distance(self):
930    def minmax_distance(self):
931        """
932        Return the Disulfides with the minimum and
933        maximum Cα distances in the list.
934
935        :return: SSmin, SSmax
936        """
937
938        _min = 99999.9
939        _max = -99999.9
940
941        sslist = self.data
942        ssmin = 0
943        ssmax = 0
944        idx = 0
945
946        pbar = tqdm(sslist, ncols=PBAR_COLS)
947        for ss in pbar:
948            dist = ss.ca_distance
949
950            if dist >= _max:
951                ssmax = idx
952                _max = dist
953
954            if dist <= _min:
955                ssmin = idx
956                _min = dist
957            idx += 1
958
959        return sslist[ssmin], sslist[ssmax]

Return the Disulfides with the minimum and maximum Cα distances in the list.

Returns

SSmin, SSmax

minmax_energy
961    @property
962    def minmax_energy(self):
963        """
964        Return the Disulfides with the minimum and maximum energies
965        from the DisulfideList.
966
967        :return: Disulfide with the given ID
968        """
969        sslist = sorted(self.data)
970        return sslist[0], sslist[-1]

Return the Disulfides with the minimum and maximum energies from the DisulfideList.

Returns

Disulfide with the given ID

def nearest_neighbors( self, chi1: float, chi2: float, chi3: float, chi4: float, chi5: float, cutoff: float):
 972    def nearest_neighbors(
 973        self,
 974        chi1: float,
 975        chi2: float,
 976        chi3: float,
 977        chi4: float,
 978        chi5: float,
 979        cutoff: float,
 980    ):
 981        """
 982        Given a torsional array of chi1-chi5,
 983
 984        :param chi1: Chi1 (degrees)
 985        :param chi2: Chi2 (degrees)
 986        :param chi3: Chi3 (degrees)
 987        :param chi4: Chi4 (degrees)
 988        :param chi5: Chi5 (degrees)
 989        :param cutoff: Distance cutoff, degrees
 990        :return: DisulfideList of neighbors
 991        """
 992
 993        sslist = self.data
 994        modelss = proteusPy.Disulfide("model")
 995
 996        modelss.build_model(chi1, chi2, chi3, chi4, chi5)
 997        res = DisulfideList([], "neighbors")
 998        res = modelss.Torsion_neighbors(sslist, cutoff)
 999
1000        return res

Given a torsional array of chi1-chi5,

Parameters
  • chi1: Chi1 (degrees)
  • chi2: Chi2 (degrees)
  • chi3: Chi3 (degrees)
  • chi4: Chi4 (degrees)
  • chi5: Chi5 (degrees)
  • cutoff: Distance cutoff, degrees
Returns

DisulfideList of neighbors

def nearest_neighbors_ss(self, ss, cutoff: float):
1002    def nearest_neighbors_ss(self, ss, cutoff: float):
1003        """
1004        Given an input Disulfide and overall torsional cutoff, return
1005        the list of Disulfides within the cutoff
1006
1007        :param ss: Disulfide to compare to
1008        :param chi5: Chi5 (degrees)
1009        :param cutoff: Distance cutoff, degrees
1010        :return: DisulfideList of neighbors
1011        """
1012
1013        chi1 = ss.chi1
1014        chi2 = ss.chi2
1015        chi3 = ss.chi3
1016        chi4 = ss.chi4
1017        chi5 = ss.chi5
1018
1019        sslist = self.data
1020        modelss = proteusPy.Disulfide("model")
1021
1022        modelss.build_model(chi1, chi2, chi3, chi4, chi5)
1023        res = DisulfideList([], "neighbors")
1024        res = modelss.Torsion_neighbors(sslist, cutoff)
1025
1026        return res

Given an input Disulfide and overall torsional cutoff, return the list of Disulfides within the cutoff

Parameters
  • ss: Disulfide to compare to
  • chi5: Chi5 (degrees)
  • cutoff: Distance cutoff, degrees
Returns

DisulfideList of neighbors

def pprint(self):
1028    def pprint(self):
1029        """
1030        Pretty print self.
1031        """
1032        sslist = self.data
1033        for ss in sslist:
1034            ss.pprint()

Pretty print self.

def pprint_all(self):
1036    def pprint_all(self):
1037        """
1038        Pretty print full disulfide descriptions in self.
1039        """
1040        sslist = self.data
1041        for ss in sslist:
1042            ss.pprint_all()

Pretty print full disulfide descriptions in self.

torsion_df
1044    @property
1045    def torsion_df(self):
1046        return self.build_torsion_df()
torsion_array
1048    @property
1049    def torsion_array(self):
1050        return self.get_torsion_array()
def validate_ss(self, value):
1052    def validate_ss(self, value):
1053        return value
Inherited Members
collections.UserList
data
pop
remove
clear
copy
count
index
reverse
sort
def load_disulfides_from_id( struct_name: str, pdb_dir='/Users/egs/PDB/good/', model_numb=0, verbose=False, quiet=False, dbg=False) -> DisulfideList:
1062def load_disulfides_from_id(
1063    struct_name: str,
1064    pdb_dir=MODEL_DIR,
1065    model_numb=0,
1066    verbose=False,
1067    quiet=False,
1068    dbg=False,
1069) -> DisulfideList:
1070    """
1071    Loads the Disulfides by PDB ID and returns a ```DisulfideList``` of Disulfide objects.
1072    Assumes the file is downloaded in the pdb_dir path.
1073
1074    *NB:* Requires EGS-Modified BIO.parse_pdb_header.py from https://github.com/suchanek/biopython
1075
1076    :param struct_name: the name of the PDB entry.
1077    :param pdb_dir: path to the PDB files, defaults to MODEL_DIR - this is: PDB_DIR/good and are
1078    the pre-parsed PDB files that have been scanned by the DisulfideDownloader program.
1079    :param model_numb: model number to use, defaults to 0 for single structure files.
1080    :param verbose: print info while parsing
1081    :return: a list of Disulfide objects initialized from the file.
1082
1083    Example:
1084
1085    PDB_DIR defaults to os.getenv('PDB').
1086    To load the Disulfides from the PDB ID 5rsa we'd use the following:
1087
1088    >>> from proteusPy import DisulfideList, load_disulfides_from_id
1089    >>> SSlist = DisulfideList([],'5rsa')
1090    >>> SSlist = load_disulfides_from_id('5rsa', verbose=False)
1091    >>> SSlist
1092    [<Disulfide 5rsa_26A_84A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_40A_95A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_58A_110A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_65A_72A, Source: 5rsa, Resolution: 2.0 Å>]
1093    """
1094    import copy
1095    import warnings
1096
1097    from proteusPy.Disulfide import Disulfide, parse_ssbond_header_rec
1098
1099    i = 1
1100    proximal = distal = -1
1101    resolution = -1.0
1102
1103    _chaina = None
1104    _chainb = None
1105
1106    parser = PDBParser(PERMISSIVE=True)
1107
1108    # Biopython uses the Structure -> Model -> Chain hierarchy to organize
1109    # structures. All are iterable.
1110
1111    structure = parser.get_structure(struct_name, file=f"{pdb_dir}pdb{struct_name}.ent")
1112    model = structure[model_numb]
1113
1114    if verbose:
1115        print(f"-> load_disulfide_from_id() - Parsing structure: {struct_name}:")
1116
1117    ssbond_dict = structure.header["ssbond"]  # NB: this requires the modified code
1118    resolution = structure.header["resolution"]
1119
1120    SSList = DisulfideList([], struct_name, resolution)
1121
1122    # list of tuples with (proximal distal chaina chainb)
1123    ssbonds = parse_ssbond_header_rec(ssbond_dict)
1124
1125    with warnings.catch_warnings():
1126        if quiet:
1127            warnings.filterwarnings("ignore")
1128        for pair in ssbonds:
1129            # in the form (proximal, distal, chain)
1130            proximal = pair[0]
1131            distal = pair[1]
1132            chain1_id = pair[2]
1133            chain2_id = pair[3]
1134
1135            if not proximal.isnumeric() or not distal.isnumeric():
1136                mess = f" -> load_disulfides_from_id(): Cannot parse SSBond record (non-numeric IDs):\
1137                {struct_name} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}, ignoring."
1138                warnings.warn(mess, DisulfideConstructionWarning)
1139                continue
1140            else:
1141                proximal = int(proximal)
1142                distal = int(distal)
1143
1144            if proximal == distal:
1145                mess = f" -> load_disulfides_from_id(): Cannot parse SSBond record (proximal == distal):\
1146                {struct_name} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}, ignoring."
1147                warnings.warn(mess, DisulfideConstructionWarning)
1148                continue
1149
1150            _chaina = model[chain1_id]
1151            _chainb = model[chain2_id]
1152
1153            if (_chaina is None) or (_chainb is None):
1154                mess = f" -> load_disulfides_from_id(): NULL chain(s): {struct_name}: {proximal} {chain1_id}\
1155                - {distal} {chain2_id}, ignoring!"
1156                warnings.warn(mess, DisulfideConstructionWarning)
1157                continue
1158
1159            if chain1_id != chain2_id:
1160                if verbose:
1161                    mess = f" -> load_disulfides_from_id(): Cross Chain SS for: Prox: {proximal} {chain1_id}\
1162                    Dist: {distal} {chain2_id}"
1163                    warnings.warn(mess, DisulfideConstructionWarning)
1164                pass  # was break
1165
1166            try:
1167                prox_res = _chaina[proximal]
1168                dist_res = _chainb[distal]
1169
1170            except KeyError:
1171                mess = f"Cannot parse SSBond record (KeyError): {struct_name} Prox:\
1172                {proximal} {chain1_id} Dist: {distal} {chain2_id}, ignoring!"
1173                warnings.warn(mess, DisulfideConstructionWarning)
1174                continue
1175
1176            # make a new Disulfide object, name them based on proximal and distal
1177            # initialize SS bond from the proximal, distal coordinates
1178
1179            if _chaina[proximal].is_disordered() or _chainb[distal].is_disordered():
1180                mess = f" -> load_disulfides_from_id(): Disordered chain(s): {struct_name}: {proximal} {chain1_id}\
1181                - {distal} {chain2_id}, ignoring!"
1182                warnings.warn(mess, DisulfideConstructionWarning)
1183                continue
1184            else:
1185                if verbose:
1186                    print(
1187                        f" -> load_disulfides_from_id(): SSBond: {i}: {struct_name}: {proximal} {chain1_id}\
1188                    - {distal} {chain2_id}"
1189                    )
1190                ssbond_name = f"{struct_name}_{proximal}{chain1_id}_{distal}{chain2_id}"
1191                new_ss = Disulfide(ssbond_name)
1192                new_ss.initialize_disulfide_from_chain(
1193                    _chaina, _chainb, proximal, distal, resolution, quiet=quiet
1194                )
1195                SSList.append(new_ss)
1196        i += 1
1197    return copy.deepcopy(SSList)

Loads the Disulfides by PDB ID and returns a DisulfideList of Disulfide objects. Assumes the file is downloaded in the pdb_dir path.

NB: Requires EGS-Modified BIO.parse_pdb_header.py from https://github.com/suchanek/biopython

Parameters
  • struct_name: the name of the PDB entry.
  • pdb_dir: path to the PDB files, defaults to MODEL_DIR - this is: PDB_DIR/good and are the pre-parsed PDB files that have been scanned by the DisulfideDownloader program.
  • model_numb: model number to use, defaults to 0 for single structure files.
  • verbose: print info while parsing
Returns

a list of Disulfide objects initialized from the file.

Example:

PDB_DIR defaults to os.getenv('PDB'). To load the Disulfides from the PDB ID 5rsa we'd use the following:

>>> from proteusPy import DisulfideList, load_disulfides_from_id
>>> SSlist = DisulfideList([],'5rsa')
>>> SSlist = load_disulfides_from_id('5rsa', verbose=False)
>>> SSlist
[<Disulfide 5rsa_26A_84A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_40A_95A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_58A_110A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_65A_72A, Source: 5rsa, Resolution: 2.0 Å>]