proteusPy.DisulfideList

This module is part of the proteusPy package, a Python package for the analysis and modeling of protein structures, with an emphasis on disulfide bonds. This work is based on the original C/C++ implementation by Eric G. Suchanek.

The module provides the implementation and interface for the DisulfideList object, used extensively by the Disulfide class.

Author: Eric G. Suchanek, PhD Last revision: 2025-01-15 00:09:24 -egs-

   1"""
   2This module is part of the proteusPy package, a Python package for 
   3the analysis and modeling of protein structures, with an emphasis on disulfide bonds.
   4This work is based on the original C/C++ implementation by Eric G. Suchanek. \n
   5
   6The module provides the implementation and interface for the [DisulfideList](#DisulfideList)
   7object, used extensively by Disulfide class.
   8
   9Author: Eric G. Suchanek, PhD
  10Last revision: 2025-01-15 00:09:24 -egs-
  11"""
  12
  13# pylint: disable=c0103
  14# pylint: disable=c0301
  15# pylint: disable=c0302
  16# pylint: disable=c0415
  17# pylint: disable=w0212
  18
  19# Cα N, Cα, Cβ, C', Sγ Å ° ρ
  20
  21try:
  22    # Check if running in Jupyter
  23    shell = get_ipython().__class__.__name__
  24    if shell == "ZMQInteractiveShell":
  25        from tqdm.notebook import tqdm
  26    else:
  27        from tqdm import tqdm
  28except NameError:
  29    from tqdm import tqdm
  30
  31import copy
  32import logging
  33import os
  34from collections import UserList
  35from itertools import combinations
  36from pathlib import Path
  37
  38import numpy as np
  39import pandas as pd
  40import plotly.express as px
  41import plotly.graph_objects as go
  42import pyvista as pv
  43from plotly.subplots import make_subplots
  44
  45import proteusPy
  46from proteusPy import Disulfide
  47from proteusPy.atoms import BOND_RADIUS
  48from proteusPy.logger_config import create_logger
  49from proteusPy.ProteusGlobals import (
  50    MODEL_DIR,
  51    PBAR_COLS,
  52    PDB_DIR,
  53    WINSIZE,
  54    Torsion_DF_Cols,
  55)
  56from proteusPy.utility import (
  57    calculate_fontsize,
  58    get_jet_colormap,
  59    grid_dimensions,
  60    set_plotly_theme,
  61    set_pyvista_theme,
  62)
  63
  64# pio.renderers.default = "png"  # or 'svg'
  65
# Module-level logger, created through the package's logger factory.
_logger = create_logger(__name__)

# Set the figure sizes and axis limits.
# NOTE(review): units are not stated in this file; DPI/WIDTH/HEIGHT look like
# matplotlib-style figure settings (dots per inch, inches) -- confirm with callers.
DPI = 220
WIDTH = 6.0
HEIGHT = 6.0
# Lower/upper torsion axis limits (presumably degrees -- confirm).
TORMIN = -179.9
TORMAX = 180.0

# NOTE(review): presumably the number of histogram bins used by plotting code
# elsewhere in the module -- confirm.
NBINS = 380


# Column order of the dataframe produced by DisulfideList.build_distance_df().
Distance_DF_Cols = [
    "source",
    "ss_id",
    "proximal",
    "distal",
    "energy",
    "ca_distance",
    "cb_distance",
    "sg_distance",
]
  88
  89
  90class DisulfideList(UserList):
  91    """
  92    The class provides a sortable list for Disulfide objects.
  93    Indexing and slicing are supported, as well as typical list operations like
  94    ``.insert()``, ``.append()`` and ``.extend().`` The DisulfideList object must be initialized
  95    with an iterable (tuple, list) and a name. Sorting is keyed by torsional energy.
  96
  97    The class can also render Disulfides to a pyVista window using the
  98    [display()](#DisulfideList.display) and [display_overlay()](#DisulfideList.display_overlay)methods.
  99    See below for examples.\n
 100
 101    Examples:
 102    >>> from proteusPy import Disulfide, DisulfideLoader, DisulfideList, Load_PDB_SS
 103
  104    Instantiate some variables. Note: the list is initialized with an iterable and a name (optional)
 105
 106    >>> SS = Disulfide('tmp')
 107
 108    The list is initialized with an iterable, a name and resolution. Name and resolution
 109    are optional.
 110    >>> SSlist = DisulfideList([],'ss', -1.0)
 111
 112    Load the database.
 113    >>> PDB_SS = Load_PDB_SS(verbose=False, subset=True)
 114
 115    Get the first disulfide via indexing.
 116    >>> SS = PDB_SS[0]
 117
 118    # assert str(SS) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>"
 119
 120    >>> SS4yys = PDB_SS['4yys']
 121
 122    # assert str(SS4yys) == "[<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_56A_98A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_156A_207A, Source: 4yys, Resolution: 1.35 Å>]"
 123
 124    Make some empty disulfides.
 125    >>> ss1 = Disulfide('ss1')
 126    >>> ss2 = Disulfide('ss2')
 127
 128    Make a DisulfideList containing ss1, named 'tmp'
 129    >>> sslist = DisulfideList([ss1], 'tmp')
 130    >>> sslist.append(ss2)
 131
 132    Extract the first disulfide
 133    >>> ss1 = PDB_SS[0]
 134
 135    # assert str(ss1.pprint_all()) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å\n Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))>\nProximal Coordinates:\n   N: <Vector -2.36, -20.48, 5.21>\n   Cα: <Vector -2.10, -19.89, 3.90>\n   C: <Vector -1.12, -18.78, 4.12>\n   O: <Vector -1.30, -17.96, 5.03>\n   Cβ: <Vector -3.38, -19.31, 3.32>\n   Sγ: <Vector -3.24, -18.40, 1.76>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\n Distal Coordinates:\n   N: <Vector -0.60, -18.71, -1.62>\n   Cα: <Vector -0.48, -19.10, -0.22>\n   C: <Vector 0.92, -19.52, 0.18>\n   O: <Vector 1.10, -20.09, 1.25>\n   Cβ: <Vector -1.48, -20.23, 0.08>\n   Sγ: <Vector -3.22, -19.69, 0.18>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n<BLANKLINE>\n Proximal Internal Coords:\n   N: <Vector -0.41, 1.40, -0.00>\n   Cα: <Vector 0.00, 0.00, 0.00>\n   C: <Vector 1.50, 0.00, 0.00>\n   O: <Vector 2.12, 0.71, -0.80>\n   Cβ: <Vector -0.50, -0.70, -1.25>\n   Sγ: <Vector 0.04, -2.41, -1.50>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\nDistal Internal Coords:\n   N: <Vector 1.04, -5.63, 1.17>\n   Cα: <Vector 1.04, -4.18, 1.31>\n   C: <Vector 1.72, -3.68, 2.57>\n   O: <Vector 1.57, -2.51, 2.92>\n   Cβ: <Vector -0.41, -3.66, 1.24>\n   Sγ: <Vector -1.14, -3.69, -0.43>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n Χ1-Χ5: 174.63°, 82.52°, -83.32°, -62.52° -73.83°, 138.89°, 1.70 kcal/mol\n Cα Distance: 4.50 Å\n Torsion length: 231.53 deg>"
 136
 137    Get a list of disulfides via slicing
 138    >>> subset = DisulfideList(PDB_SS[0:10],'subset')
 139
 140    Display the subset disulfides overlaid onto the same coordinate frame,
 141    (proximal N, Ca, C').
 142
 143    The disulfides are colored individually to facilitate inspection.
 144
 145    >>> subset.display_overlay()
 146    """
 147
 148    def __init__(self, iterable, pid: str = "nil", res=-1.0, quiet=True, fast=False):
 149        """
 150        Initialize the DisulfideList.
 151
 152        :param iterable: An iterable of disulfide bonds.
 153        :type iterable: iterable
 154        :param pid: Name for the list, default is "nil".
 155        :type pid: str
 156        :param res: Resolution, default is -1.0. If -1, the average resolution is used.
 157        :type res: float
 158        :param quiet: If True, suppress output, default is True.
 159        :type quiet: bool
 160        :param fast: If True, enable fast mode, default is False.
 161        :type fast: bool
 162
 163        Example:
 164        >>> from proteusPy import DisulfideList, Disulfide
 165
 166        Initialize some empty disulfides.
 167        >>> ss1 = Disulfide('ss1')
 168        >>> ss2 = Disulfide('ss2')
 169        >>> ss3 = Disulfide('ss3')
 170
 171        Make a list containing the disulfides.
 172        >>> sslist = DisulfideList([ss1, ss2], 'sslist')
 173        >>> sslist
 174        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
 175        >>> sslist.append(ss3)
 176        >>> sslist
 177        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
 178        """
 179
 180        super().__init__(self.validate_ss(item) for item in iterable)
 181
 182        self.pdb_id = pid
 183        self.quiet = quiet
 184
 185        if not fast:
 186            if res == -1:
 187                self._res = self.average_resolution
 188            else:
 189                self._res = res
 190        else:
 191            self._res = res
 192
 193    def __getitem__(self, item):
 194        """
 195        Retrieve a disulfide from the list. Internal only.
 196
 197        :param item: Index or slice
 198        :return: Sublist
 199        """
 200        if isinstance(item, slice):
 201            indices = range(*item.indices(len(self.data)))
 202            ind_list = list(indices)
 203            first_ind = ind_list[0]
 204            last_ind = ind_list[-1]
 205            name = (
 206                self.data[first_ind].pdb_id
 207                + f"_slice[{first_ind}:{last_ind+1}]_{self.data[last_ind].pdb_id}"
 208            )
 209            sublist = [self.data[i] for i in indices]
 210            return DisulfideList(sublist, name)
 211        return UserList.__getitem__(self, item)
 212
 213    def __setitem__(self, index, item):
 214        self.data[index] = self.validate_ss(item)
 215
 216    # Rendering engine calculates and instantiates all bond
 217    # cylinders and atomic sphere meshes. Called by all high level routines
 218
 219    def _render(self, pl, style, res=100, panelsize=WINSIZE) -> pv.Plotter:
 220        """
 221        Display a window showing the list of disulfides in the given style.
 222        :param style: one of 'cpk', 'bs', 'sb', 'plain', 'cov', 'pd'
 223        :return: Window in the relevant style
 224        """
 225        ssList = self.data
 226        tot_ss = len(ssList)  # number off ssbonds
 227        rows, cols = grid_dimensions(tot_ss)
 228        res = 100
 229
 230        if tot_ss > 30:
 231            res = 60
 232        if tot_ss > 60:
 233            res = 30
 234        if tot_ss > 90:
 235            res = 12
 236
 237        total_plots = rows * cols
 238        for idx in range(min(tot_ss, total_plots)):
 239            if not self.quiet:
 240                if idx % 5 == 0:
 241                    _logger.info("Rendering %d of %d bonds.", idx + 1, tot_ss)
 242
 243            r = idx // cols
 244            c = idx % cols
 245            pl.subplot(r, c)
 246
 247            ss = ssList[idx]
 248            src = ss.pdb_id
 249            enrg = ss.energy
 250            title = f"{src} {ss.proximal}{ss.proximal_chain}-{ss.distal}{ss.distal_chain}: E: {enrg:.2f}, Cα: {ss.ca_distance:.2f} Å, Tors: {ss.torsion_length:.2f}°"
 251            fontsize = calculate_fontsize(title, panelsize)
 252            pl.add_title(title=title, font_size=fontsize)
 253            ss._render(
 254                pl,
 255                style=style,
 256                res=res,
 257            )
 258
 259        return pl
 260
 261    @property
 262    def average_ca_distance(self):
 263        """
 264        Return the Average energy (kcal/mol) for the Disulfides in the list.
 265
 266        :return: Average energy (kcal/mol) between all atoms in the list
 267        """
 268        sslist = self.data
 269        tot = len(sslist)
 270        if tot == 0:
 271            return 0.0
 272
 273        total_dist = sum(ss.ca_distance for ss in sslist)
 274        return total_dist / tot
 275
 276    @property
 277    def average_distance(self):
 278        """
 279        Return the Average distance (Å) between the atoms in the list.
 280
 281        :return: Average distance (Å) between all atoms in the list
 282
 283        """
 284        sslist = self.data
 285        cnt = 1
 286
 287        total = 0.0
 288        for ss1 in sslist:
 289            for ss2 in sslist:
 290                if ss2 == ss1:
 291                    continue
 292                total += ss1.Distance_RMS(ss2)
 293                cnt += 1
 294
 295        return total / cnt
 296
 297    @property
 298    def average_energy(self):
 299        """
 300        Return the Average energy (kcal/mol) for the Disulfides in the list.
 301
 302        :return: Average energy (kcal/mol) between all atoms in the list
 303        """
 304        sslist = self.data
 305        tot = len(sslist)
 306        if tot == 0:
 307            return 0.0
 308
 309        total_energy = sum(ss.energy for ss in sslist)
 310        return total_energy / tot
 311
 312    @property
 313    def average_conformation(self):
 314        """
 315        Return the average conformation for the disulfides in the list.
 316
 317        :return: Average conformation: [x1, x2, x3, x4, x5]
 318        """
 319        sslist = self.data
 320        res = np.mean([ss.torsion_array for ss in sslist], axis=0)
 321        return res
 322
 323    def append(self, item):
 324        """
 325        Append the list with item
 326
 327        :param item: Disulfide to add
 328        :type item: Disulfide
 329        """
 330        self.data.append(self.validate_ss(item))
 331
 332    @property
 333    def average_resolution(self) -> float:
 334        """
 335        Compute and return the average structure resolution for the given list.
 336
 337        :return: Average resolution (A)
 338        """
 339        resolutions = [ss.resolution for ss in self.data if ss.resolution != -1.0]
 340        return sum(resolutions) / len(resolutions) if resolutions else -1.0
 341
 342    @property
 343    def resolution(self) -> float:
 344        """
 345        Compute and return the average structure resolution for the given list.
 346
 347        :return: Average resolution (A)
 348        """
 349        return self._res
 350
 351    @resolution.setter
 352    def resolution(self, value: float):
 353        """
 354        Set the average structure resolution for the given list.
 355
 356        :param value: The new resolution value to set.
 357        :type value: float
 358        """
 359        if not isinstance(value, float):
 360            raise TypeError("Resolution must be a float.")
 361        self._res = value
 362
 363    @property
 364    def average_torsion_distance(self):
 365        """
 366        Return the average distance in torsion space (degrees), between all pairs in the
 367        DisulfideList
 368
 369        :return: Torsion Distance (degrees)
 370        """
 371        sslist = self.data
 372        total = 0
 373        cnt = 0
 374
 375        for ss1, ss2 in combinations(sslist, 2):
 376            total += ss1.torsion_distance(ss2)
 377            cnt += 1
 378
 379        return float(total / cnt) if cnt > 0 else 0
 380
 381    def build_distance_df(self) -> pd.DataFrame:
 382        """
 383        Create a dataframe containing the input DisulfideList Cα-Cα and Sg-Sg distances, energy.
 384        This can take several minutes for the entire database.
 385
 386        :return: DataFrame containing Ca distances
 387        :rtype: pd.DataFrame
 388        """
 389        # create a list to collect rows as dictionaries
 390        rows = []
 391        i = 0
 392        sslist = self.data
 393        total_length = len(sslist)
 394        update_interval = max(1, total_length // 20)  # 5% of the list length
 395
 396        if self.quiet:
 397            pbar = sslist
 398        else:
 399            pbar = tqdm(sslist, ncols=PBAR_COLS, leave=False)
 400
 401        for ss in pbar:
 402            new_row = {
 403                "source": ss.pdb_id,
 404                "ss_id": ss.name,
 405                "proximal": ss.proximal,
 406                "distal": ss.distal,
 407                "energy": ss.energy,
 408                "ca_distance": ss.ca_distance,
 409                "cb_distance": ss.cb_distance,
 410                "sg_distance": ss.sg_distance,
 411            }
 412            rows.append(new_row)
 413            i += 1
 414
 415            if not self.quiet:
 416                if i % update_interval == 0 or i == total_length - 1:
 417                    pbar.update(update_interval)
 418
 419        # create the dataframe from the list of dictionaries
 420        SS_df = pd.DataFrame(rows, columns=Distance_DF_Cols)
 421
 422        return SS_df
 423
 424    def build_torsion_df(self) -> pd.DataFrame:
 425        """
 426        Create a dataframe containing the input DisulfideList torsional parameters,
 427        Cα-Cα and Sg-Sg distances, energy, and phi-psi angles. This can take several minutes for the
 428        entire database.
 429
 430        :return: pd.DataFrame containing the torsions
 431        """
 432        # create a list to collect rows as dictionaries
 433        rows = []
 434        i = 0
 435        total_length = len(self.data)
 436        update_interval = max(1, total_length // 20)  # 5% of the list length
 437
 438        sslist = self.data
 439        if self.quiet:
 440            pbar = sslist
 441        else:
 442            pbar = tqdm(sslist, ncols=PBAR_COLS, leave=False)
 443
 444        for ss in pbar:
 445            new_row = {
 446                "source": ss.pdb_id,
 447                "ss_id": ss.name,
 448                "proximal": ss.proximal,
 449                "distal": ss.distal,
 450                "chi1": ss.chi1,
 451                "chi2": ss.chi2,
 452                "chi3": ss.chi3,
 453                "chi4": ss.chi4,
 454                "chi5": ss.chi5,
 455                "energy": ss.energy,
 456                "ca_distance": ss.ca_distance,
 457                "cb_distance": ss.cb_distance,
 458                "sg_distance": ss.sg_distance,
 459                "psi_prox": ss.psiprox,
 460                "phi_prox": ss.phiprox,
 461                "phi_dist": ss.phidist,
 462                "psi_dist": ss.psidist,
 463                "torsion_length": ss.torsion_length,
 464                "rho": ss.rho,
 465                "binary_class_string": ss.binary_class_string,
 466                "octant_class_string": ss.octant_class_string,
 467            }
 468            rows.append(new_row)
 469            i += 1
 470
 471            if not self.quiet:
 472                if i % update_interval == 0 or i == total_length - 1:
 473                    pbar.update(update_interval)
 474
 475        if not self.quiet:
 476            pbar.close()
 477
 478        # create the dataframe from the list of dictionaries
 479        SS_df = pd.DataFrame(rows, columns=Torsion_DF_Cols)
 480
 481        return SS_df
 482
 483    def by_chain(self, chain: str):
 484        """
 485        Return a DisulfideList from the input chain identifier.
 486
 487        :param chain: chain identifier, 'A', 'B, etc
 488        :return: DisulfideList containing disulfides within that chain.
 489        """
 490
 491        reslist = DisulfideList([], chain)
 492        sslist = self.data
 493
 494        for ss in sslist:
 495            pchain = ss.proximal_chain
 496            dchain = ss.distal_chain
 497            if pchain == dchain:
 498                if pchain == chain:
 499                    reslist.append(ss)
 500            else:
 501                print(f"Cross chain SS: {ss.repr_compact}:")
 502        return reslist
 503
 504    @property
 505    def center_of_mass(self):
 506        """
 507        Calculate the center of mass for the Disulfide list
 508        """
 509        sslist = self.data
 510        tot = len(sslist)
 511        if tot == 0:
 512            return 0.0
 513
 514        total_cofmass = sum(ss.cofmass for ss in sslist)
 515        return total_cofmass / tot
 516
 517    def describe(self):
 518        """
 519        Prints out relevant attributes of the given disulfideList.
 520
 521        :param disulfideList: A list of disulfide objects.
 522        :param list_name: The name of the list.
 523        """
 524        name = self.pdb_id
 525        avg_distance = self.average_ca_distance
 526        avg_energy = self.average_energy
 527        avg_resolution = self.average_resolution
 528        list_length = len(self.data)
 529
 530        if list_length == 0:
 531            avg_bondangle = 0
 532            avg_bondlength = 0
 533        else:
 534            total_bondangle = 0
 535            total_bondlength = 0
 536
 537            for ss in self.data:
 538                total_bondangle += ss.bond_angle_ideality
 539                total_bondlength += ss.bond_length_ideality
 540
 541            avg_bondangle = total_bondangle / list_length
 542            avg_bondlength = total_bondlength / list_length
 543
 544        print(f"DisulfideList: {name}")
 545        print(f"Length: {list_length}")
 546        print(f"Average energy: {avg_energy:.2f} kcal/mol")
 547        print(f"Average CA distance: {avg_distance:.2f} Å")
 548        print(f"Average Resolution: {avg_resolution:.2f} Å")
 549        print(f"Bond angle deviation: {avg_bondangle:.2f}°")
 550        print(f"Bond length deviation: {avg_bondlength:.2f} Å")
 551
 552    def display(self, style="sb", light="auto", panelsize=512):
 553        """
 554        Display the Disulfide list in the specific rendering style.
 555
 556        :param single: Display the bond in a single panel in the specific style.
 557        :param style:  Rendering style: One of:\n
 558            - 'sb' - split bonds
 559            - 'bs' - ball and stick
 560            - 'cpk' - CPK style
 561            - 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
 562            - 'plain' - boring single color
 563        :light: If True, light background, if False, dark
 564        """
 565        # from proteusPy.utility import get_theme
 566
 567        ssbonds = self.data
 568        tot_ss = len(ssbonds)  # number off ssbonds
 569        rows, cols = grid_dimensions(tot_ss)
 570        winsize = (panelsize * cols, panelsize * rows)
 571
 572        set_pyvista_theme(light)
 573
 574        # title = f"<{pid}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
 575
 576        pl = pv.Plotter(window_size=winsize, shape=(rows, cols))
 577        pl = self._render(pl, style, panelsize=panelsize)
 578        pl.enable_anti_aliasing("msaa")
 579
 580        # the subwindows already show a title
 581        # pl.add_title(title=title, font_size=fontsize)
 582
 583        pl.link_views()
 584        pl.reset_camera()
 585        pl.show()
 586
 587    def display_torsion_statistics(
 588        self,
 589        display=True,
 590        save=False,
 591        fname="ss_torsions.png",
 592        theme="auto",
 593    ):
 594        """
 595        Display torsion and distance statistics for a given Disulfide list.
 596
 597        :param display: Whether to display the plot in the notebook. Default is True.
 598        :type display: bool
 599        :param save: Whether to save the plot as an image file. Default is False.
 600        :type save: bool
 601        :param fname: The name of the image file to save. Default is 'ss_torsions.png'.
 602        :type fname: str
 603        :param theme: The theme to use for the plot. Default is 'Auto'. Options are 'Auto', 'light', and 'dark'.
 604        :type theme: str
 605        :return: none
 606        """
 607
 608        if self.length == 0:
 609            _logger.warning("Empty DisulfideList. Nothing to display.")
 610            return
 611
 612        set_plotly_theme(theme)
 613        title = f"{self.id}: {self.length} members"
 614
 615        tor_vals, dist_vals = self.calculate_torsion_statistics()
 616
 617        tor_mean_vals = tor_vals.loc["mean"]
 618        tor_std_vals = tor_vals.loc["std"]
 619
 620        dist_mean_vals = dist_vals.loc["mean"]
 621        dist_std_vals = dist_vals.loc["std"]
 622
 623        fig = make_subplots(
 624            rows=2, cols=2, vertical_spacing=0.125, column_widths=[1, 1]
 625        )
 626
 627        fig.update_layout(
 628            title={
 629                "text": title,
 630                "xanchor": "center",
 631                # 'y':.9,
 632                "x": 0.5,
 633                "yanchor": "top",
 634            },
 635            width=1024,
 636            height=1024,
 637        )
 638
 639        fig.add_trace(
 640            go.Bar(
 641                x=["X1", "X2", "X3", "X4", "X5"],
 642                y=tor_mean_vals[:5],
 643                name="Torsion Angle (°) ",
 644                error_y=dict(type="data", array=tor_std_vals, visible=True),
 645            ),
 646            row=1,
 647            col=1,
 648        )
 649
 650        fig.add_trace(
 651            go.Bar(
 652                x=["rho"],
 653                y=[dist_mean_vals[4]],
 654                name="ρ (°)",
 655                error_y=dict(type="data", array=[dist_std_vals[4]], visible=True),
 656            ),
 657            row=1,
 658            col=1,
 659        )
 660
 661        # Update the layout of the subplot
 662        # Cα N, Cα, Cβ, C', Sγ Å °
 663
 664        fig.update_yaxes(
 665            title_text="Dihedral Angle (°)", range=[-200, 200], row=1, col=1
 666        )
 667        fig.update_yaxes(range=[0, 320], row=2, col=2)
 668
 669        # Add another subplot for the mean values of energy
 670        fig.add_trace(
 671            go.Bar(
 672                x=["Strain Energy (kcal/mol)"],
 673                y=[dist_mean_vals[3]],
 674                name="Energy (kcal/mol)",
 675                error_y=dict(
 676                    type="data",
 677                    array=[dist_std_vals[3].tolist()],
 678                    width=0.25,
 679                    visible=True,
 680                ),
 681            ),
 682            row=1,
 683            col=2,
 684        )
 685        fig.update_traces(width=0.25, row=1, col=2)
 686
 687        # Update the layout of the subplot
 688        # fig.update_xaxes(title_text="Energy", row=1, col=2)
 689        fig.update_yaxes(
 690            title_text="kcal/mol", range=[0, 8], row=1, col=2
 691        )  # max possible DSE
 692
 693        # Add another subplot for the mean values of ca_distance
 694        fig.add_trace(
 695            go.Bar(
 696                x=["Cα Distance (Å)", "Cβ Distance (Å)", "Sγ Distance (Å)"],
 697                y=[dist_mean_vals[0], dist_mean_vals[1], dist_mean_vals[2]],
 698                name="Distances (Å)",
 699                error_y=dict(
 700                    type="data",
 701                    array=[
 702                        dist_std_vals[0].tolist(),
 703                        dist_std_vals[1].tolist(),
 704                        dist_std_vals[2].tolist(),
 705                    ],
 706                    width=0.25,
 707                    visible=True,
 708                ),
 709            ),
 710            row=2,
 711            col=1,
 712        )
 713        # Update the layout of the subplot
 714        fig.update_yaxes(title_text="Distance (A)", range=[0, 8], row=2, col=1)  #
 715        fig.update_traces(width=0.25, row=2, col=1)
 716
 717        # Add a scatter subplot for torsion length column
 718        fig.add_trace(
 719            go.Bar(
 720                x=["Torsion Length (Å)"],
 721                y=[tor_mean_vals[5]],
 722                name="Torsion Length (Å)",
 723                error_y=dict(
 724                    type="data", array=[tor_std_vals[5]], width=0.25, visible=True
 725                ),
 726            ),
 727            row=2,
 728            col=2,
 729        )
 730        # Update the layout of the subplot
 731        fig.update_yaxes(title_text="Torsion Length", range=[0, 350], row=2, col=2)
 732        fig.update_traces(width=0.25, row=2, col=2)
 733
 734        # Update the error bars
 735        fig.update_traces(
 736            error_y_thickness=2,
 737            error_y_color="gray",
 738            texttemplate="%{y:.2f} ± %{error_y.array:.2f}",
 739            textposition="outside",
 740        )  # , row=1, col=1)
 741
 742        if display:
 743            fig.show()
 744
 745        if save:
 746            fig.write_image(Path(fname))
 747
 748        return
 749
 750    @property
 751    def distance_df(self) -> pd.DataFrame:
 752        """
 753        Build and return the distance dataframe for the input list.
 754        This can take considerable time for the entire list.
 755
 756        :return: Dataframe containing the Cα-Cα distances for the given list.
 757
 758        Example:
 759        >>> from proteusPy import Disulfide, Load_PDB_SS, DisulfideList
 760        >>> PDB_SS = Load_PDB_SS()
 761
 762        """
 763        return self.build_distance_df()
 764
 765    def display_overlay(
 766        self,
 767        screenshot=False,
 768        movie=False,
 769        verbose=False,
 770        fname="ss_overlay.png",
 771        light="auto",
 772        winsize=WINSIZE,
 773    ):
 774        """
 775        Display all disulfides in the list overlaid in stick mode against
 776        a common coordinate frames. This allows us to see all of the disulfides
 777        at one time in a single view. Colors vary smoothy between bonds.
 778
 779        :param screenshot: Save a screenshot, defaults to False
 780        :param movie: Save a movie, defaults to False
 781        :param verbose: Verbosity, defaults to True
 782        :param fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
 783        :param light: Background color, defaults to True for White. False for Dark.
 784        """
 785
 786        # from proteusPy.utility import get_theme
 787
 788        pid = self.pdb_id
 789
 790        ssbonds = self.data
 791        tot_ss = len(ssbonds)  # number off ssbonds
 792        avg_enrg = self.average_energy
 793        avg_dist = self.average_distance
 794        resolution = self.average_resolution
 795
 796        res = 64
 797
 798        if tot_ss > 30:
 799            res = 48
 800        if tot_ss > 60:
 801            res = 16
 802        if tot_ss > 90:
 803            res = 8
 804
 805        title = f"<{pid}> {resolution:.2f} Å: ({tot_ss} SS), E: {avg_enrg:.2f} kcal/mol, Dist: {avg_dist:.2f} Å"
 806        fontsize = calculate_fontsize(title, winsize[0])
 807
 808        set_pyvista_theme(light)
 809
 810        if movie:
 811            pl = pv.Plotter(window_size=winsize, off_screen=True)
 812        else:
 813            pl = pv.Plotter(window_size=winsize, off_screen=False)
 814
 815        pl.add_title(title=title, font_size=fontsize)
 816        pl.enable_anti_aliasing("msaa")
 817        pl.add_axes()
 818
 819        mycol = np.zeros(shape=(tot_ss, 3))
 820        mycol = get_jet_colormap(tot_ss)
 821
 822        # scale the overlay bond radii down so that we can see the individual elements better
 823        # maximum 90% reduction
 824
 825        brad = BOND_RADIUS if tot_ss < 10 else BOND_RADIUS * 0.75
 826        brad = brad if tot_ss < 25 else brad * 0.8
 827        brad = brad if tot_ss < 50 else brad * 0.8
 828        brad = brad if tot_ss < 100 else brad * 0.6
 829
 830        # print(f'Brad: {brad}')
 831        if verbose:
 832            pbar = tqdm(range(tot_ss), ncols=PBAR_COLS)
 833        else:
 834            pbar = range(tot_ss)
 835
 836        for i, ss in zip(pbar, ssbonds):
 837            color = [int(mycol[i][0]), int(mycol[i][1]), int(mycol[i][2])]
 838            ss._render(
 839                pl,
 840                style="plain",
 841                bondcolor=color,
 842                translate=False,
 843                bond_radius=brad,
 844                res=res,
 845            )
 846
 847        pl.reset_camera()
 848
 849        if screenshot:
 850            pl.show(auto_close=False)  # allows for manipulation
 851            # Take the screenshot after ensuring the plotter is still active
 852            try:
 853                pl.screenshot(fname)
 854                if verbose:
 855                    print(f" -> display_overlay(): Saved image to: {fname}")
 856            except RuntimeError as e:
 857                _logger.error("Error saving screenshot: %s", e)
 858
 859        elif movie:
 860            if verbose:
 861                print(f" -> display_overlay(): Saving mp4 animation to: {fname}")
 862
 863            pl.open_movie(fname)
 864            path = pl.generate_orbital_path(n_points=360)
 865            pl.orbit_on_path(path, write_frames=True)
 866            pl.close()
 867
 868            if verbose:
 869                print(f" -> display_overlay(): Saved mp4 animation to: {fname}")
 870        else:
 871            pl.show()
 872
 873        return
 874
 875    def extend(self, other):
 876        """
 877        Extend the Disulfide list with other.
 878
 879        :param other: extension
 880        :type item: DisulfideList
 881        """
 882
 883        if isinstance(other, type(self)):
 884            self.data.extend(other)
 885        else:
 886            self.data.extend(self.validate_ss(item) for item in other)
 887
 888    def filter_by_distance(self, distance: float = -1.0, minimum: float = 2.0):
 889        """
 890        Return a DisulfideList filtered by to between the maxium Ca distance and
 891        the minimum, which defaults to 2.0A.
 892
 893        :param distance: Distance in Å
 894        :param minimum: Distance in Å
 895        :return: DisulfideList containing disulfides with the given distance.
 896        """
 897
 898        reslist = []
 899        sslist = self.data
 900
 901        # if distance is -1.0, return the entire list
 902        if distance == -1.0:
 903            return sslist.copy()
 904
 905        reslist = [
 906            ss
 907            for ss in sslist
 908            if ss.ca_distance < distance and ss.ca_distance > minimum
 909        ]
 910
 911        return DisulfideList(reslist, f"filtered by distance < {distance:.2f}")
 912
 913    def filter_by_sg_distance(self, distance: float = -1.0, minimum: float = 1.0):
 914        """
 915        Return a DisulfideList filtered by to between the maxium Sg distance and
 916        the minimum, which defaults to 1.0A.
 917
 918        :param distance: Distance in Å
 919        :param minimum: Distance in Å
 920        :return: DisulfideList containing disulfides with the given distance.
 921        """
 922
 923        reslist = []
 924        sslist = self.data
 925
 926        # if distance is -1.0, return the entire list
 927        if distance == -1.0:
 928            return sslist.copy()
 929
 930        reslist = [
 931            ss
 932            for ss in sslist
 933            if ss.sg_distance < distance and ss.sg_distance > minimum
 934        ]
 935
 936        return DisulfideList(reslist, f"filtered by Sγ distance < {distance:.2f}")
 937
 938    def filter_by_bond_ideality(self, angle: float = -1.0):
 939        """
 940        Return a DisulfideList filtered by bond angle ideality between the maxium angle
 941        and the minimum, which defaults to 0.0°.
 942
 943        :param angle: Angle in degrees
 944        :param minimum: Angle in degrees
 945        :return: DisulfideList containing disulfides with the given angle.
 946        """
 947
 948        reslist = []
 949        sslist = self.data
 950
 951        # if angle is -1.0, return the entire list
 952        if angle == -1.0:
 953            return sslist.copy()
 954
 955        reslist = [ss for ss in sslist if ss.bond_angle_ideality < angle]
 956
 957        return DisulfideList(reslist, f"filtered by bond angle < {angle:.2f}")
 958
 959    def get_by_name(self, name):
 960        """
 961        Returns the Disulfide with the given name from the list.
 962        """
 963        for ss in self.data:
 964            if ss.name == name:
 965                return ss.copy()  # or ss.copy() !!!
 966        return None
 967
 968    def get_chains(self):
 969        """
 970        Return the chain IDs for chains within the given Disulfide.
 971        :return: Chain IDs for given Disulfide
 972        """
 973
 974        res_dict = {"xxx"}
 975        sslist = self.data
 976
 977        for ss in sslist:
 978            pchain = ss.proximal_chain
 979            dchain = ss.distal_chain
 980            res_dict.update(pchain)
 981            res_dict.update(dchain)
 982
 983        res_dict.remove("xxx")
 984
 985        return res_dict
 986
 987    def get_torsion_array(self):
 988        """
 989        Return a 2D NumPy array representing the dihedral angles in the given disulfide list.
 990
 991        :return: A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row
 992                of the array represents the dihedral angles of a disulfide bond, in the following order:
 993                [X1, X, X3, X4, X5], where i is the index of the disulfide bond in the list.
 994        """
 995        return np.array([ss.torsion_array for ss in self.data])
 996
 997    def has_chain(self, chain) -> bool:
 998        """
 999        Returns True if given chain contained in Disulfide, False otherwise.
1000        :return: Returns True if given chain contained in Disulfide, False otherwise.
1001        """
1002
1003        chns = {"xxx"}
1004        chns = self.get_chains()
1005        if chain in chns:
1006            return True
1007        else:
1008            return False
1009
1010    @property
1011    def id(self):
1012        """
1013        PDB ID of the list
1014        """
1015        return self.pdb_id
1016
1017    @id.setter
1018    def id(self, value):
1019        """
1020        Set the DisulfideList ID
1021
1022        Parameters
1023        ----------
1024        value : str
1025            List ID
1026        """
1027        self.pdb_id = value
1028
1029    def TorsionGraph(
1030        self, display=True, save=False, fname="ss_torsions.png", theme="Auto"
1031    ):
1032        """
1033        Generate and optionally display or save a torsion graph.
1034
1035        This method generates a torsion graph based on the torsion statistics
1036        of disulfide bonds. It can display the graph, save it to a file, or both.
1037
1038        :param display: If True, the torsion graph will be displayed. Default is True.
1039        :type display: bool
1040        :param save: If True, the torsion graph will be saved to a file. Default is False.
1041        :type save: bool
1042        :param fname: The filename to save the torsion graph. Default is "ss_torsions.png".
1043        :type fname: str
1044        :param theme: One of 'auto', 'light', or 'dark'. Default is 'auto'.
1045        :type theme: str
1046
1047        :return: None
1048        """
1049        # tor_stats, dist_stats = self.calculate_torsion_statistics()
1050        self.display_torsion_statistics(
1051            display=display, save=save, fname=fname, theme=theme
1052        )
1053
1054    def translate(self, translation_vector) -> None:
1055        """
1056        Translate the DisulfideList by the given translation vector.
1057        Note: The translation is a vector SUBTRACTION, not addition.
1058        This is used primarily to move a list to its geometric center of mass
1059        and is a destructive operation, in the sense that it updates the list in place.
1060
1061        :param translation_vector: The translation vector to apply.
1062        :type translation_vector: Vector3D
1063        """
1064        for ss in self.data:
1065            ss.translate(translation_vector)
1066
1067    def insert(self, index, item):
1068        """
1069        Insert a Disulfide into the list at the specified index
1070
1071        :param index: insertion point
1072        :type index: int
1073        :param item: Disulfide to insert
1074        :type item: Disulfide
1075        """
1076        self.data.insert(index, self.validate_ss(item))
1077
1078    @property
1079    def length(self):
1080        """Return the length of the list"""
1081        return len(self.data)
1082
1083    @property
1084    def min(self) -> Disulfide:
1085        """
1086        Return Disulfide from the list with the minimum energy
1087
1088        :return: Disulfide with the minimum energy.
1089        """
1090        sslist = sorted(self.data)
1091        return sslist[0]
1092
1093    @property
1094    def max(self) -> Disulfide:
1095        """
1096        Return Disulfide from the list with the maximum energy
1097
1098        :return: Disulfide with the maximum energy. This assumes that
1099        the comparison is based on the energy attribute.
1100        """
1101        sslist = sorted(self.data)
1102        return sslist[-1]
1103
1104    def minmax_distance(self):
1105        """
1106        Return the Disulfides with the minimum and
1107        maximum Cα distances in the list.
1108
1109        :return: SSmin, SSmax
1110        """
1111        sslist = self.data
1112
1113        if not sslist:
1114            return None, None
1115
1116        ssmin = min(sslist, key=lambda ss: ss.ca_distance)
1117        ssmax = max(sslist, key=lambda ss: ss.ca_distance)
1118
1119        return ssmin, ssmax
1120
1121    @property
1122    def minmax_energy(self):
1123        """
1124        Return the Disulfides with the minimum and maximum energies
1125        from the DisulfideList.
1126
1127        :return: Disulfides with minimum and maximum energies
1128        """
1129        sslist = self.data
1130
1131        if not sslist:
1132            return None, None
1133
1134        sslist = sorted(sslist, key=lambda ss: ss.energy)
1135        return sslist[0], sslist[-1]
1136
1137    def nearest_neighbors(self, cutoff: float, *args):
1138        """
1139        Return all Disulfides within the given angle cutoff of the input Disulfide.
1140
1141        :param cutoff: Distance cutoff, degrees
1142        :param args: Either 5 individual angles (chi1, chi2, chi3, chi4, chi5) or a list of 5 angles
1143        :return: DisulfideList of neighbors within the cutoff
1144        """
1145        if len(args) == 1 and isinstance(args[0], list) and len(args[0]) == 5:
1146            chi1, chi2, chi3, chi4, chi5 = args[0]
1147        elif len(args) == 5:
1148            chi1, chi2, chi3, chi4, chi5 = args
1149        else:
1150            raise ValueError(
1151                "You must provide either 5 individual angles or a list of 5 angles."
1152            )
1153
1154        sslist = self.data
1155        modelss = proteusPy.Disulfide("model", torsions=[chi1, chi2, chi3, chi4, chi5])
1156        res = modelss.torsion_neighbors(sslist, cutoff)
1157
1158        resname = f"Neighbors within {cutoff:.2f}° of [{', '.join(f'{angle:.2f}' for angle in modelss.dihedrals)}]"
1159        res.pdb_id = resname
1160
1161        return res
1162
1163    def nearest_neighbors_ss(self, ss, cutoff: float):
1164        """
1165        Return the list of Disulfides within the torsional cutoff
1166        of the input Disulfide.
1167
1168        :param ss: Disulfide to compare to
1169        :param cutoff: Distance cutoff, degrees
1170        :return: DisulfideList of neighbors
1171        """
1172
1173        sslist = self.data
1174        res = ss.torsion_neighbors(sslist, cutoff)
1175
1176        resname = f"{ss.name} neighbors within {cutoff}°"
1177        res.pdb_id = resname
1178
1179        return res
1180
1181    def pprint(self):
1182        """
1183        Pretty print self.
1184        """
1185        sslist = self.data
1186        for ss in sslist:
1187            ss.pprint()
1188
1189    def pprint_all(self):
1190        """
1191        Pretty print full disulfide descriptions in self.
1192        """
1193        sslist = self.data
1194        for ss in sslist:
1195            ss.pprint_all()
1196
1197    @property
1198    def torsion_df(self):
1199        """Return the Torsion DataFrame for the DisulfideList"""
1200        return self.build_torsion_df()
1201
1202    @property
1203    def torsion_array(self):
1204        """Return the Torsions as an Array"""
1205        return self.get_torsion_array()
1206
1207    def validate_ss(self, value):
1208        """Return the Disulfide object if it is a Disulfide, otherwise raise an error"""
1209        from proteusPy.Disulfide import Disulfide
1210
1211        if value is None:
1212            raise ValueError("The value cannot be None.")
1213
1214        if not isinstance(value, Disulfide):
1215            raise TypeError("The value must be an instance of Disulfide.")
1216        return value
1217
1218    def create_deviation_dataframe(self, verbose=False):
1219        """
1220        Create a DataFrame with columns PDB_ID, SS_Name, Angle_Deviation, Distance_Deviation,
1221        Ca Distance from a list of disulfides.
1222
1223        :param verbose: Whether to display a progress bar.
1224        :type verbose: bool
1225        :return: DataFrame containing the disulfide information.
1226        :rtype: pd.DataFrame
1227        """
1228        disulfide_list = self.data
1229        data = {
1230            "PDB_ID": [],
1231            "Resolution": [],
1232            "SS_Name": [],
1233            "Angle_Deviation": [],
1234            "Bondlength_Deviation": [],
1235            "Ca_Distance": [],
1236            "Sg_Distance": [],
1237        }
1238
1239        if verbose:
1240            pbar = tqdm(disulfide_list, desc="Processing...", leave=False)
1241        else:
1242            pbar = disulfide_list
1243
1244        for ss in pbar:
1245            data["PDB_ID"].append(ss.pdb_id)
1246            data["Resolution"].append(ss.resolution)
1247            data["SS_Name"].append(ss.name)
1248            data["Angle_Deviation"].append(ss.bond_angle_ideality)
1249            data["Bondlength_Deviation"].append(ss.bond_length_ideality)
1250            data["Ca_Distance"].append(ss.ca_distance)
1251            data["Sg_Distance"].append(ss.sg_distance)
1252
1253        df = pd.DataFrame(data)
1254        return df
1255
1256    def extract_distances(self, distance_type="sg", comparison="less", cutoff=-1):
1257        """
1258        Extract and filter the distance values from the disulfide list based on the specified type and comparison.
1259
1260        :param disulfide_list: List of disulfide objects.
1261        :param distance_type: Type of distance to extract ('sg' or 'ca').
1262        :param comparison: If 'less', return distances less than the cutoff value, otherwise return distances greater than or equal to the cutoff value.
1263        :param cutoff: Cutoff value for filtering distances.
1264        :return: List of filtered distance values.
1265        """
1266        disulfide_list = self.data
1267        distances = filtered_distances = []
1268
1269        match distance_type:
1270            case "sg":
1271                distances = [ds.sg_distance for ds in disulfide_list]
1272            case "ca":
1273                distances = [ds.ca_distance for ds in disulfide_list]
1274            case _:
1275                raise ValueError("Invalid distance_type. Must be 'sg' or 'ca'.")
1276
1277        if cutoff == -1.0:
1278            return distances
1279
1280        if comparison == "greater":
1281            filtered_distances = [d for d in distances if d > cutoff]
1282        else:
1283            filtered_distances = [d for d in distances if d <= cutoff]
1284
1285        return filtered_distances
1286
1287    @staticmethod
1288    def plot_distances(
1289        distances,
1290        distance_type="sg",
1291        cutoff=-1,
1292        comparison="less",
1293        theme="auto",
1294        log=True,
1295    ):
1296        """
1297        Plot the distance values as a histogram using plotly express.
1298
1299        :param distances: List of distance values.
1300        :param distance_type: Type of distance to plot ('sg' or 'ca').
1301        :param cutoff: Cutoff value for the x-axis title.
1302        :param flip: Whether to flip the comparison in the x-axis title.
1303        :param theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
1304        :param log: Whether to use a logarithmic scale for the y-axis. Default is True.
1305        """
1306
1307        set_plotly_theme(theme)
1308
1309        yaxis_type = "log" if log else "linear"
1310        flip = False if comparison == "less" else True
1311
1312        match distance_type:
1313            case "sg":
1314                column_name = "SG Distance"
1315                title = "Sγ Distance Distribution"
1316                if cutoff == -1.0:
1317                    xtitle = "Sγ-Sγ Distances, (no cutoff)"
1318                else:
1319                    xtitle = (
1320                        f"Sγ Distance < {cutoff} Å"
1321                        if not flip
1322                        else f"Sγ-Sγ Distance >= {cutoff} Å"
1323                    )
1324            case "ca":
1325                column_name = "Ca Distance"
1326                title = "Cα Distance Distribution"
1327                if cutoff == -1.0:
1328                    xtitle = "Cα-Cα Distances, (no cutoff)"
1329                else:
1330                    xtitle = (
1331                        f"Cα Distance < {cutoff} Å"
1332                        if not flip
1333                        else f"Cα-Cα Distance >= {cutoff} Å"
1334                    )
1335            case _:
1336                raise ValueError("Invalid distance_type. Must be 'sg' or 'ca'.")
1337
1338        # Convert to a Pandas DataFrame with the appropriate column name
1339        df = pd.DataFrame(distances, columns=[column_name])
1340
1341        fig = px.histogram(
1342            df,
1343            x=column_name,  # Use the column name for the x-axis
1344            nbins=NBINS,
1345            title=title,
1346        )
1347        fig.update_layout(
1348            title={"text": "Distance Distribution", "x": 0.5, "xanchor": "center"},
1349            xaxis_title=xtitle,
1350            yaxis_title="Frequency",
1351            yaxis_type=yaxis_type,
1352            bargap=0.2,
1353        )
1354        fig.show()
1355
1356    def plot_deviation_scatterplots(self, verbose=False, theme="auto"):
1357        """
1358        Plot scatter plots for Bondlength_Deviation, Angle_Deviation, Ca_Distance, and Sg_Distance
1359        with the row index as the x-axis.
1360
1361        :param verbose: If True, display additional information during processing, defaults to False.
1362        :type verbose: bool
1363        :param theme: The theme to use for the plot ('auto', 'light', or 'dark'), defaults to 'auto'.
1364        :type theme: str
1365        """
1366        set_plotly_theme(theme)
1367        dotsize = 2
1368
1369        df = self.create_deviation_dataframe(verbose=verbose)
1370
1371        fig = px.scatter(
1372            df, x=df.index, y="Bondlength_Deviation", title="Bondlength Deviation"
1373        )
1374        fig.update_layout(xaxis_title="Row Index", yaxis_title="Bondlength Deviation")
1375        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1376        fig.show()
1377
1378        fig = px.scatter(df, x=df.index, y="Angle_Deviation", title="Angle Deviation")
1379        fig.update_layout(xaxis_title="Row Index", yaxis_title="Angle Deviation")
1380        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1381        fig.show()
1382
1383        fig = px.scatter(df, x=df.index, y="Ca_Distance", title="Cα Distance")
1384        fig.update_layout(xaxis_title="Row Index", yaxis_title="Cα Distance")
1385        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1386        fig.show()
1387
1388        fig = px.scatter(df, x=df.index, y="Sg_Distance", title="Sg Distance")
1389        fig.update_layout(xaxis_title="Row Index", yaxis_title="Sg Distance")
1390        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1391        fig.show()
1392
1393    def plot_deviation_histograms(self, verbose=False, theme="auto", log=True) -> None:
1394        """
1395        Plot histograms for Bondlength_Deviation, Angle_Deviation, and Ca_Distance.
1396
1397        This function creates and displays histograms for the bond length deviation,
1398        bond angle deviation from the disulfide list. The histograms
1399        are displayed on a logarithmic scale for the y-axis.
1400
1401        :param verbose: Whether to display a progress bar.
1402        :type verbose: bool
1403        :param theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
1404        :param log: Whether to use a logarithmic scale for the y-axis. Default is True.
1405        """
1406
1407        set_plotly_theme(theme)
1408        if log:
1409            yaxis_type = "log"
1410        else:
1411            yaxis_type = "linear"
1412
1413        df = self.create_deviation_dataframe(verbose=verbose)
1414
1415        fig = px.histogram(
1416            df,
1417            x="Bondlength_Deviation",
1418            nbins=NBINS,
1419            title="Bond Length Deviation (Å)",
1420        )
1421
1422        fig.update_layout(
1423            title={"text": "Bond Length Deviation", "x": 0.5, "xanchor": "center"},
1424            xaxis_title="Bond Length Deviation (Å)",
1425            yaxis_title="Frequency",
1426            yaxis_type=yaxis_type,
1427        )
1428        fig.show()
1429
1430        fig2 = px.histogram(
1431            df, x="Angle_Deviation", nbins=NBINS, title="Bond Angle Deviation, (°)"
1432        )
1433        fig2.update_layout(
1434            title={"text": "Bond Angle Deviation", "x": 0.5, "xanchor": "center"},
1435            xaxis_title="Bond Angle Deviation (°)",
1436            yaxis_title="Frequency",
1437            yaxis_type=yaxis_type,
1438        )
1439
1440        fig2.show()
1441
1442        return
1443
1444    def filter_deviation_df_by_cutoffs(
1445        self,
1446        length_cutoff=10.0,
1447        angle_cutoff=100.0,
1448        ca_cutoff=1000.0,
1449        sg_cutoff=10.0,
1450        minimum_distance=0.0,
1451    ) -> pd.DataFrame:
1452        """
1453        Filter the DataFrame based on bond length, angle, Ca and Sg distance cutoffs.
1454
1455        Note: The default values are set to high values to allow all structures to pass the filter.
1456
1457        :param df: DataFrame containing the deviations.
1458        :type df: pd.DataFrame
1459        :param length_cutoff: Cutoff value for Bond Length Deviation.
1460        :type distance_cutoff: float
1461        :param angle_cutoff: Cutoff value for angle deviation.
1462        :type angle_cutoff: float
1463        :param ca_cutoff: Cutoff value for Ca distance.
1464        :type ca_cutoff: float
1465        :param sg_cutoff: Cutoff value for Sg distance.
1466        :type sg_cutoff: float
1467        :return: Filtered DataFrame.
1468        :rtype: pd.DataFrame
1469        """
1470        df = self.create_deviation_dataframe()
1471
1472        filtered_df = df[
1473            (df["Bondlength_Deviation"] <= length_cutoff)
1474            & (df["Angle_Deviation"] <= angle_cutoff)
1475            & (df["Ca_Distance"] >= minimum_distance)
1476            & (df["Ca_Distance"] <= ca_cutoff)
1477            & (df["Sg_Distance"] >= minimum_distance)
1478            & (df["Sg_Distance"] <= sg_cutoff)
1479        ]
1480        return filtered_df
1481
1482    def bad_filter_deviation_df_by_cutoffs(
1483        self,
1484        length_cutoff=0.0,
1485        angle_cutoff=0.0,
1486        ca_cutoff=0.0,
1487        sg_cutoff=0.0,
1488        minimum_distance=0.0,
1489    ) -> pd.DataFrame:
1490        """
1491        Return the DataFrame objects that are GREATER than the cutoff based on distance,
1492        angle, Ca and Sg distance cutoffs. Used to get the bad structures.
1493
1494        Note: The default values are set to low values to allow all structures to pass the filter.
1495
1496        :param df: DataFrame containing the deviations.
1497        :type df: pd.DataFrame
1498        :param length_cutoff: Cutoff value for Bond Length Deviation.
1499        :type length_cutoff: float
1500        :param angle_cutoff: Cutoff value for angle deviation.
1501        :type angle_cutoff: float
1502        :param ca_cutoff: Cutoff value for Ca distance.
1503        :type ca_cutoff: float
1504        :return: Filtered DataFrame.
1505        :rtype: pd.DataFrame
1506        """
1507        df = self.create_deviation_dataframe()
1508
1509        filtered_df = df[
1510            (df["Bondlength_Deviation"] > length_cutoff)
1511            & (df["Angle_Deviation"] > angle_cutoff)
1512            & (df["Ca_Distance"] > ca_cutoff)
1513            & (df["Ca_Distance"] < minimum_distance)
1514            & (df["Sg_Distance"] > sg_cutoff)
1515            & (df["Sg_Distance"] < minimum_distance)
1516        ]
1517        return filtered_df
1518
1519    def calculate_torsion_statistics(self) -> tuple:
1520        """
1521        Calculate and return the torsion and distance statistics for the DisulfideList.
1522
1523        This method builds a DataFrame containing torsional parameters, Cα-Cα distance,
1524        energy, and phi-psi angles for the DisulfideList. It then calculates the mean
1525        and standard deviation for the torsional and distance parameters.
1526
1527        :return: A tuple containing two DataFrames:
1528                - tor_stats: DataFrame with mean and standard deviation for torsional parameters.
1529                - dist_stats: DataFrame with mean and standard deviation for distance parameters.
1530        :rtype: tuple (pd.DataFrame, pd.DataFrame)
1531        """
1532
1533        df = self.torsion_df
1534
1535        tor_cols = ["chi1", "chi2", "chi3", "chi4", "chi5", "torsion_length"]
1536        dist_cols = ["ca_distance", "cb_distance", "sg_distance", "energy", "rho"]
1537        tor_stats = {}
1538        dist_stats = {}
1539
1540        def circular_mean(series):
1541            """
1542            Calculate the circular mean of a series of angles.
1543
1544            This function converts the input series of angles from degrees to radians,
1545            computes the mean of the sine and cosine of these angles, and then converts
1546            the result back to degrees.
1547
1548            :param series: A sequence of angles in degrees.
1549            :type series: array-like
1550            :return: The circular mean of the input angles in degrees.
1551            :rtype: float
1552            """
1553            radians = np.deg2rad(series)
1554            sin_mean = np.sin(radians).mean()
1555            cos_mean = np.cos(radians).mean()
1556            return np.rad2deg(np.arctan2(sin_mean, cos_mean))
1557
1558        for col in tor_cols[:5]:
1559            tor_stats[col] = {"mean": circular_mean(df[col]), "std": df[col].std()}
1560
1561        tor_stats["torsion_length"] = {
1562            "mean": df["torsion_length"].mean(),
1563            "std": df["torsion_length"].std(),
1564        }
1565
1566        for col in dist_cols:
1567            dist_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
1568
1569        tor_stats = pd.DataFrame(tor_stats, columns=tor_cols)
1570        dist_stats = pd.DataFrame(dist_stats, columns=dist_cols)
1571
1572        return tor_stats, dist_stats
1573
1574    # class ends
1575
1576
def load_disulfides_from_id(
    pdb_id: str,
    pdb_dir=MODEL_DIR,
    verbose=False,
    quiet=True,
    dbg=False,
    cutoff=-1.0,
    sg_cutoff=-1.0,
) -> DisulfideList:
    """
    Loads the Disulfides by PDB ID and returns a DisulfideList of Disulfide objects.
    Assumes the file is downloaded in the pdb_dir path.

    :param pdb_id: The name of the PDB entry.
    :param pdb_dir: Path to the PDB files, defaults to MODEL_DIR. This is: PDB_DIR/good and are
                    the pre-parsed PDB files that have been scanned by the DisulfideDownloader program.
    :param verbose: Print info while parsing.
    :param quiet: Suppress non-error logging output while the disulfides are built. The
                  logger's previous level is restored afterwards, also when an exception is raised.
    :param dbg: Enable debug logging.
    :param cutoff: Cα distance cutoff for filtering disulfides; values <= 0 disable the filter.
    :param sg_cutoff: Sγ distance cutoff for filtering disulfides; values <= 0 disable the filter.
    :return: A deep copy of the DisulfideList of Disulfide objects initialized from the file,
             or None when the file contains no SSBonds.

    Example:

    PDB_DIR defaults to os.getenv('PDB').
    To load the Disulfides from the PDB ID 5rsa we'd use the following:

    >>> from proteusPy.DisulfideList import DisulfideList, load_disulfides_from_id
    >>> from proteusPy.ProteusGlobals import DATA_DIR
    >>> SSlist = DisulfideList([],'5rsa')
    >>> SSlist = load_disulfides_from_id('5rsa', pdb_dir=DATA_DIR, verbose=False)
    >>> SSlist
    [<Disulfide 5rsa_26A_84A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_40A_95A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_58A_110A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_65A_72A, Source: 5rsa, Resolution: 2.0 Å>]
    """

    from proteusPy.Disulfide import Initialize_Disulfide_From_Coords
    from proteusPy.ssparser import extract_ssbonds_and_atoms

    structure_fname = os.path.join(pdb_dir, f"pdb{pdb_id}.ent")

    if verbose:
        mess = f"Parsing structure: {pdb_id}:"
        _logger.info(mess)

    ssbond_atom_list, num_ssbonds, errors = extract_ssbonds_and_atoms(
        structure_fname, verbose=verbose
    )

    if num_ssbonds == 0:
        mess = f"->{pdb_id} has no SSBonds."
        if verbose:
            print(mess)
        _logger.warning(mess)
        return None

    # Remember the current level so that it can be put back exactly as it was.
    previous_level = _logger.level
    if quiet:
        _logger.setLevel(logging.ERROR)

    try:
        if verbose:
            mess = f"{pdb_id} has {num_ssbonds} SSBonds, found: {errors} errors"
            _logger.info(mess)

        # Create the list only once the resolution is known, so that it is
        # built with the parsed value instead of a placeholder.
        resolution = ssbond_atom_list["resolution"]
        SSList = DisulfideList([], pdb_id, resolution)

        # Counts the SSBond records that are not skipped as self references.
        i = 1
        for pair in ssbond_atom_list["pairs"]:
            proximal = pair["proximal"][1]
            chain1_id = pair["proximal"][0]
            distal = pair["distal"][1]
            chain2_id = pair["distal"][0]
            proximal_secondary = pair["prox_secondary"]
            distal_secondary = pair["dist_secondary"]

            if dbg:
                mess = f"Proximal: {proximal} {chain1_id} Distal: {distal} {chain2_id}"
                _logger.debug(mess)

            proximal_int = int(proximal)
            distal_int = int(distal)

            if proximal == distal:
                if verbose:
                    mess = (
                        f"SSBond record has (proximal == distal): "
                        f"{pdb_id} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}."
                    )
                    _logger.error(mess)

            # Same residue number on the same chain: the record points at itself.
            if proximal == distal and chain1_id == chain2_id:
                mess = (
                    f"SSBond record has self reference, skipping: "
                    f"{pdb_id} <{proximal} {chain1_id}> <{distal} {chain2_id}>"
                )

                _logger.error(mess)
                continue

            if verbose:
                mess = (
                    f"SSBond: {i}: {pdb_id}: {proximal} {chain1_id} - {distal} {chain2_id}"
                )
                _logger.info(mess)

            new_ss = Initialize_Disulfide_From_Coords(
                ssbond_atom_list,
                pdb_id,
                chain1_id,
                chain2_id,
                proximal_int,
                distal_int,
                resolution,
                proximal_secondary,
                distal_secondary,
                verbose=verbose,
                quiet=quiet,
                dbg=dbg,
            )

            if new_ss is not None:
                SSList.append(new_ss)
                if verbose:
                    mess = f"Initialized Disulfide: {pdb_id} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}."
                    _logger.info(mess)
            else:
                mess = f"Cannot initialize Disulfide: {pdb_id} <{proximal} {chain1_id}> <{distal} {chain2_id}>"
                _logger.error(mess)

            i += 1
    finally:
        # Undo the temporary silencing even if building a disulfide failed.
        if quiet:
            _logger.setLevel(previous_level)

    num_ssbonds = len(SSList)

    if cutoff > 0:
        SSList = SSList.filter_by_distance(cutoff)
        delta = num_ssbonds - len(SSList)
        if delta:
            _logger.error(
                "Filtered %d -> %d SSBonds by Ca distance, %s, delta is: %d",
                num_ssbonds,
                len(SSList),
                pdb_id,
                delta,
            )
        num_ssbonds = len(SSList)

    if sg_cutoff > 0:
        SSList = SSList.filter_by_sg_distance(sg_cutoff)
        delta = num_ssbonds - len(SSList)
        if delta:
            _logger.error(
                "Filtered %d -> %d SSBonds by Sγ distance, %s, delta is: %d",
                num_ssbonds,
                len(SSList),
                pdb_id,
                delta,
            )

    return copy.deepcopy(SSList)
1748
1749
def extract_disulfide(
    pdb_filename: str, verbose=False, quiet=True, pdbdir=PDB_DIR
) -> DisulfideList:
    """
    Read the PDB file represented by `pdb_filename` and return a `DisulfideList`
    containing the Disulfide bonds found.

    :param pdb_filename:   The filename of the PDB file to read, formatted as
                           'pdb{id}.ent'. A leading directory is ignored when the ID is extracted.
    :param verbose:        Display more messages (default: False).
    :param quiet:          Turn off DisulfideConstruction warnings (default: True).
    :param pdbdir:         Path to PDB files (default: PDB_DIR).
    :return:               A `DisulfideList` containing the Disulfide bonds found; an empty
                           list when none are found.
    :rtype:                DisulfideList
    :raises ValueError:    If the filename does not follow the 'pdb{id}.ent' format.
    """

    def extract_id_from_filename(filename: str) -> str:
        """
        Extract the ID from a filename formatted as 'pdb{id}.ent'.

        :param filename: The filename to extract the ID from.
        :type filename: str
        :return: The extracted ID.
        :rtype: str
        :raises ValueError: If the filename does not follow the expected format.
        """
        basename = os.path.basename(filename)
        # Test and slice the basename only, so that a directory prefix can
        # neither defeat the check nor leak into the ID.
        if basename.startswith("pdb") and basename.endswith(".ent"):
            return basename[3:-4]

        # The doubled braces print a literal "{id}" instead of interpolating
        # the builtin ``id`` function.
        mess = f"Filename {filename} does not follow the expected format 'pdb{{id}}.ent'"
        raise ValueError(mess)

    pdbid = extract_id_from_filename(pdb_filename)

    _sslist = load_disulfides_from_id(
        pdbid, verbose=verbose, quiet=quiet, pdb_dir=pdbdir
    )

    # The loader returns None when the file has no SSBonds, so the None test
    # has to come before len().
    if _sslist is None or len(_sslist) == 0:
        mess = f"Can't find SSBonds: {pdbid}"
        _logger.error(mess)
        return DisulfideList([], pdbid)

    return _sslist
1797
1798
if __name__ == "__main__":
    # Run the doctests embedded in this module when it is executed as a script.
    from doctest import testmod

    testmod()
1803
1804# end of file
# NOTE(review): presumably figure resolution (dots per inch) and figure size
# defaults for plotting -- units and usage are not visible here, confirm.
DPI = 220
WIDTH = 6.0
HEIGHT = 6.0
# NOTE(review): presumably the lower/upper bounds of the torsion angle range
# in degrees -- verify against the code that uses them.
TORMIN = -179.9
TORMAX = 180.0
# Bin count passed as ``nbins`` to the plotly histograms in this module.
NBINS = 380
# NOTE(review): presumably the column names of the per-disulfide distance
# DataFrame -- verify against the function that builds it.
Distance_DF_Cols = ['source', 'ss_id', 'proximal', 'distal', 'energy', 'ca_distance', 'cb_distance', 'sg_distance']
class DisulfideList(collections.UserList):
  91class DisulfideList(UserList):
  92    """
  93    The class provides a sortable list for Disulfide objects.
  94    Indexing and slicing are supported, as well as typical list operations like
  95    ``.insert()``, ``.append()`` and ``.extend().`` The DisulfideList object must be initialized
  96    with an iterable (tuple, list) and a name. Sorting is keyed by torsional energy.
  97
  98    The class can also render Disulfides to a pyVista window using the
  99    [display()](#DisulfideList.display) and [display_overlay()](#DisulfideList.display_overlay) methods.
 100    See below for examples.\n
 101
 102    Examples:
 103    >>> from proteusPy import Disulfide, DisulfideLoader, DisulfideList, Load_PDB_SS
 104
 105    Instantiate some variables. Note: the list is initialized with an iterable and a name (optional)
 106
 107    >>> SS = Disulfide('tmp')
 108
 109    The list is initialized with an iterable, a name and resolution. Name and resolution
 110    are optional.
 111    >>> SSlist = DisulfideList([],'ss', -1.0)
 112
 113    Load the database.
 114    >>> PDB_SS = Load_PDB_SS(verbose=False, subset=True)
 115
 116    Get the first disulfide via indexing.
 117    >>> SS = PDB_SS[0]
 118
 119    # assert str(SS) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>"
 120
 121    >>> SS4yys = PDB_SS['4yys']
 122
 123    # assert str(SS4yys) == "[<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_56A_98A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_156A_207A, Source: 4yys, Resolution: 1.35 Å>]"
 124
 125    Make some empty disulfides.
 126    >>> ss1 = Disulfide('ss1')
 127    >>> ss2 = Disulfide('ss2')
 128
 129    Make a DisulfideList containing ss1, named 'tmp'
 130    >>> sslist = DisulfideList([ss1], 'tmp')
 131    >>> sslist.append(ss2)
 132
 133    Extract the first disulfide
 134    >>> ss1 = PDB_SS[0]
 135
 136    # assert str(ss1.pprint_all()) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å\n Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))>\nProximal Coordinates:\n   N: <Vector -2.36, -20.48, 5.21>\n   Cα: <Vector -2.10, -19.89, 3.90>\n   C: <Vector -1.12, -18.78, 4.12>\n   O: <Vector -1.30, -17.96, 5.03>\n   Cβ: <Vector -3.38, -19.31, 3.32>\n   Sγ: <Vector -3.24, -18.40, 1.76>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\n Distal Coordinates:\n   N: <Vector -0.60, -18.71, -1.62>\n   Cα: <Vector -0.48, -19.10, -0.22>\n   C: <Vector 0.92, -19.52, 0.18>\n   O: <Vector 1.10, -20.09, 1.25>\n   Cβ: <Vector -1.48, -20.23, 0.08>\n   Sγ: <Vector -3.22, -19.69, 0.18>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n<BLANKLINE>\n Proximal Internal Coords:\n   N: <Vector -0.41, 1.40, -0.00>\n   Cα: <Vector 0.00, 0.00, 0.00>\n   C: <Vector 1.50, 0.00, 0.00>\n   O: <Vector 2.12, 0.71, -0.80>\n   Cβ: <Vector -0.50, -0.70, -1.25>\n   Sγ: <Vector 0.04, -2.41, -1.50>\n   Cprev <Vector -2.67, -21.75, 5.36>\n   Nnext: <Vector -0.02, -18.76, 3.36>\nDistal Internal Coords:\n   N: <Vector 1.04, -5.63, 1.17>\n   Cα: <Vector 1.04, -4.18, 1.31>\n   C: <Vector 1.72, -3.68, 2.57>\n   O: <Vector 1.57, -2.51, 2.92>\n   Cβ: <Vector -0.41, -3.66, 1.24>\n   Sγ: <Vector -1.14, -3.69, -0.43>\n   Cprev <Vector -0.73, -17.44, -2.01>\n   Nnext: <Vector 1.92, -19.18, -0.63>\n Χ1-Χ5: 174.63°, 82.52°, -83.32°, -62.52° -73.83°, 138.89°, 1.70 kcal/mol\n Cα Distance: 4.50 Å\n Torsion length: 231.53 deg>"
 137
 138    Get a list of disulfides via slicing
 139    >>> subset = DisulfideList(PDB_SS[0:10],'subset')
 140
 141    Display the subset disulfides overlaid onto the same coordinate frame,
 142    (proximal N, Ca, C').
 143
 144    The disulfides are colored individually to facilitate inspection.
 145
 146    >>> subset.display_overlay()
 147    """
 148
 149    def __init__(self, iterable, pid: str = "nil", res=-1.0, quiet=True, fast=False):
 150        """
 151        Initialize the DisulfideList.
 152
 153        :param iterable: An iterable of disulfide bonds.
 154        :type iterable: iterable
 155        :param pid: Name for the list, default is "nil".
 156        :type pid: str
 157        :param res: Resolution, default is -1.0. If -1, the average resolution is used.
 158        :type res: float
 159        :param quiet: If True, suppress output, default is True.
 160        :type quiet: bool
 161        :param fast: If True, enable fast mode, default is False.
 162        :type fast: bool
 163
 164        Example:
 165        >>> from proteusPy import DisulfideList, Disulfide
 166
 167        Initialize some empty disulfides.
 168        >>> ss1 = Disulfide('ss1')
 169        >>> ss2 = Disulfide('ss2')
 170        >>> ss3 = Disulfide('ss3')
 171
 172        Make a list containing the disulfides.
 173        >>> sslist = DisulfideList([ss1, ss2], 'sslist')
 174        >>> sslist
 175        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
 176        >>> sslist.append(ss3)
 177        >>> sslist
 178        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
 179        """
 180
 181        super().__init__(self.validate_ss(item) for item in iterable)
 182
 183        self.pdb_id = pid
 184        self.quiet = quiet
 185
 186        if not fast:
 187            if res == -1:
 188                self._res = self.average_resolution
 189            else:
 190                self._res = res
 191        else:
 192            self._res = res
 193
 194    def __getitem__(self, item):
 195        """
 196        Retrieve a disulfide from the list. Internal only.
 197
 198        :param item: Index or slice
 199        :return: Sublist
 200        """
 201        if isinstance(item, slice):
 202            indices = range(*item.indices(len(self.data)))
 203            ind_list = list(indices)
 204            first_ind = ind_list[0]
 205            last_ind = ind_list[-1]
 206            name = (
 207                self.data[first_ind].pdb_id
 208                + f"_slice[{first_ind}:{last_ind+1}]_{self.data[last_ind].pdb_id}"
 209            )
 210            sublist = [self.data[i] for i in indices]
 211            return DisulfideList(sublist, name)
 212        return UserList.__getitem__(self, item)
 213
 214    def __setitem__(self, index, item):
 215        self.data[index] = self.validate_ss(item)
 216
 217    # Rendering engine calculates and instantiates all bond
 218    # cylinders and atomic sphere meshes. Called by all high level routines
 219
 220    def _render(self, pl, style, res=100, panelsize=WINSIZE) -> pv.Plotter:
 221        """
 222        Display a window showing the list of disulfides in the given style.
 223        :param style: one of 'cpk', 'bs', 'sb', 'plain', 'cov', 'pd'
 224        :return: Window in the relevant style
 225        """
 226        ssList = self.data
 227        tot_ss = len(ssList)  # number off ssbonds
 228        rows, cols = grid_dimensions(tot_ss)
 229        res = 100
 230
 231        if tot_ss > 30:
 232            res = 60
 233        if tot_ss > 60:
 234            res = 30
 235        if tot_ss > 90:
 236            res = 12
 237
 238        total_plots = rows * cols
 239        for idx in range(min(tot_ss, total_plots)):
 240            if not self.quiet:
 241                if idx % 5 == 0:
 242                    _logger.info("Rendering %d of %d bonds.", idx + 1, tot_ss)
 243
 244            r = idx // cols
 245            c = idx % cols
 246            pl.subplot(r, c)
 247
 248            ss = ssList[idx]
 249            src = ss.pdb_id
 250            enrg = ss.energy
 251            title = f"{src} {ss.proximal}{ss.proximal_chain}-{ss.distal}{ss.distal_chain}: E: {enrg:.2f}, Cα: {ss.ca_distance:.2f} Å, Tors: {ss.torsion_length:.2f}°"
 252            fontsize = calculate_fontsize(title, panelsize)
 253            pl.add_title(title=title, font_size=fontsize)
 254            ss._render(
 255                pl,
 256                style=style,
 257                res=res,
 258            )
 259
 260        return pl
 261
 262    @property
 263    def average_ca_distance(self):
 264        """
 265        Return the Average energy (kcal/mol) for the Disulfides in the list.
 266
 267        :return: Average energy (kcal/mol) between all atoms in the list
 268        """
 269        sslist = self.data
 270        tot = len(sslist)
 271        if tot == 0:
 272            return 0.0
 273
 274        total_dist = sum(ss.ca_distance for ss in sslist)
 275        return total_dist / tot
 276
 277    @property
 278    def average_distance(self):
 279        """
 280        Return the Average distance (Å) between the atoms in the list.
 281
 282        :return: Average distance (Å) between all atoms in the list
 283
 284        """
 285        sslist = self.data
 286        cnt = 1
 287
 288        total = 0.0
 289        for ss1 in sslist:
 290            for ss2 in sslist:
 291                if ss2 == ss1:
 292                    continue
 293                total += ss1.Distance_RMS(ss2)
 294                cnt += 1
 295
 296        return total / cnt
 297
 298    @property
 299    def average_energy(self):
 300        """
 301        Return the Average energy (kcal/mol) for the Disulfides in the list.
 302
 303        :return: Average energy (kcal/mol) between all atoms in the list
 304        """
 305        sslist = self.data
 306        tot = len(sslist)
 307        if tot == 0:
 308            return 0.0
 309
 310        total_energy = sum(ss.energy for ss in sslist)
 311        return total_energy / tot
 312
 313    @property
 314    def average_conformation(self):
 315        """
 316        Return the average conformation for the disulfides in the list.
 317
 318        :return: Average conformation: [x1, x2, x3, x4, x5]
 319        """
 320        sslist = self.data
 321        res = np.mean([ss.torsion_array for ss in sslist], axis=0)
 322        return res
 323
 324    def append(self, item):
 325        """
 326        Append the list with item
 327
 328        :param item: Disulfide to add
 329        :type item: Disulfide
 330        """
 331        self.data.append(self.validate_ss(item))
 332
 333    @property
 334    def average_resolution(self) -> float:
 335        """
 336        Compute and return the average structure resolution for the given list.
 337
 338        :return: Average resolution (A)
 339        """
 340        resolutions = [ss.resolution for ss in self.data if ss.resolution != -1.0]
 341        return sum(resolutions) / len(resolutions) if resolutions else -1.0
 342
 343    @property
 344    def resolution(self) -> float:
 345        """
 346        Compute and return the average structure resolution for the given list.
 347
 348        :return: Average resolution (A)
 349        """
 350        return self._res
 351
 352    @resolution.setter
 353    def resolution(self, value: float):
 354        """
 355        Set the average structure resolution for the given list.
 356
 357        :param value: The new resolution value to set.
 358        :type value: float
 359        """
 360        if not isinstance(value, float):
 361            raise TypeError("Resolution must be a float.")
 362        self._res = value
 363
 364    @property
 365    def average_torsion_distance(self):
 366        """
 367        Return the average distance in torsion space (degrees), between all pairs in the
 368        DisulfideList
 369
 370        :return: Torsion Distance (degrees)
 371        """
 372        sslist = self.data
 373        total = 0
 374        cnt = 0
 375
 376        for ss1, ss2 in combinations(sslist, 2):
 377            total += ss1.torsion_distance(ss2)
 378            cnt += 1
 379
 380        return float(total / cnt) if cnt > 0 else 0
 381
 382    def build_distance_df(self) -> pd.DataFrame:
 383        """
 384        Create a dataframe containing the input DisulfideList Cα-Cα and Sg-Sg distances, energy.
 385        This can take several minutes for the entire database.
 386
 387        :return: DataFrame containing Ca distances
 388        :rtype: pd.DataFrame
 389        """
 390        # create a list to collect rows as dictionaries
 391        rows = []
 392        i = 0
 393        sslist = self.data
 394        total_length = len(sslist)
 395        update_interval = max(1, total_length // 20)  # 5% of the list length
 396
 397        if self.quiet:
 398            pbar = sslist
 399        else:
 400            pbar = tqdm(sslist, ncols=PBAR_COLS, leave=False)
 401
 402        for ss in pbar:
 403            new_row = {
 404                "source": ss.pdb_id,
 405                "ss_id": ss.name,
 406                "proximal": ss.proximal,
 407                "distal": ss.distal,
 408                "energy": ss.energy,
 409                "ca_distance": ss.ca_distance,
 410                "cb_distance": ss.cb_distance,
 411                "sg_distance": ss.sg_distance,
 412            }
 413            rows.append(new_row)
 414            i += 1
 415
 416            if not self.quiet:
 417                if i % update_interval == 0 or i == total_length - 1:
 418                    pbar.update(update_interval)
 419
 420        # create the dataframe from the list of dictionaries
 421        SS_df = pd.DataFrame(rows, columns=Distance_DF_Cols)
 422
 423        return SS_df
 424
 425    def build_torsion_df(self) -> pd.DataFrame:
 426        """
 427        Create a dataframe containing the input DisulfideList torsional parameters,
 428        Cα-Cα and Sg-Sg distances, energy, and phi-psi angles. This can take several minutes for the
 429        entire database.
 430
 431        :return: pd.DataFrame containing the torsions
 432        """
 433        # create a list to collect rows as dictionaries
 434        rows = []
 435        i = 0
 436        total_length = len(self.data)
 437        update_interval = max(1, total_length // 20)  # 5% of the list length
 438
 439        sslist = self.data
 440        if self.quiet:
 441            pbar = sslist
 442        else:
 443            pbar = tqdm(sslist, ncols=PBAR_COLS, leave=False)
 444
 445        for ss in pbar:
 446            new_row = {
 447                "source": ss.pdb_id,
 448                "ss_id": ss.name,
 449                "proximal": ss.proximal,
 450                "distal": ss.distal,
 451                "chi1": ss.chi1,
 452                "chi2": ss.chi2,
 453                "chi3": ss.chi3,
 454                "chi4": ss.chi4,
 455                "chi5": ss.chi5,
 456                "energy": ss.energy,
 457                "ca_distance": ss.ca_distance,
 458                "cb_distance": ss.cb_distance,
 459                "sg_distance": ss.sg_distance,
 460                "psi_prox": ss.psiprox,
 461                "phi_prox": ss.phiprox,
 462                "phi_dist": ss.phidist,
 463                "psi_dist": ss.psidist,
 464                "torsion_length": ss.torsion_length,
 465                "rho": ss.rho,
 466                "binary_class_string": ss.binary_class_string,
 467                "octant_class_string": ss.octant_class_string,
 468            }
 469            rows.append(new_row)
 470            i += 1
 471
 472            if not self.quiet:
 473                if i % update_interval == 0 or i == total_length - 1:
 474                    pbar.update(update_interval)
 475
 476        if not self.quiet:
 477            pbar.close()
 478
 479        # create the dataframe from the list of dictionaries
 480        SS_df = pd.DataFrame(rows, columns=Torsion_DF_Cols)
 481
 482        return SS_df
 483
 484    def by_chain(self, chain: str):
 485        """
 486        Return a DisulfideList from the input chain identifier.
 487
 488        :param chain: chain identifier, 'A', 'B, etc
 489        :return: DisulfideList containing disulfides within that chain.
 490        """
 491
 492        reslist = DisulfideList([], chain)
 493        sslist = self.data
 494
 495        for ss in sslist:
 496            pchain = ss.proximal_chain
 497            dchain = ss.distal_chain
 498            if pchain == dchain:
 499                if pchain == chain:
 500                    reslist.append(ss)
 501            else:
 502                print(f"Cross chain SS: {ss.repr_compact}:")
 503        return reslist
 504
 505    @property
 506    def center_of_mass(self):
 507        """
 508        Calculate the center of mass for the Disulfide list
 509        """
 510        sslist = self.data
 511        tot = len(sslist)
 512        if tot == 0:
 513            return 0.0
 514
 515        total_cofmass = sum(ss.cofmass for ss in sslist)
 516        return total_cofmass / tot
 517
 518    def describe(self):
 519        """
 520        Prints out relevant attributes of the given disulfideList.
 521
 522        :param disulfideList: A list of disulfide objects.
 523        :param list_name: The name of the list.
 524        """
 525        name = self.pdb_id
 526        avg_distance = self.average_ca_distance
 527        avg_energy = self.average_energy
 528        avg_resolution = self.average_resolution
 529        list_length = len(self.data)
 530
 531        if list_length == 0:
 532            avg_bondangle = 0
 533            avg_bondlength = 0
 534        else:
 535            total_bondangle = 0
 536            total_bondlength = 0
 537
 538            for ss in self.data:
 539                total_bondangle += ss.bond_angle_ideality
 540                total_bondlength += ss.bond_length_ideality
 541
 542            avg_bondangle = total_bondangle / list_length
 543            avg_bondlength = total_bondlength / list_length
 544
 545        print(f"DisulfideList: {name}")
 546        print(f"Length: {list_length}")
 547        print(f"Average energy: {avg_energy:.2f} kcal/mol")
 548        print(f"Average CA distance: {avg_distance:.2f} Å")
 549        print(f"Average Resolution: {avg_resolution:.2f} Å")
 550        print(f"Bond angle deviation: {avg_bondangle:.2f}°")
 551        print(f"Bond length deviation: {avg_bondlength:.2f} Å")
 552
 553    def display(self, style="sb", light="auto", panelsize=512):
 554        """
 555        Display the Disulfide list in the specific rendering style.
 556
 557        :param single: Display the bond in a single panel in the specific style.
 558        :param style:  Rendering style: One of:\n
 559            - 'sb' - split bonds
 560            - 'bs' - ball and stick
 561            - 'cpk' - CPK style
 562            - 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
 563            - 'plain' - boring single color
 564        :light: If True, light background, if False, dark
 565        """
 566        # from proteusPy.utility import get_theme
 567
 568        ssbonds = self.data
 569        tot_ss = len(ssbonds)  # number off ssbonds
 570        rows, cols = grid_dimensions(tot_ss)
 571        winsize = (panelsize * cols, panelsize * rows)
 572
 573        set_pyvista_theme(light)
 574
 575        # title = f"<{pid}> {resolution:.2f} Å: ({tot_ss} SS), Avg E: {avg_enrg:.2f} kcal/mol, Avg Dist: {avg_dist:.2f} Å"
 576
 577        pl = pv.Plotter(window_size=winsize, shape=(rows, cols))
 578        pl = self._render(pl, style, panelsize=panelsize)
 579        pl.enable_anti_aliasing("msaa")
 580
 581        # the subwindows already show a title
 582        # pl.add_title(title=title, font_size=fontsize)
 583
 584        pl.link_views()
 585        pl.reset_camera()
 586        pl.show()
 587
 588    def display_torsion_statistics(
 589        self,
 590        display=True,
 591        save=False,
 592        fname="ss_torsions.png",
 593        theme="auto",
 594    ):
 595        """
 596        Display torsion and distance statistics for a given Disulfide list.
 597
 598        :param display: Whether to display the plot in the notebook. Default is True.
 599        :type display: bool
 600        :param save: Whether to save the plot as an image file. Default is False.
 601        :type save: bool
 602        :param fname: The name of the image file to save. Default is 'ss_torsions.png'.
 603        :type fname: str
 604        :param theme: The theme to use for the plot. Default is 'Auto'. Options are 'Auto', 'light', and 'dark'.
 605        :type theme: str
 606        :return: none
 607        """
 608
 609        if self.length == 0:
 610            _logger.warning("Empty DisulfideList. Nothing to display.")
 611            return
 612
 613        set_plotly_theme(theme)
 614        title = f"{self.id}: {self.length} members"
 615
 616        tor_vals, dist_vals = self.calculate_torsion_statistics()
 617
 618        tor_mean_vals = tor_vals.loc["mean"]
 619        tor_std_vals = tor_vals.loc["std"]
 620
 621        dist_mean_vals = dist_vals.loc["mean"]
 622        dist_std_vals = dist_vals.loc["std"]
 623
 624        fig = make_subplots(
 625            rows=2, cols=2, vertical_spacing=0.125, column_widths=[1, 1]
 626        )
 627
 628        fig.update_layout(
 629            title={
 630                "text": title,
 631                "xanchor": "center",
 632                # 'y':.9,
 633                "x": 0.5,
 634                "yanchor": "top",
 635            },
 636            width=1024,
 637            height=1024,
 638        )
 639
 640        fig.add_trace(
 641            go.Bar(
 642                x=["X1", "X2", "X3", "X4", "X5"],
 643                y=tor_mean_vals[:5],
 644                name="Torsion Angle (°) ",
 645                error_y=dict(type="data", array=tor_std_vals, visible=True),
 646            ),
 647            row=1,
 648            col=1,
 649        )
 650
 651        fig.add_trace(
 652            go.Bar(
 653                x=["rho"],
 654                y=[dist_mean_vals[4]],
 655                name="ρ (°)",
 656                error_y=dict(type="data", array=[dist_std_vals[4]], visible=True),
 657            ),
 658            row=1,
 659            col=1,
 660        )
 661
 662        # Update the layout of the subplot
 663        # Cα N, Cα, Cβ, C', Sγ Å °
 664
 665        fig.update_yaxes(
 666            title_text="Dihedral Angle (°)", range=[-200, 200], row=1, col=1
 667        )
 668        fig.update_yaxes(range=[0, 320], row=2, col=2)
 669
 670        # Add another subplot for the mean values of energy
 671        fig.add_trace(
 672            go.Bar(
 673                x=["Strain Energy (kcal/mol)"],
 674                y=[dist_mean_vals[3]],
 675                name="Energy (kcal/mol)",
 676                error_y=dict(
 677                    type="data",
 678                    array=[dist_std_vals[3].tolist()],
 679                    width=0.25,
 680                    visible=True,
 681                ),
 682            ),
 683            row=1,
 684            col=2,
 685        )
 686        fig.update_traces(width=0.25, row=1, col=2)
 687
 688        # Update the layout of the subplot
 689        # fig.update_xaxes(title_text="Energy", row=1, col=2)
 690        fig.update_yaxes(
 691            title_text="kcal/mol", range=[0, 8], row=1, col=2
 692        )  # max possible DSE
 693
 694        # Add another subplot for the mean values of ca_distance
 695        fig.add_trace(
 696            go.Bar(
 697                x=["Cα Distance (Å)", "Cβ Distance (Å)", "Sγ Distance (Å)"],
 698                y=[dist_mean_vals[0], dist_mean_vals[1], dist_mean_vals[2]],
 699                name="Distances (Å)",
 700                error_y=dict(
 701                    type="data",
 702                    array=[
 703                        dist_std_vals[0].tolist(),
 704                        dist_std_vals[1].tolist(),
 705                        dist_std_vals[2].tolist(),
 706                    ],
 707                    width=0.25,
 708                    visible=True,
 709                ),
 710            ),
 711            row=2,
 712            col=1,
 713        )
 714        # Update the layout of the subplot
 715        fig.update_yaxes(title_text="Distance (A)", range=[0, 8], row=2, col=1)  #
 716        fig.update_traces(width=0.25, row=2, col=1)
 717
 718        # Add a scatter subplot for torsion length column
 719        fig.add_trace(
 720            go.Bar(
 721                x=["Torsion Length (Å)"],
 722                y=[tor_mean_vals[5]],
 723                name="Torsion Length (Å)",
 724                error_y=dict(
 725                    type="data", array=[tor_std_vals[5]], width=0.25, visible=True
 726                ),
 727            ),
 728            row=2,
 729            col=2,
 730        )
 731        # Update the layout of the subplot
 732        fig.update_yaxes(title_text="Torsion Length", range=[0, 350], row=2, col=2)
 733        fig.update_traces(width=0.25, row=2, col=2)
 734
 735        # Update the error bars
 736        fig.update_traces(
 737            error_y_thickness=2,
 738            error_y_color="gray",
 739            texttemplate="%{y:.2f} ± %{error_y.array:.2f}",
 740            textposition="outside",
 741        )  # , row=1, col=1)
 742
 743        if display:
 744            fig.show()
 745
 746        if save:
 747            fig.write_image(Path(fname))
 748
 749        return
 750
 751    @property
 752    def distance_df(self) -> pd.DataFrame:
 753        """
 754        Build and return the distance dataframe for the input list.
 755        This can take considerable time for the entire list.
 756
 757        :return: Dataframe containing the Cα-Cα distances for the given list.
 758
 759        Example:
 760        >>> from proteusPy import Disulfide, Load_PDB_SS, DisulfideList
 761        >>> PDB_SS = Load_PDB_SS()
 762
 763        """
 764        return self.build_distance_df()
 765
 766    def display_overlay(
 767        self,
 768        screenshot=False,
 769        movie=False,
 770        verbose=False,
 771        fname="ss_overlay.png",
 772        light="auto",
 773        winsize=WINSIZE,
 774    ):
 775        """
 776        Display all disulfides in the list overlaid in stick mode against
 777        a common coordinate frames. This allows us to see all of the disulfides
 778        at one time in a single view. Colors vary smoothy between bonds.
 779
 780        :param screenshot: Save a screenshot, defaults to False
 781        :param movie: Save a movie, defaults to False
 782        :param verbose: Verbosity, defaults to True
 783        :param fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
 784        :param light: Background color, defaults to True for White. False for Dark.
 785        """
 786
 787        # from proteusPy.utility import get_theme
 788
 789        pid = self.pdb_id
 790
 791        ssbonds = self.data
 792        tot_ss = len(ssbonds)  # number off ssbonds
 793        avg_enrg = self.average_energy
 794        avg_dist = self.average_distance
 795        resolution = self.average_resolution
 796
 797        res = 64
 798
 799        if tot_ss > 30:
 800            res = 48
 801        if tot_ss > 60:
 802            res = 16
 803        if tot_ss > 90:
 804            res = 8
 805
 806        title = f"<{pid}> {resolution:.2f} Å: ({tot_ss} SS), E: {avg_enrg:.2f} kcal/mol, Dist: {avg_dist:.2f} Å"
 807        fontsize = calculate_fontsize(title, winsize[0])
 808
 809        set_pyvista_theme(light)
 810
 811        if movie:
 812            pl = pv.Plotter(window_size=winsize, off_screen=True)
 813        else:
 814            pl = pv.Plotter(window_size=winsize, off_screen=False)
 815
 816        pl.add_title(title=title, font_size=fontsize)
 817        pl.enable_anti_aliasing("msaa")
 818        pl.add_axes()
 819
 820        mycol = np.zeros(shape=(tot_ss, 3))
 821        mycol = get_jet_colormap(tot_ss)
 822
 823        # scale the overlay bond radii down so that we can see the individual elements better
 824        # maximum 90% reduction
 825
 826        brad = BOND_RADIUS if tot_ss < 10 else BOND_RADIUS * 0.75
 827        brad = brad if tot_ss < 25 else brad * 0.8
 828        brad = brad if tot_ss < 50 else brad * 0.8
 829        brad = brad if tot_ss < 100 else brad * 0.6
 830
 831        # print(f'Brad: {brad}')
 832        if verbose:
 833            pbar = tqdm(range(tot_ss), ncols=PBAR_COLS)
 834        else:
 835            pbar = range(tot_ss)
 836
 837        for i, ss in zip(pbar, ssbonds):
 838            color = [int(mycol[i][0]), int(mycol[i][1]), int(mycol[i][2])]
 839            ss._render(
 840                pl,
 841                style="plain",
 842                bondcolor=color,
 843                translate=False,
 844                bond_radius=brad,
 845                res=res,
 846            )
 847
 848        pl.reset_camera()
 849
 850        if screenshot:
 851            pl.show(auto_close=False)  # allows for manipulation
 852            # Take the screenshot after ensuring the plotter is still active
 853            try:
 854                pl.screenshot(fname)
 855                if verbose:
 856                    print(f" -> display_overlay(): Saved image to: {fname}")
 857            except RuntimeError as e:
 858                _logger.error("Error saving screenshot: %s", e)
 859
 860        elif movie:
 861            if verbose:
 862                print(f" -> display_overlay(): Saving mp4 animation to: {fname}")
 863
 864            pl.open_movie(fname)
 865            path = pl.generate_orbital_path(n_points=360)
 866            pl.orbit_on_path(path, write_frames=True)
 867            pl.close()
 868
 869            if verbose:
 870                print(f" -> display_overlay(): Saved mp4 animation to: {fname}")
 871        else:
 872            pl.show()
 873
 874        return
 875
 876    def extend(self, other):
 877        """
 878        Extend the Disulfide list with other.
 879
 880        :param other: extension
 881        :type item: DisulfideList
 882        """
 883
 884        if isinstance(other, type(self)):
 885            self.data.extend(other)
 886        else:
 887            self.data.extend(self.validate_ss(item) for item in other)
 888
 889    def filter_by_distance(self, distance: float = -1.0, minimum: float = 2.0):
 890        """
 891        Return a DisulfideList filtered by to between the maxium Ca distance and
 892        the minimum, which defaults to 2.0A.
 893
 894        :param distance: Distance in Å
 895        :param minimum: Distance in Å
 896        :return: DisulfideList containing disulfides with the given distance.
 897        """
 898
 899        reslist = []
 900        sslist = self.data
 901
 902        # if distance is -1.0, return the entire list
 903        if distance == -1.0:
 904            return sslist.copy()
 905
 906        reslist = [
 907            ss
 908            for ss in sslist
 909            if ss.ca_distance < distance and ss.ca_distance > minimum
 910        ]
 911
 912        return DisulfideList(reslist, f"filtered by distance < {distance:.2f}")
 913
 914    def filter_by_sg_distance(self, distance: float = -1.0, minimum: float = 1.0):
 915        """
 916        Return a DisulfideList filtered by to between the maxium Sg distance and
 917        the minimum, which defaults to 1.0A.
 918
 919        :param distance: Distance in Å
 920        :param minimum: Distance in Å
 921        :return: DisulfideList containing disulfides with the given distance.
 922        """
 923
 924        reslist = []
 925        sslist = self.data
 926
 927        # if distance is -1.0, return the entire list
 928        if distance == -1.0:
 929            return sslist.copy()
 930
 931        reslist = [
 932            ss
 933            for ss in sslist
 934            if ss.sg_distance < distance and ss.sg_distance > minimum
 935        ]
 936
 937        return DisulfideList(reslist, f"filtered by Sγ distance < {distance:.2f}")
 938
 939    def filter_by_bond_ideality(self, angle: float = -1.0):
 940        """
 941        Return a DisulfideList filtered by bond angle ideality between the maxium angle
 942        and the minimum, which defaults to 0.0°.
 943
 944        :param angle: Angle in degrees
 945        :param minimum: Angle in degrees
 946        :return: DisulfideList containing disulfides with the given angle.
 947        """
 948
 949        reslist = []
 950        sslist = self.data
 951
 952        # if angle is -1.0, return the entire list
 953        if angle == -1.0:
 954            return sslist.copy()
 955
 956        reslist = [ss for ss in sslist if ss.bond_angle_ideality < angle]
 957
 958        return DisulfideList(reslist, f"filtered by bond angle < {angle:.2f}")
 959
 960    def get_by_name(self, name):
 961        """
 962        Returns the Disulfide with the given name from the list.
 963        """
 964        for ss in self.data:
 965            if ss.name == name:
 966                return ss.copy()  # or ss.copy() !!!
 967        return None
 968
 969    def get_chains(self):
 970        """
 971        Return the chain IDs for chains within the given Disulfide.
 972        :return: Chain IDs for given Disulfide
 973        """
 974
 975        res_dict = {"xxx"}
 976        sslist = self.data
 977
 978        for ss in sslist:
 979            pchain = ss.proximal_chain
 980            dchain = ss.distal_chain
 981            res_dict.update(pchain)
 982            res_dict.update(dchain)
 983
 984        res_dict.remove("xxx")
 985
 986        return res_dict
 987
 988    def get_torsion_array(self):
 989        """
 990        Return a 2D NumPy array representing the dihedral angles in the given disulfide list.
 991
 992        :return: A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row
 993                of the array represents the dihedral angles of a disulfide bond, in the following order:
 994                [X1, X, X3, X4, X5], where i is the index of the disulfide bond in the list.
 995        """
 996        return np.array([ss.torsion_array for ss in self.data])
 997
 998    def has_chain(self, chain) -> bool:
 999        """
1000        Returns True if given chain contained in Disulfide, False otherwise.
1001        :return: Returns True if given chain contained in Disulfide, False otherwise.
1002        """
1003
1004        chns = {"xxx"}
1005        chns = self.get_chains()
1006        if chain in chns:
1007            return True
1008        else:
1009            return False
1010
1011    @property
1012    def id(self):
1013        """
1014        PDB ID of the list
1015        """
1016        return self.pdb_id
1017
1018    @id.setter
1019    def id(self, value):
1020        """
1021        Set the DisulfideList ID
1022
1023        Parameters
1024        ----------
1025        value : str
1026            List ID
1027        """
1028        self.pdb_id = value
1029
1030    def TorsionGraph(
1031        self, display=True, save=False, fname="ss_torsions.png", theme="Auto"
1032    ):
1033        """
1034        Generate and optionally display or save a torsion graph.
1035
1036        This method generates a torsion graph based on the torsion statistics
1037        of disulfide bonds. It can display the graph, save it to a file, or both.
1038
1039        :param display: If True, the torsion graph will be displayed. Default is True.
1040        :type display: bool
1041        :param save: If True, the torsion graph will be saved to a file. Default is False.
1042        :type save: bool
1043        :param fname: The filename to save the torsion graph. Default is "ss_torsions.png".
1044        :type fname: str
1045        :param theme: One of 'auto', 'light', or 'dark'. Default is 'auto'.
1046        :type theme: str
1047
1048        :return: None
1049        """
1050        # tor_stats, dist_stats = self.calculate_torsion_statistics()
1051        self.display_torsion_statistics(
1052            display=display, save=save, fname=fname, theme=theme
1053        )
1054
1055    def translate(self, translation_vector) -> None:
1056        """
1057        Translate the DisulfideList by the given translation vector.
1058        Note: The translation is a vector SUBTRACTION, not addition.
1059        This is used primarily to move a list to its geometric center of mass
1060        and is a destructive operation, in the sense that it updates the list in place.
1061
1062        :param translation_vector: The translation vector to apply.
1063        :type translation_vector: Vector3D
1064        """
1065        for ss in self.data:
1066            ss.translate(translation_vector)
1067
1068    def insert(self, index, item):
1069        """
1070        Insert a Disulfide into the list at the specified index
1071
1072        :param index: insertion point
1073        :type index: int
1074        :param item: Disulfide to insert
1075        :type item: Disulfide
1076        """
1077        self.data.insert(index, self.validate_ss(item))
1078
1079    @property
1080    def length(self):
1081        """Return the length of the list"""
1082        return len(self.data)
1083
1084    @property
1085    def min(self) -> Disulfide:
1086        """
1087        Return Disulfide from the list with the minimum energy
1088
1089        :return: Disulfide with the minimum energy.
1090        """
1091        sslist = sorted(self.data)
1092        return sslist[0]
1093
1094    @property
1095    def max(self) -> Disulfide:
1096        """
1097        Return Disulfide from the list with the maximum energy
1098
1099        :return: Disulfide with the maximum energy. This assumes that
1100        the comparison is based on the energy attribute.
1101        """
1102        sslist = sorted(self.data)
1103        return sslist[-1]
1104
1105    def minmax_distance(self):
1106        """
1107        Return the Disulfides with the minimum and
1108        maximum Cα distances in the list.
1109
1110        :return: SSmin, SSmax
1111        """
1112        sslist = self.data
1113
1114        if not sslist:
1115            return None, None
1116
1117        ssmin = min(sslist, key=lambda ss: ss.ca_distance)
1118        ssmax = max(sslist, key=lambda ss: ss.ca_distance)
1119
1120        return ssmin, ssmax
1121
1122    @property
1123    def minmax_energy(self):
1124        """
1125        Return the Disulfides with the minimum and maximum energies
1126        from the DisulfideList.
1127
1128        :return: Disulfides with minimum and maximum energies
1129        """
1130        sslist = self.data
1131
1132        if not sslist:
1133            return None, None
1134
1135        sslist = sorted(sslist, key=lambda ss: ss.energy)
1136        return sslist[0], sslist[-1]
1137
1138    def nearest_neighbors(self, cutoff: float, *args):
1139        """
1140        Return all Disulfides within the given angle cutoff of the input Disulfide.
1141
1142        :param cutoff: Distance cutoff, degrees
1143        :param args: Either 5 individual angles (chi1, chi2, chi3, chi4, chi5) or a list of 5 angles
1144        :return: DisulfideList of neighbors within the cutoff
1145        """
1146        if len(args) == 1 and isinstance(args[0], list) and len(args[0]) == 5:
1147            chi1, chi2, chi3, chi4, chi5 = args[0]
1148        elif len(args) == 5:
1149            chi1, chi2, chi3, chi4, chi5 = args
1150        else:
1151            raise ValueError(
1152                "You must provide either 5 individual angles or a list of 5 angles."
1153            )
1154
1155        sslist = self.data
1156        modelss = proteusPy.Disulfide("model", torsions=[chi1, chi2, chi3, chi4, chi5])
1157        res = modelss.torsion_neighbors(sslist, cutoff)
1158
1159        resname = f"Neighbors within {cutoff:.2f}° of [{', '.join(f'{angle:.2f}' for angle in modelss.dihedrals)}]"
1160        res.pdb_id = resname
1161
1162        return res
1163
1164    def nearest_neighbors_ss(self, ss, cutoff: float):
1165        """
1166        Return the list of Disulfides within the torsional cutoff
1167        of the input Disulfide.
1168
1169        :param ss: Disulfide to compare to
1170        :param cutoff: Distance cutoff, degrees
1171        :return: DisulfideList of neighbors
1172        """
1173
1174        sslist = self.data
1175        res = ss.torsion_neighbors(sslist, cutoff)
1176
1177        resname = f"{ss.name} neighbors within {cutoff}°"
1178        res.pdb_id = resname
1179
1180        return res
1181
1182    def pprint(self):
1183        """
1184        Pretty print self.
1185        """
1186        sslist = self.data
1187        for ss in sslist:
1188            ss.pprint()
1189
1190    def pprint_all(self):
1191        """
1192        Pretty print full disulfide descriptions in self.
1193        """
1194        sslist = self.data
1195        for ss in sslist:
1196            ss.pprint_all()
1197
1198    @property
1199    def torsion_df(self):
1200        """Return the Torsion DataFrame for the DisulfideList"""
1201        return self.build_torsion_df()
1202
1203    @property
1204    def torsion_array(self):
1205        """Return the Torsions as an Array"""
1206        return self.get_torsion_array()
1207
1208    def validate_ss(self, value):
1209        """Return the Disulfide object if it is a Disulfide, otherwise raise an error"""
1210        from proteusPy.Disulfide import Disulfide
1211
1212        if value is None:
1213            raise ValueError("The value cannot be None.")
1214
1215        if not isinstance(value, Disulfide):
1216            raise TypeError("The value must be an instance of Disulfide.")
1217        return value
1218
1219    def create_deviation_dataframe(self, verbose=False):
1220        """
1221        Create a DataFrame with columns PDB_ID, SS_Name, Angle_Deviation, Distance_Deviation,
1222        Ca Distance from a list of disulfides.
1223
1224        :param verbose: Whether to display a progress bar.
1225        :type verbose: bool
1226        :return: DataFrame containing the disulfide information.
1227        :rtype: pd.DataFrame
1228        """
1229        disulfide_list = self.data
1230        data = {
1231            "PDB_ID": [],
1232            "Resolution": [],
1233            "SS_Name": [],
1234            "Angle_Deviation": [],
1235            "Bondlength_Deviation": [],
1236            "Ca_Distance": [],
1237            "Sg_Distance": [],
1238        }
1239
1240        if verbose:
1241            pbar = tqdm(disulfide_list, desc="Processing...", leave=False)
1242        else:
1243            pbar = disulfide_list
1244
1245        for ss in pbar:
1246            data["PDB_ID"].append(ss.pdb_id)
1247            data["Resolution"].append(ss.resolution)
1248            data["SS_Name"].append(ss.name)
1249            data["Angle_Deviation"].append(ss.bond_angle_ideality)
1250            data["Bondlength_Deviation"].append(ss.bond_length_ideality)
1251            data["Ca_Distance"].append(ss.ca_distance)
1252            data["Sg_Distance"].append(ss.sg_distance)
1253
1254        df = pd.DataFrame(data)
1255        return df
1256
1257    def extract_distances(self, distance_type="sg", comparison="less", cutoff=-1):
1258        """
1259        Extract and filter the distance values from the disulfide list based on the specified type and comparison.
1260
1261        :param disulfide_list: List of disulfide objects.
1262        :param distance_type: Type of distance to extract ('sg' or 'ca').
1263        :param comparison: If 'less', return distances less than the cutoff value, otherwise return distances greater than or equal to the cutoff value.
1264        :param cutoff: Cutoff value for filtering distances.
1265        :return: List of filtered distance values.
1266        """
1267        disulfide_list = self.data
1268        distances = filtered_distances = []
1269
1270        match distance_type:
1271            case "sg":
1272                distances = [ds.sg_distance for ds in disulfide_list]
1273            case "ca":
1274                distances = [ds.ca_distance for ds in disulfide_list]
1275            case _:
1276                raise ValueError("Invalid distance_type. Must be 'sg' or 'ca'.")
1277
1278        if cutoff == -1.0:
1279            return distances
1280
1281        if comparison == "greater":
1282            filtered_distances = [d for d in distances if d > cutoff]
1283        else:
1284            filtered_distances = [d for d in distances if d <= cutoff]
1285
1286        return filtered_distances
1287
1288    @staticmethod
1289    def plot_distances(
1290        distances,
1291        distance_type="sg",
1292        cutoff=-1,
1293        comparison="less",
1294        theme="auto",
1295        log=True,
1296    ):
1297        """
1298        Plot the distance values as a histogram using plotly express.
1299
1300        :param distances: List of distance values.
1301        :param distance_type: Type of distance to plot ('sg' or 'ca').
1302        :param cutoff: Cutoff value for the x-axis title.
1303        :param flip: Whether to flip the comparison in the x-axis title.
1304        :param theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
1305        :param log: Whether to use a logarithmic scale for the y-axis. Default is True.
1306        """
1307
1308        set_plotly_theme(theme)
1309
1310        yaxis_type = "log" if log else "linear"
1311        flip = False if comparison == "less" else True
1312
1313        match distance_type:
1314            case "sg":
1315                column_name = "SG Distance"
1316                title = "Sγ Distance Distribution"
1317                if cutoff == -1.0:
1318                    xtitle = "Sγ-Sγ Distances, (no cutoff)"
1319                else:
1320                    xtitle = (
1321                        f"Sγ Distance < {cutoff} Å"
1322                        if not flip
1323                        else f"Sγ-Sγ Distance >= {cutoff} Å"
1324                    )
1325            case "ca":
1326                column_name = "Ca Distance"
1327                title = "Cα Distance Distribution"
1328                if cutoff == -1.0:
1329                    xtitle = "Cα-Cα Distances, (no cutoff)"
1330                else:
1331                    xtitle = (
1332                        f"Cα Distance < {cutoff} Å"
1333                        if not flip
1334                        else f"Cα-Cα Distance >= {cutoff} Å"
1335                    )
1336            case _:
1337                raise ValueError("Invalid distance_type. Must be 'sg' or 'ca'.")
1338
1339        # Convert to a Pandas DataFrame with the appropriate column name
1340        df = pd.DataFrame(distances, columns=[column_name])
1341
1342        fig = px.histogram(
1343            df,
1344            x=column_name,  # Use the column name for the x-axis
1345            nbins=NBINS,
1346            title=title,
1347        )
1348        fig.update_layout(
1349            title={"text": "Distance Distribution", "x": 0.5, "xanchor": "center"},
1350            xaxis_title=xtitle,
1351            yaxis_title="Frequency",
1352            yaxis_type=yaxis_type,
1353            bargap=0.2,
1354        )
1355        fig.show()
1356
1357    def plot_deviation_scatterplots(self, verbose=False, theme="auto"):
1358        """
1359        Plot scatter plots for Bondlength_Deviation, Angle_Deviation, Ca_Distance, and Sg_Distance
1360        with the row index as the x-axis.
1361
1362        :param verbose: If True, display additional information during processing, defaults to False.
1363        :type verbose: bool
1364        :param theme: The theme to use for the plot ('auto', 'light', or 'dark'), defaults to 'auto'.
1365        :type theme: str
1366        """
1367        set_plotly_theme(theme)
1368        dotsize = 2
1369
1370        df = self.create_deviation_dataframe(verbose=verbose)
1371
1372        fig = px.scatter(
1373            df, x=df.index, y="Bondlength_Deviation", title="Bondlength Deviation"
1374        )
1375        fig.update_layout(xaxis_title="Row Index", yaxis_title="Bondlength Deviation")
1376        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1377        fig.show()
1378
1379        fig = px.scatter(df, x=df.index, y="Angle_Deviation", title="Angle Deviation")
1380        fig.update_layout(xaxis_title="Row Index", yaxis_title="Angle Deviation")
1381        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1382        fig.show()
1383
1384        fig = px.scatter(df, x=df.index, y="Ca_Distance", title="Cα Distance")
1385        fig.update_layout(xaxis_title="Row Index", yaxis_title="Cα Distance")
1386        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1387        fig.show()
1388
1389        fig = px.scatter(df, x=df.index, y="Sg_Distance", title="Sg Distance")
1390        fig.update_layout(xaxis_title="Row Index", yaxis_title="Sg Distance")
1391        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1392        fig.show()
1393
1394    def plot_deviation_histograms(self, verbose=False, theme="auto", log=True) -> None:
1395        """
1396        Plot histograms for Bondlength_Deviation, Angle_Deviation, and Ca_Distance.
1397
1398        This function creates and displays histograms for the bond length deviation,
1399        bond angle deviation from the disulfide list. The histograms
1400        are displayed on a logarithmic scale for the y-axis.
1401
1402        :param verbose: Whether to display a progress bar.
1403        :type verbose: bool
1404        :param theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
1405        :param log: Whether to use a logarithmic scale for the y-axis. Default is True.
1406        """
1407
1408        set_plotly_theme(theme)
1409        if log:
1410            yaxis_type = "log"
1411        else:
1412            yaxis_type = "linear"
1413
1414        df = self.create_deviation_dataframe(verbose=verbose)
1415
1416        fig = px.histogram(
1417            df,
1418            x="Bondlength_Deviation",
1419            nbins=NBINS,
1420            title="Bond Length Deviation (Å)",
1421        )
1422
1423        fig.update_layout(
1424            title={"text": "Bond Length Deviation", "x": 0.5, "xanchor": "center"},
1425            xaxis_title="Bond Length Deviation (Å)",
1426            yaxis_title="Frequency",
1427            yaxis_type=yaxis_type,
1428        )
1429        fig.show()
1430
1431        fig2 = px.histogram(
1432            df, x="Angle_Deviation", nbins=NBINS, title="Bond Angle Deviation, (°)"
1433        )
1434        fig2.update_layout(
1435            title={"text": "Bond Angle Deviation", "x": 0.5, "xanchor": "center"},
1436            xaxis_title="Bond Angle Deviation (°)",
1437            yaxis_title="Frequency",
1438            yaxis_type=yaxis_type,
1439        )
1440
1441        fig2.show()
1442
1443        return
1444
1445    def filter_deviation_df_by_cutoffs(
1446        self,
1447        length_cutoff=10.0,
1448        angle_cutoff=100.0,
1449        ca_cutoff=1000.0,
1450        sg_cutoff=10.0,
1451        minimum_distance=0.0,
1452    ) -> pd.DataFrame:
1453        """
1454        Filter the DataFrame based on bond length, angle, Ca and Sg distance cutoffs.
1455
1456        Note: The default values are set to high values to allow all structures to pass the filter.
1457
1458        :param df: DataFrame containing the deviations.
1459        :type df: pd.DataFrame
1460        :param length_cutoff: Cutoff value for Bond Length Deviation.
1461        :type distance_cutoff: float
1462        :param angle_cutoff: Cutoff value for angle deviation.
1463        :type angle_cutoff: float
1464        :param ca_cutoff: Cutoff value for Ca distance.
1465        :type ca_cutoff: float
1466        :param sg_cutoff: Cutoff value for Sg distance.
1467        :type sg_cutoff: float
1468        :return: Filtered DataFrame.
1469        :rtype: pd.DataFrame
1470        """
1471        df = self.create_deviation_dataframe()
1472
1473        filtered_df = df[
1474            (df["Bondlength_Deviation"] <= length_cutoff)
1475            & (df["Angle_Deviation"] <= angle_cutoff)
1476            & (df["Ca_Distance"] >= minimum_distance)
1477            & (df["Ca_Distance"] <= ca_cutoff)
1478            & (df["Sg_Distance"] >= minimum_distance)
1479            & (df["Sg_Distance"] <= sg_cutoff)
1480        ]
1481        return filtered_df
1482
1483    def bad_filter_deviation_df_by_cutoffs(
1484        self,
1485        length_cutoff=0.0,
1486        angle_cutoff=0.0,
1487        ca_cutoff=0.0,
1488        sg_cutoff=0.0,
1489        minimum_distance=0.0,
1490    ) -> pd.DataFrame:
1491        """
1492        Return the DataFrame objects that are GREATER than the cutoff based on distance,
1493        angle, Ca and Sg distance cutoffs. Used to get the bad structures.
1494
1495        Note: The default values are set to low values to allow all structures to pass the filter.
1496
1497        :param df: DataFrame containing the deviations.
1498        :type df: pd.DataFrame
1499        :param length_cutoff: Cutoff value for Bond Length Deviation.
1500        :type length_cutoff: float
1501        :param angle_cutoff: Cutoff value for angle deviation.
1502        :type angle_cutoff: float
1503        :param ca_cutoff: Cutoff value for Ca distance.
1504        :type ca_cutoff: float
1505        :return: Filtered DataFrame.
1506        :rtype: pd.DataFrame
1507        """
1508        df = self.create_deviation_dataframe()
1509
1510        filtered_df = df[
1511            (df["Bondlength_Deviation"] > length_cutoff)
1512            & (df["Angle_Deviation"] > angle_cutoff)
1513            & (df["Ca_Distance"] > ca_cutoff)
1514            & (df["Ca_Distance"] < minimum_distance)
1515            & (df["Sg_Distance"] > sg_cutoff)
1516            & (df["Sg_Distance"] < minimum_distance)
1517        ]
1518        return filtered_df
1519
1520    def calculate_torsion_statistics(self) -> tuple:
1521        """
1522        Calculate and return the torsion and distance statistics for the DisulfideList.
1523
1524        This method builds a DataFrame containing torsional parameters, Cα-Cα distance,
1525        energy, and phi-psi angles for the DisulfideList. It then calculates the mean
1526        and standard deviation for the torsional and distance parameters.
1527
1528        :return: A tuple containing two DataFrames:
1529                - tor_stats: DataFrame with mean and standard deviation for torsional parameters.
1530                - dist_stats: DataFrame with mean and standard deviation for distance parameters.
1531        :rtype: tuple (pd.DataFrame, pd.DataFrame)
1532        """
1533
1534        df = self.torsion_df
1535
1536        tor_cols = ["chi1", "chi2", "chi3", "chi4", "chi5", "torsion_length"]
1537        dist_cols = ["ca_distance", "cb_distance", "sg_distance", "energy", "rho"]
1538        tor_stats = {}
1539        dist_stats = {}
1540
1541        def circular_mean(series):
1542            """
1543            Calculate the circular mean of a series of angles.
1544
1545            This function converts the input series of angles from degrees to radians,
1546            computes the mean of the sine and cosine of these angles, and then converts
1547            the result back to degrees.
1548
1549            :param series: A sequence of angles in degrees.
1550            :type series: array-like
1551            :return: The circular mean of the input angles in degrees.
1552            :rtype: float
1553            """
1554            radians = np.deg2rad(series)
1555            sin_mean = np.sin(radians).mean()
1556            cos_mean = np.cos(radians).mean()
1557            return np.rad2deg(np.arctan2(sin_mean, cos_mean))
1558
1559        for col in tor_cols[:5]:
1560            tor_stats[col] = {"mean": circular_mean(df[col]), "std": df[col].std()}
1561
1562        tor_stats["torsion_length"] = {
1563            "mean": df["torsion_length"].mean(),
1564            "std": df["torsion_length"].std(),
1565        }
1566
1567        for col in dist_cols:
1568            dist_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
1569
1570        tor_stats = pd.DataFrame(tor_stats, columns=tor_cols)
1571        dist_stats = pd.DataFrame(dist_stats, columns=dist_cols)
1572
1573        return tor_stats, dist_stats
1574
1575    # class ends

The class provides a sortable list for Disulfide objects. Indexing and slicing are supported, as well as typical list operations like .insert(), .append() and .extend(). The DisulfideList object must be initialized with an iterable (tuple, list) and a name. Sorting is keyed by torsional energy.

The class can also render Disulfides to a pyVista window using the
[display()](#DisulfideList.display) and [display_overlay()](#DisulfideList.display_overlay) methods.
See below for examples.


Examples:
>>> from proteusPy import Disulfide, DisulfideLoader, DisulfideList, Load_PDB_SS

Instantiate some variables. Note: the list is initialized with an iterable and a name (optional)

>>> SS = Disulfide('tmp')

The list is initialized with an iterable, a name and resolution. Name and resolution
are optional.
>>> SSlist = DisulfideList([],'ss', -1.0)

Load the database.
>>> PDB_SS = Load_PDB_SS(verbose=False, subset=True)

Get the first disulfide via indexing.
>>> SS = PDB_SS[0]

# assert str(SS) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>"

>>> SS4yys = PDB_SS['4yys']

# assert str(SS4yys) == "[<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_56A_98A, Source: 4yys, Resolution: 1.35 Å>, <Disulfide 4yys_156A_207A, Source: 4yys, Resolution: 1.35 Å>]"

Make some empty disulfides.
>>> ss1 = Disulfide('ss1')
>>> ss2 = Disulfide('ss2')

Make a DisulfideList containing ss1, named 'tmp'
>>> sslist = DisulfideList([ss1], 'tmp')
>>> sslist.append(ss2)

Extract the first disulfide
>>> ss1 = PDB_SS[0]

# assert str(ss1.pprint_all()) == "<Disulfide 4yys_22A_65A, Source: 4yys, Resolution: 1.35 Å

Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))> Proximal Coordinates: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Distal Coordinates: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Proximal Internal Coords: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Distal Internal Coords: N: Cα: C: O: Cβ: Sγ: Cprev Nnext: Χ1-Χ5: 174.63°, 82.52°, -83.32°, -62.52° -73.83°, 138.89°, 1.70 kcal/mol Cα Distance: 4.50 Å Torsion length: 231.53 deg>"

Get a list of disulfides via slicing
>>> subset = DisulfideList(PDB_SS[0:10],'subset')

Display the subset disulfides overlaid onto the same coordinate frame,
(proximal N, Ca, C').

The disulfides are colored individually to facilitate inspection.

>>> subset.display_overlay()
DisulfideList(iterable, pid: str = 'nil', res=-1.0, quiet=True, fast=False)
149    def __init__(self, iterable, pid: str = "nil", res=-1.0, quiet=True, fast=False):
150        """
151        Initialize the DisulfideList.
152
153        :param iterable: An iterable of disulfide bonds.
154        :type iterable: iterable
155        :param pid: Name for the list, default is "nil".
156        :type pid: str
157        :param res: Resolution, default is -1.0. If -1, the average resolution is used.
158        :type res: float
159        :param quiet: If True, suppress output, default is True.
160        :type quiet: bool
161        :param fast: If True, enable fast mode, default is False.
162        :type fast: bool
163
164        Example:
165        >>> from proteusPy import DisulfideList, Disulfide
166
167        Initialize some empty disulfides.
168        >>> ss1 = Disulfide('ss1')
169        >>> ss2 = Disulfide('ss2')
170        >>> ss3 = Disulfide('ss3')
171
172        Make a list containing the disulfides.
173        >>> sslist = DisulfideList([ss1, ss2], 'sslist')
174        >>> sslist
175        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
176        >>> sslist.append(ss3)
177        >>> sslist
178        [<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
179        """
180
181        super().__init__(self.validate_ss(item) for item in iterable)
182
183        self.pdb_id = pid
184        self.quiet = quiet
185
186        if not fast:
187            if res == -1:
188                self._res = self.average_resolution
189            else:
190                self._res = res
191        else:
192            self._res = res

Initialize the DisulfideList.

Parameters
  • iterable: An iterable of disulfide bonds.
  • pid: Name for the list, default is "nil".
  • res: Resolution, default is -1.0. If -1, the average resolution is used.
  • quiet: If True, suppress output, default is True.
  • fast: If True, enable fast mode, default is False.

Example:

>>> from proteusPy import DisulfideList, Disulfide

Initialize some empty disulfides.

>>> ss1 = Disulfide('ss1')
>>> ss2 = Disulfide('ss2')
>>> ss3 = Disulfide('ss3')

Make a list containing the disulfides.

>>> sslist = DisulfideList([ss1, ss2], 'sslist')
>>> sslist
[<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>]
>>> sslist.append(ss3)
>>> sslist
[<Disulfide ss1, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss2, Source: 1egs, Resolution: -1.0 Å>, <Disulfide ss3, Source: 1egs, Resolution: -1.0 Å>]
pdb_id
quiet
average_ca_distance
262    @property
263    def average_ca_distance(self):
264        """
265        Return the Average energy (kcal/mol) for the Disulfides in the list.
266
267        :return: Average energy (kcal/mol) between all atoms in the list
268        """
269        sslist = self.data
270        tot = len(sslist)
271        if tot == 0:
272            return 0.0
273
274        total_dist = sum(ss.ca_distance for ss in sslist)
275        return total_dist / tot

Return the average Cα distance for the Disulfides in the list.

Returns

Average Cα distance of the Disulfides in the list, or 0.0 if the list is empty

average_distance
277    @property
278    def average_distance(self):
279        """
280        Return the Average distance (Å) between the atoms in the list.
281
282        :return: Average distance (Å) between all atoms in the list
283
284        """
285        sslist = self.data
286        cnt = 1
287
288        total = 0.0
289        for ss1 in sslist:
290            for ss2 in sslist:
291                if ss2 == ss1:
292                    continue
293                total += ss1.Distance_RMS(ss2)
294                cnt += 1
295
296        return total / cnt

Return the Average distance (Å) between the atoms in the list.

Returns

Average distance (Å) between all atoms in the list

average_energy
298    @property
299    def average_energy(self):
300        """
301        Return the Average energy (kcal/mol) for the Disulfides in the list.
302
303        :return: Average energy (kcal/mol) between all atoms in the list
304        """
305        sslist = self.data
306        tot = len(sslist)
307        if tot == 0:
308            return 0.0
309
310        total_energy = sum(ss.energy for ss in sslist)
311        return total_energy / tot

Return the Average energy (kcal/mol) for the Disulfides in the list.

Returns

Average energy (kcal/mol) between all atoms in the list

average_conformation
313    @property
314    def average_conformation(self):
315        """
316        Return the average conformation for the disulfides in the list.
317
318        :return: Average conformation: [x1, x2, x3, x4, x5]
319        """
320        sslist = self.data
321        res = np.mean([ss.torsion_array for ss in sslist], axis=0)
322        return res

Return the average conformation for the disulfides in the list.

Returns

Average conformation: [x1, x2, x3, x4, x5]

def append(self, item):
def append(self, item):
    """
    Add ``item`` to the end of the list.

    The item is passed through ``self.validate_ss`` and whatever that
    returns is what gets stored.

    :param item: Disulfide to add
    :type item: Disulfide
    """
    checked = self.validate_ss(item)
    self.data.append(checked)

Append the list with item

Parameters
  • item: Disulfide to add
average_resolution: float
@property
def average_resolution(self) -> float:
    """
    Mean structure resolution over the members of the list.

    Members whose ``resolution`` equals -1.0 are left out of the mean.

    :return: Average resolution (A), or -1.0 when no member contributes.
    """
    known = [ss.resolution for ss in self.data if ss.resolution != -1.0]
    if not known:
        return -1.0
    return sum(known) / len(known)

Compute and return the average structure resolution for the given list.

Returns

Average resolution (A)

resolution: float
@property
def resolution(self) -> float:
    """
    Return the resolution value stored on the list (``self._res``).

    Nothing is computed here; use ``average_resolution`` for a mean
    derived from the members.

    :return: Stored resolution (A)
    """
    stored = self._res
    return stored

Return the structure resolution stored for the given list.

Returns

Stored resolution (A)

average_torsion_distance
@property
def average_torsion_distance(self):
    """
    Mean torsion-space distance (degrees) over every unordered pair of
    members in the DisulfideList.

    :return: Torsion Distance (degrees) as a float, or 0 when the list
             holds fewer than two members.
    """
    pair_distances = [
        first.torsion_distance(second)
        for first, second in combinations(self.data, 2)
    ]
    if not pair_distances:
        return 0
    return float(sum(pair_distances) / len(pair_distances))

Return the average distance in torsion space (degrees), between all pairs in the DisulfideList

Returns

Torsion Distance (degrees)

def build_distance_df(self) -> pandas.core.frame.DataFrame:
def build_distance_df(self) -> pd.DataFrame:
    """
    Create a dataframe containing the input DisulfideList Cα-Cα, Cβ-Cβ and
    Sg-Sg distances and energy. This can take several minutes for the
    entire database.

    One row is produced per member, with the keys ``source``, ``ss_id``,
    ``proximal``, ``distal``, ``energy``, ``ca_distance``, ``cb_distance``
    and ``sg_distance``. A progress bar is shown unless ``self.quiet`` is set.

    :return: DataFrame containing the distances, with columns ordered by
             ``Distance_DF_Cols``
    :rtype: pd.DataFrame
    """
    sslist = self.data

    # tqdm advances by one on every iteration when it wraps an iterable, so
    # no manual pbar.update() is needed (it would double-count progress).
    pbar = sslist if self.quiet else tqdm(sslist, ncols=PBAR_COLS, leave=False)

    rows = [
        {
            "source": ss.pdb_id,
            "ss_id": ss.name,
            "proximal": ss.proximal,
            "distal": ss.distal,
            "energy": ss.energy,
            "ca_distance": ss.ca_distance,
            "cb_distance": ss.cb_distance,
            "sg_distance": ss.sg_distance,
        }
        for ss in pbar
    ]

    # create the dataframe from the list of dictionaries
    return pd.DataFrame(rows, columns=Distance_DF_Cols)

Create a dataframe containing the input DisulfideList Cα-Cα and Sg-Sg distances, energy. This can take several minutes for the entire database.

Returns

DataFrame containing Ca distances

def build_torsion_df(self) -> pandas.core.frame.DataFrame:
def build_torsion_df(self) -> pd.DataFrame:
    """
    Create a dataframe containing the input DisulfideList torsional parameters,
    Cα-Cα, Cβ-Cβ and Sg-Sg distances, energy, and phi-psi angles. This can
    take several minutes for the entire database.

    One row is produced per member. A progress bar is shown unless
    ``self.quiet`` is set.

    :return: pd.DataFrame containing the torsions, with columns ordered by
             ``Torsion_DF_Cols``
    """
    sslist = self.data

    # tqdm advances by one on every iteration when it wraps an iterable, so
    # no manual pbar.update() is needed (it would double-count progress).
    # The bar closes itself once the iteration is exhausted.
    pbar = sslist if self.quiet else tqdm(sslist, ncols=PBAR_COLS, leave=False)

    rows = [
        {
            "source": ss.pdb_id,
            "ss_id": ss.name,
            "proximal": ss.proximal,
            "distal": ss.distal,
            "chi1": ss.chi1,
            "chi2": ss.chi2,
            "chi3": ss.chi3,
            "chi4": ss.chi4,
            "chi5": ss.chi5,
            "energy": ss.energy,
            "ca_distance": ss.ca_distance,
            "cb_distance": ss.cb_distance,
            "sg_distance": ss.sg_distance,
            "psi_prox": ss.psiprox,
            "phi_prox": ss.phiprox,
            "phi_dist": ss.phidist,
            "psi_dist": ss.psidist,
            "torsion_length": ss.torsion_length,
            "rho": ss.rho,
            "binary_class_string": ss.binary_class_string,
            "octant_class_string": ss.octant_class_string,
        }
        for ss in pbar
    ]

    # create the dataframe from the list of dictionaries
    return pd.DataFrame(rows, columns=Torsion_DF_Cols)

Create a dataframe containing the input DisulfideList torsional parameters, Cα-Cα and Sg-Sg distances, energy, and phi-psi angles. This can take several minutes for the entire database.

Returns

pd.DataFrame containing the torsions

def by_chain(self, chain: str):
def by_chain(self, chain: str):
    """
    Collect the disulfides that lie entirely within one chain.

    A member is kept when its proximal and distal chains are the same and
    equal ``chain``. Every member whose two chains differ is reported on
    stdout as a cross-chain disulfide, whatever ``chain`` was requested.

    :param chain: chain identifier, 'A', 'B', etc.
    :return: DisulfideList containing disulfides within that chain.
    """
    matches = DisulfideList([], chain)

    for bond in self.data:
        prox_chain = bond.proximal_chain
        dist_chain = bond.distal_chain

        if prox_chain != dist_chain:
            print(f"Cross chain SS: {bond.repr_compact}:")
            continue

        if prox_chain == chain:
            matches.append(bond)

    return matches

Return a DisulfideList from the input chain identifier.

Parameters
  • chain: chain identifier, 'A', 'B', etc.
Returns

DisulfideList containing disulfides within that chain.

center_of_mass
@property
def center_of_mass(self):
    """
    Mean of the members' ``cofmass`` values for the Disulfide list.

    :return: Sum of every member's ``cofmass`` divided by the number of
             members, or 0.0 for an empty list.
    """
    members = self.data
    n_members = len(members)
    if n_members == 0:
        return 0.0

    cofmass_sum = sum(member.cofmass for member in members)
    return cofmass_sum / n_members

Calculate the center of mass for the Disulfide list

def describe(self):
def describe(self):
    """
    Print a short summary of the list to stdout.

    The summary covers the list's ``pdb_id``, its length, the average
    energy, Cα distance and resolution, and the mean bond-angle and
    bond-length ideality of the members (both reported as 0 for an
    empty list).
    """
    members = self.data
    list_length = len(members)

    name = self.pdb_id
    avg_distance = self.average_ca_distance
    avg_energy = self.average_energy
    avg_resolution = self.average_resolution

    if list_length:
        avg_bondangle = sum(ss.bond_angle_ideality for ss in members) / list_length
        avg_bondlength = sum(ss.bond_length_ideality for ss in members) / list_length
    else:
        avg_bondangle = 0
        avg_bondlength = 0

    report = (
        f"DisulfideList: {name}",
        f"Length: {list_length}",
        f"Average energy: {avg_energy:.2f} kcal/mol",
        f"Average CA distance: {avg_distance:.2f} Å",
        f"Average Resolution: {avg_resolution:.2f} Å",
        f"Bond angle deviation: {avg_bondangle:.2f}°",
        f"Bond length deviation: {avg_bondlength:.2f} Å",
    )
    for line in report:
        print(line)

Prints out relevant attributes of the given disulfideList.

Parameters
  • disulfideList: A list of disulfide objects.
  • list_name: The name of the list.
def display(self, style='sb', light='auto', panelsize=512):
def display(self, style="sb", light="auto", panelsize=512):
    """
    Show every disulfide of the list in its own panel of a single window.

    The panel grid comes from ``grid_dimensions`` and the window is sized
    as ``panelsize`` pixels per grid cell. All panel views are linked.

    :param style:  Rendering style: One of:\n
        - 'sb' - split bonds
        - 'bs' - ball and stick
        - 'cpk' - CPK style
        - 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
        - 'plain' - boring single color
    :param light: Theme selector handed to ``set_pyvista_theme``,
        defaults to 'auto'.
    :param panelsize: Edge length of one panel, defaults to 512.
    """
    n_bonds = len(self.data)
    rows, cols = grid_dimensions(n_bonds)
    window = (panelsize * cols, panelsize * rows)

    set_pyvista_theme(light)

    # each sub-window already carries its own title, so no global title is added
    plotter = pv.Plotter(window_size=window, shape=(rows, cols))
    plotter = self._render(plotter, style, panelsize=panelsize)
    plotter.enable_anti_aliasing("msaa")

    plotter.link_views()
    plotter.reset_camera()
    plotter.show()

Display the Disulfide list in the specific rendering style.

Parameters
  • single: Display the bond in a single panel in the specific style.
  • style: Rendering style: One of:

    • 'sb' - split bonds
    • 'bs' - ball and stick
    • 'cpk' - CPK style
    • 'pd' - Proximal/Distal style - Red=proximal, Green=Distal
    • 'plain' - boring single color
  • light: If True, light background, if False, dark
def display_torsion_statistics( self, display=True, save=False, fname='ss_torsions.png', theme='auto'):
def display_torsion_statistics(
    self,
    display=True,
    save=False,
    fname="ss_torsions.png",
    theme="auto",
):
    """
    Display torsion and distance statistics for a given Disulfide list.

    Builds a 2x2 grid of bar charts showing mean values with standard
    deviation error bars: the dihedral angles X1-X5 together with rho,
    the strain energy, the Cα/Cβ/Sγ distances, and the torsion length.
    The statistics come from ``self.calculate_torsion_statistics()``.

    :param display: Whether to display the plot in the notebook. Default is True.
    :type display: bool
    :param save: Whether to save the plot as an image file. Default is False.
    :type save: bool
    :param fname: The name of the image file to save. Default is 'ss_torsions.png'.
    :type fname: str
    :param theme: The theme handed to ``set_plotly_theme``. Default is 'auto'.
    :type theme: str
    :return: None. An empty list logs a warning and returns without plotting.
    """

    if self.length == 0:
        _logger.warning("Empty DisulfideList. Nothing to display.")
        return

    set_plotly_theme(theme)
    title = f"{self.id}: {self.length} members"

    tor_vals, dist_vals = self.calculate_torsion_statistics()

    # The rows selected below are indexed by column label, so every integer
    # lookup goes through .iloc to make the positional access explicit
    # (plain Series[int] on a labelled index is deprecated in pandas).
    tor_mean_vals = tor_vals.loc["mean"]
    tor_std_vals = tor_vals.loc["std"]

    dist_mean_vals = dist_vals.loc["mean"]
    dist_std_vals = dist_vals.loc["std"]

    fig = make_subplots(
        rows=2, cols=2, vertical_spacing=0.125, column_widths=[1, 1]
    )

    fig.update_layout(
        title={
            "text": title,
            "xanchor": "center",
            "x": 0.5,
            "yanchor": "top",
        },
        width=1024,
        height=1024,
    )

    # Panel (1, 1): the five dihedral angles; positions 0-4 of the torsion stats.
    fig.add_trace(
        go.Bar(
            x=["X1", "X2", "X3", "X4", "X5"],
            y=tor_mean_vals.iloc[:5],
            name="Torsion Angle (°) ",
            # error bars limited to the same five entries as the bars
            error_y=dict(type="data", array=tor_std_vals.iloc[:5], visible=True),
        ),
        row=1,
        col=1,
    )

    # Panel (1, 1) also carries rho; position 4 of the distance stats.
    fig.add_trace(
        go.Bar(
            x=["rho"],
            y=[dist_mean_vals.iloc[4]],
            name="ρ (°)",
            error_y=dict(
                type="data", array=[dist_std_vals.iloc[4]], visible=True
            ),
        ),
        row=1,
        col=1,
    )

    fig.update_yaxes(
        title_text="Dihedral Angle (°)", range=[-200, 200], row=1, col=1
    )
    fig.update_yaxes(range=[0, 320], row=2, col=2)

    # Panel (1, 2): strain energy; position 3 of the distance stats.
    fig.add_trace(
        go.Bar(
            x=["Strain Energy (kcal/mol)"],
            y=[dist_mean_vals.iloc[3]],
            name="Energy (kcal/mol)",
            error_y=dict(
                type="data",
                array=[dist_std_vals.iloc[3].tolist()],
                width=0.25,
                visible=True,
            ),
        ),
        row=1,
        col=2,
    )
    fig.update_traces(width=0.25, row=1, col=2)

    fig.update_yaxes(
        title_text="kcal/mol", range=[0, 8], row=1, col=2
    )  # max possible DSE

    # Panel (2, 1): Cα, Cβ and Sγ distances; positions 0-2 of the distance stats.
    fig.add_trace(
        go.Bar(
            x=["Cα Distance (Å)", "Cβ Distance (Å)", "Sγ Distance (Å)"],
            y=[
                dist_mean_vals.iloc[0],
                dist_mean_vals.iloc[1],
                dist_mean_vals.iloc[2],
            ],
            name="Distances (Å)",
            error_y=dict(
                type="data",
                array=[
                    dist_std_vals.iloc[0].tolist(),
                    dist_std_vals.iloc[1].tolist(),
                    dist_std_vals.iloc[2].tolist(),
                ],
                width=0.25,
                visible=True,
            ),
        ),
        row=2,
        col=1,
    )
    fig.update_yaxes(title_text="Distance (A)", range=[0, 8], row=2, col=1)
    fig.update_traces(width=0.25, row=2, col=1)

    # Panel (2, 2): torsion length; position 5 of the torsion stats.
    fig.add_trace(
        go.Bar(
            x=["Torsion Length (Å)"],
            y=[tor_mean_vals.iloc[5]],
            name="Torsion Length (Å)",
            error_y=dict(
                type="data",
                array=[tor_std_vals.iloc[5]],
                width=0.25,
                visible=True,
            ),
        ),
        row=2,
        col=2,
    )
    fig.update_yaxes(title_text="Torsion Length", range=[0, 350], row=2, col=2)
    fig.update_traces(width=0.25, row=2, col=2)

    # Uniform error-bar styling and value labels on every trace.
    fig.update_traces(
        error_y_thickness=2,
        error_y_color="gray",
        texttemplate="%{y:.2f} ± %{error_y.array:.2f}",
        textposition="outside",
    )

    if display:
        fig.show()

    if save:
        fig.write_image(Path(fname))

    return

Display torsion and distance statistics for a given Disulfide list.

Parameters
  • display: Whether to display the plot in the notebook. Default is True.
  • save: Whether to save the plot as an image file. Default is False.
  • fname: The name of the image file to save. Default is 'ss_torsions.png'.
  • theme: The theme to use for the plot. Default is 'Auto'. Options are 'Auto', 'light', and 'dark'.
Returns

none

distance_df: pandas.core.frame.DataFrame
@property
def distance_df(self) -> pd.DataFrame:
    """
    Distance dataframe for this list, rebuilt on every access.

    Delegates to ``build_distance_df``; this can take considerable time
    for the entire list.

    :return: Dataframe containing the Cα-Cα distances for the given list.
    """
    frame = self.build_distance_df()
    return frame

Build and return the distance dataframe for the input list. This can take considerable time for the entire list.

Returns

Dataframe containing the Cα-Cα distances for the given list.

Example:

>>> from proteusPy import Disulfide, Load_PDB_SS, DisulfideList
>>> PDB_SS = Load_PDB_SS()
def display_overlay( self, screenshot=False, movie=False, verbose=False, fname='ss_overlay.png', light='auto', winsize=(1024, 1024)):
def display_overlay(
    self,
    screenshot=False,
    movie=False,
    verbose=False,
    fname="ss_overlay.png",
    light="auto",
    winsize=WINSIZE,
):
    """
    Display all disulfides in the list overlaid in stick mode against
    a common coordinate frame. This allows us to see all of the disulfides
    at one time in a single view. Colors vary smoothly between bonds.

    When both ``screenshot`` and ``movie`` are set, the screenshot branch
    is the one that runs.

    :param screenshot: Save a screenshot, defaults to False
    :param movie: Save a movie, defaults to False
    :param verbose: Show a progress bar while rendering and print where
        output was saved, defaults to False
    :param fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
    :param light: Theme selector handed to ``set_pyvista_theme``, defaults to 'auto'
    :param winsize: Window size handed to the plotter, defaults to ``WINSIZE``;
        its first element also drives the title font size
    """
    pid = self.pdb_id

    ssbonds = self.data
    tot_ss = len(ssbonds)  # number of ssbonds
    avg_enrg = self.average_energy
    avg_dist = self.average_distance
    resolution = self.average_resolution

    # Lower the rendering resolution as the number of bonds grows.
    res = 64
    if tot_ss > 30:
        res = 48
    if tot_ss > 60:
        res = 16
    if tot_ss > 90:
        res = 8

    title = f"<{pid}> {resolution:.2f} Å: ({tot_ss} SS), E: {avg_enrg:.2f} kcal/mol, Dist: {avg_dist:.2f} Å"
    fontsize = calculate_fontsize(title, winsize[0])

    set_pyvista_theme(light)

    # Movies are rendered off screen; everything else gets an on-screen window.
    pl = pv.Plotter(window_size=winsize, off_screen=bool(movie))

    pl.add_title(title=title, font_size=fontsize)
    pl.enable_anti_aliasing("msaa")
    pl.add_axes()

    mycol = get_jet_colormap(tot_ss)

    # Scale the overlay bond radii down in steps as the list grows so that
    # the individual elements stay distinguishable.
    brad = BOND_RADIUS if tot_ss < 10 else BOND_RADIUS * 0.75
    brad = brad if tot_ss < 25 else brad * 0.8
    brad = brad if tot_ss < 50 else brad * 0.8
    brad = brad if tot_ss < 100 else brad * 0.6

    if verbose:
        pbar = tqdm(range(tot_ss), ncols=PBAR_COLS)
    else:
        pbar = range(tot_ss)

    for i, ss in zip(pbar, ssbonds):
        color = [int(mycol[i][0]), int(mycol[i][1]), int(mycol[i][2])]
        ss._render(
            pl,
            style="plain",
            bondcolor=color,
            translate=False,
            bond_radius=brad,
            res=res,
        )

    pl.reset_camera()

    if screenshot:
        pl.show(auto_close=False)  # allows for manipulation
        # Take the screenshot after ensuring the plotter is still active
        try:
            pl.screenshot(fname)
            if verbose:
                print(f" -> display_overlay(): Saved image to: {fname}")
        except RuntimeError as e:
            _logger.error("Error saving screenshot: %s", e)

    elif movie:
        if verbose:
            print(f" -> display_overlay(): Saving mp4 animation to: {fname}")

        pl.open_movie(fname)
        path = pl.generate_orbital_path(n_points=360)
        pl.orbit_on_path(path, write_frames=True)
        pl.close()

        if verbose:
            print(f" -> display_overlay(): Saved mp4 animation to: {fname}")
    else:
        pl.show()

    return

Display all disulfides in the list overlaid in stick mode against a common coordinate frame. This allows us to see all of the disulfides at one time in a single view. Colors vary smoothly between bonds.

Parameters
  • screenshot: Save a screenshot, defaults to False
  • movie: Save a movie, defaults to False
  • verbose: Verbosity, defaults to False
  • fname: Filename to save for the movie or screenshot, defaults to 'ss_overlay.png'
  • light: Background theme, defaults to 'auto'. True for White, False for Dark.
def extend(self, other):
def extend(self, other):
    """
    Add every element of ``other`` to the end of this list.

    Elements coming from an object of the same type as ``self`` are taken
    as they are; elements of any other iterable are each passed through
    ``self.validate_ss`` first.

    :param other: extension
    :type other: DisulfideList
    """
    if not isinstance(other, type(self)):
        other = (self.validate_ss(item) for item in other)
    self.data.extend(other)

Extend the Disulfide list with other.

Parameters
  • other: extension
def filter_by_distance(self, distance: float = -1.0, minimum: float = 2.0):
def filter_by_distance(self, distance: float = -1.0, minimum: float = 2.0):
    """
    Keep the disulfides whose Cα distance lies strictly between ``minimum``
    and ``distance``.

    NOTE: when ``distance`` is -1.0 no filtering is done and a copy of the
    underlying ``self.data`` is returned, not a new DisulfideList.

    :param distance: Upper bound on the Cα distance in Å; -1.0 disables filtering
    :param minimum: Lower bound on the Cα distance in Å, defaults to 2.0
    :return: DisulfideList containing disulfides with the given distance.
    """
    members = self.data

    # -1.0 is the "no limit" sentinel: hand back everything
    if distance == -1.0:
        return members.copy()

    kept = [ss for ss in members if minimum < ss.ca_distance < distance]
    return DisulfideList(kept, f"filtered by distance < {distance:.2f}")

Return a DisulfideList filtered to the disulfides whose Cα distance lies between the minimum, which defaults to 2.0 Å, and the maximum distance.

Parameters
  • distance: Distance in Å
  • minimum: Distance in Å
Returns

DisulfideList containing disulfides with the given distance.

def filter_by_sg_distance(self, distance: float = -1.0, minimum: float = 1.0):
def filter_by_sg_distance(self, distance: float = -1.0, minimum: float = 1.0):
    """
    Keep the disulfides whose Sg distance lies strictly between ``minimum``
    and ``distance``.

    NOTE: when ``distance`` is -1.0 no filtering is done and a copy of the
    underlying ``self.data`` is returned, not a new DisulfideList.

    :param distance: Upper bound on the Sg distance in Å; -1.0 disables filtering
    :param minimum: Lower bound on the Sg distance in Å, defaults to 1.0
    :return: DisulfideList containing disulfides with the given distance.
    """
    members = self.data

    # -1.0 is the "no limit" sentinel: hand back everything
    if distance == -1.0:
        return members.copy()

    kept = [ss for ss in members if minimum < ss.sg_distance < distance]
    return DisulfideList(kept, f"filtered by Sγ distance < {distance:.2f}")

Return a DisulfideList filtered to the disulfides whose Sg distance lies between the minimum, which defaults to 1.0 Å, and the maximum distance.

Parameters
  • distance: Distance in Å
  • minimum: Distance in Å
Returns

DisulfideList containing disulfides with the given distance.

def filter_by_bond_ideality(self, angle: float = -1.0):
def filter_by_bond_ideality(self, angle: float = -1.0):
    """
    Keep the disulfides whose ``bond_angle_ideality`` is strictly below
    ``angle``.

    NOTE: when ``angle`` is -1.0 no filtering is done and a copy of the
    underlying ``self.data`` is returned, not a new DisulfideList.

    :param angle: Upper bound on the bond angle ideality in degrees;
        -1.0 disables filtering
    :return: DisulfideList containing disulfides with the given angle.
    """
    members = self.data

    # -1.0 is the "no limit" sentinel: hand back everything
    if angle == -1.0:
        return members.copy()

    kept = [ss for ss in members if ss.bond_angle_ideality < angle]
    return DisulfideList(kept, f"filtered by bond angle < {angle:.2f}")

Return a DisulfideList filtered by bond angle ideality, keeping the disulfides whose bond angle ideality is below the given maximum angle.

Parameters
  • angle: Angle in degrees
  • minimum: Angle in degrees
Returns

DisulfideList containing disulfides with the given angle.

def get_by_name(self, name):
def get_by_name(self, name):
    """
    Look up a Disulfide by name.

    :param name: Name to match against each member's ``name``
    :return: A copy (``ss.copy()``) of the first member whose name matches,
             or None when no member matches.
    """
    match = next((ss for ss in self.data if ss.name == name), None)
    if match is None:
        return None
    return match.copy()

Returns the Disulfide with the given name from the list.

def get_chains(self):
def get_chains(self):
    """
    Return the chain IDs referenced by the Disulfides in the list.

    Each member contributes its ``proximal_chain`` and ``distal_chain``.
    Chain IDs are added whole, so a multi-character ID such as ``'AA'``
    stays one entry instead of being split into single characters.

    :return: Set of chain IDs found in the list (empty for an empty list)
    """
    chains = set()

    for ss in self.data:
        # set.add keeps the ID intact; set.update would iterate its characters
        chains.add(ss.proximal_chain)
        chains.add(ss.distal_chain)

    return chains

Return the chain IDs for chains within the given Disulfide.

Returns

Chain IDs for given Disulfide

def get_torsion_array(self):
def get_torsion_array(self):
    """
    Stack the members' torsion arrays into one NumPy array.

    :return: A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row
            holds the dihedral angles of one disulfide bond, in the order [X1, X2, X3, X4, X5].
    """
    per_bond = [ss.torsion_array for ss in self.data]
    return np.array(per_bond)

Return a 2D NumPy array representing the dihedral angles in the given disulfide list.

Returns

A 2D NumPy array of shape (n, 5), where n is the number of disulfide bonds in the list. Each row of the array represents the dihedral angles of a disulfide bond, in the following order: [X1, X2, X3, X4, X5].

def has_chain(self, chain) -> bool:
 998    def has_chain(self, chain) -> bool:
 999        """
1000        Returns True if given chain contained in Disulfide, False otherwise.
1001        :return: Returns True if given chain contained in Disulfide, False otherwise.
1002        """
1003
1004        chns = {"xxx"}
1005        chns = self.get_chains()
1006        if chain in chns:
1007            return True
1008        else:
1009            return False

Returns True if given chain contained in Disulfide, False otherwise.

Returns

Returns True if given chain contained in Disulfide, False otherwise.

id
@property
def id(self):
    """
    Identifier of the list; an alias for ``self.pdb_id``.
    """
    identifier = self.pdb_id
    return identifier

PDB ID of the list

def TorsionGraph( self, display=True, save=False, fname='ss_torsions.png', theme='Auto'):
def TorsionGraph(
    self, display=True, save=False, fname="ss_torsions.png", theme="Auto"
):
    """
    Generate and optionally display or save a torsion graph.

    Thin wrapper: all arguments are forwarded unchanged, by keyword, to
    ``display_torsion_statistics``.

    :param display: If True, the torsion graph will be displayed. Default is True.
    :type display: bool
    :param save: If True, the torsion graph will be saved to a file. Default is False.
    :type save: bool
    :param fname: The filename to save the torsion graph. Default is "ss_torsions.png".
    :type fname: str
    :param theme: Theme name passed through to the plot. Default is 'Auto'.
    :type theme: str

    :return: None
    """
    options = {
        "display": display,
        "save": save,
        "fname": fname,
        "theme": theme,
    }
    self.display_torsion_statistics(**options)

Generate and optionally display or save a torsion graph.

This method generates a torsion graph based on the torsion statistics of disulfide bonds. It can display the graph, save it to a file, or both.

Parameters
  • display: If True, the torsion graph will be displayed. Default is True.
  • save: If True, the torsion graph will be saved to a file. Default is False.
  • fname: The filename to save the torsion graph. Default is "ss_torsions.png".
  • theme: One of 'auto', 'light', or 'dark'. Default is 'auto'.
Returns

None

def translate(self, translation_vector) -> None:
def translate(self, translation_vector) -> None:
    """
    Apply ``translation_vector`` to every Disulfide in the list, in place.

    Each member's own ``translate`` is called with the vector.
    Note: The translation is a vector SUBTRACTION, not addition.
    This is used primarily to move a list to its geometric center of mass
    and is a destructive operation: the members themselves are updated.

    :param translation_vector: The translation vector to apply.
    :type translation_vector: Vector3D
    """
    members = self.data
    for member in members:
        member.translate(translation_vector)

Translate the DisulfideList by the given translation vector. Note: The translation is a vector SUBTRACTION, not addition. This is used primarily to move a list to its geometric center of mass and is a destructive operation, in the sense that it updates the list in place.

Parameters
  • translation_vector: The translation vector to apply.
def insert(self, index, item):
1068    def insert(self, index, item):
1069        """
1070        Insert a Disulfide into the list at the specified index
1071
1072        :param index: insertion point
1073        :type index: int
1074        :param item: Disulfide to insert
1075        :type item: Disulfide
1076        """
1077        self.data.insert(index, self.validate_ss(item))

Insert a Disulfide into the list at the specified index

Parameters
  • index: insertion point
  • item: Disulfide to insert
length
1079    @property
1080    def length(self):
1081        """Return the length of the list"""
1082        return len(self.data)

Return the length of the list

min: <module 'proteusPy.Disulfide' from '/Users/egs/repos/proteusPy/proteusPy/Disulfide.py'>
1084    @property
1085    def min(self) -> Disulfide:
1086        """
1087        Return Disulfide from the list with the minimum energy
1088
1089        :return: Disulfide with the minimum energy.
1090        """
1091        sslist = sorted(self.data)
1092        return sslist[0]

Return Disulfide from the list with the minimum energy

Returns

Disulfide with the minimum energy.

max: <module 'proteusPy.Disulfide' from '/Users/egs/repos/proteusPy/proteusPy/Disulfide.py'>
1094    @property
1095    def max(self) -> Disulfide:
1096        """
1097        Return Disulfide from the list with the maximum energy
1098
1099        :return: Disulfide with the maximum energy. This assumes that
1100        the comparison is based on the energy attribute.
1101        """
1102        sslist = sorted(self.data)
1103        return sslist[-1]

Return Disulfide from the list with the maximum energy

Returns

Disulfide with the maximum energy. This assumes that the comparison is based on the energy attribute.

def minmax_distance(self):
1105    def minmax_distance(self):
1106        """
1107        Return the Disulfides with the minimum and
1108        maximum Cα distances in the list.
1109
1110        :return: SSmin, SSmax
1111        """
1112        sslist = self.data
1113
1114        if not sslist:
1115            return None, None
1116
1117        ssmin = min(sslist, key=lambda ss: ss.ca_distance)
1118        ssmax = max(sslist, key=lambda ss: ss.ca_distance)
1119
1120        return ssmin, ssmax

Return the Disulfides with the minimum and maximum Cα distances in the list.

Returns

SSmin, SSmax

minmax_energy
1122    @property
1123    def minmax_energy(self):
1124        """
1125        Return the Disulfides with the minimum and maximum energies
1126        from the DisulfideList.
1127
1128        :return: Disulfides with minimum and maximum energies
1129        """
1130        sslist = self.data
1131
1132        if not sslist:
1133            return None, None
1134
1135        sslist = sorted(sslist, key=lambda ss: ss.energy)
1136        return sslist[0], sslist[-1]

Return the Disulfides with the minimum and maximum energies from the DisulfideList.

Returns

Disulfides with minimum and maximum energies

def nearest_neighbors(self, cutoff: float, *args):
1138    def nearest_neighbors(self, cutoff: float, *args):
1139        """
1140        Return all Disulfides within the given angle cutoff of the input Disulfide.
1141
1142        :param cutoff: Distance cutoff, degrees
1143        :param args: Either 5 individual angles (chi1, chi2, chi3, chi4, chi5) or a list of 5 angles
1144        :return: DisulfideList of neighbors within the cutoff
1145        """
1146        if len(args) == 1 and isinstance(args[0], list) and len(args[0]) == 5:
1147            chi1, chi2, chi3, chi4, chi5 = args[0]
1148        elif len(args) == 5:
1149            chi1, chi2, chi3, chi4, chi5 = args
1150        else:
1151            raise ValueError(
1152                "You must provide either 5 individual angles or a list of 5 angles."
1153            )
1154
1155        sslist = self.data
1156        modelss = proteusPy.Disulfide("model", torsions=[chi1, chi2, chi3, chi4, chi5])
1157        res = modelss.torsion_neighbors(sslist, cutoff)
1158
1159        resname = f"Neighbors within {cutoff:.2f}° of [{', '.join(f'{angle:.2f}' for angle in modelss.dihedrals)}]"
1160        res.pdb_id = resname
1161
1162        return res

Return all Disulfides within the given angle cutoff of the input Disulfide.

Parameters
  • cutoff: Distance cutoff, degrees
  • args: Either 5 individual angles (chi1, chi2, chi3, chi4, chi5) or a list of 5 angles
Returns

DisulfideList of neighbors within the cutoff

def nearest_neighbors_ss(self, ss, cutoff: float):
1164    def nearest_neighbors_ss(self, ss, cutoff: float):
1165        """
1166        Return the list of Disulfides within the torsional cutoff
1167        of the input Disulfide.
1168
1169        :param ss: Disulfide to compare to
1170        :param cutoff: Distance cutoff, degrees
1171        :return: DisulfideList of neighbors
1172        """
1173
1174        sslist = self.data
1175        res = ss.torsion_neighbors(sslist, cutoff)
1176
1177        resname = f"{ss.name} neighbors within {cutoff}°"
1178        res.pdb_id = resname
1179
1180        return res

Return the list of Disulfides within the torsional cutoff of the input Disulfide.

Parameters
  • ss: Disulfide to compare to
  • cutoff: Distance cutoff, degrees
Returns

DisulfideList of neighbors

def pprint(self):
1182    def pprint(self):
1183        """
1184        Pretty print self.
1185        """
1186        sslist = self.data
1187        for ss in sslist:
1188            ss.pprint()

Pretty print self.

def pprint_all(self):
1190    def pprint_all(self):
1191        """
1192        Pretty print full disulfide descriptions in self.
1193        """
1194        sslist = self.data
1195        for ss in sslist:
1196            ss.pprint_all()

Pretty print full disulfide descriptions in self.

torsion_df
1198    @property
1199    def torsion_df(self):
1200        """Return the Torsion DataFrame for the DisulfideList"""
1201        return self.build_torsion_df()

Return the Torsion DataFrame for the DisulfideList

torsion_array
1203    @property
1204    def torsion_array(self):
1205        """Return the Torsions as an Array"""
1206        return self.get_torsion_array()

Return the Torsions as an Array

def validate_ss(self, value):
1208    def validate_ss(self, value):
1209        """Return the Disulfide object if it is a Disulfide, otherwise raise an error"""
1210        from proteusPy.Disulfide import Disulfide
1211
1212        if value is None:
1213            raise ValueError("The value cannot be None.")
1214
1215        if not isinstance(value, Disulfide):
1216            raise TypeError("The value must be an instance of Disulfide.")
1217        return value

Return the Disulfide object if it is a Disulfide, otherwise raise an error

def create_deviation_dataframe(self, verbose=False):
1219    def create_deviation_dataframe(self, verbose=False):
1220        """
1221        Create a DataFrame with columns PDB_ID, SS_Name, Angle_Deviation, Distance_Deviation,
1222        Ca Distance from a list of disulfides.
1223
1224        :param verbose: Whether to display a progress bar.
1225        :type verbose: bool
1226        :return: DataFrame containing the disulfide information.
1227        :rtype: pd.DataFrame
1228        """
1229        disulfide_list = self.data
1230        data = {
1231            "PDB_ID": [],
1232            "Resolution": [],
1233            "SS_Name": [],
1234            "Angle_Deviation": [],
1235            "Bondlength_Deviation": [],
1236            "Ca_Distance": [],
1237            "Sg_Distance": [],
1238        }
1239
1240        if verbose:
1241            pbar = tqdm(disulfide_list, desc="Processing...", leave=False)
1242        else:
1243            pbar = disulfide_list
1244
1245        for ss in pbar:
1246            data["PDB_ID"].append(ss.pdb_id)
1247            data["Resolution"].append(ss.resolution)
1248            data["SS_Name"].append(ss.name)
1249            data["Angle_Deviation"].append(ss.bond_angle_ideality)
1250            data["Bondlength_Deviation"].append(ss.bond_length_ideality)
1251            data["Ca_Distance"].append(ss.ca_distance)
1252            data["Sg_Distance"].append(ss.sg_distance)
1253
1254        df = pd.DataFrame(data)
1255        return df

Create a DataFrame with the columns PDB_ID, Resolution, SS_Name, Angle_Deviation, Bondlength_Deviation, Ca_Distance and Sg_Distance, with one row per disulfide in the list.

Parameters
  • verbose: Whether to display a progress bar.
Returns

DataFrame containing the disulfide information.

def extract_distances(self, distance_type='sg', comparison='less', cutoff=-1):
1257    def extract_distances(self, distance_type="sg", comparison="less", cutoff=-1):
1258        """
1259        Extract and filter the distance values from the disulfide list based on the specified type and comparison.
1260
1261        :param disulfide_list: List of disulfide objects.
1262        :param distance_type: Type of distance to extract ('sg' or 'ca').
1263        :param comparison: If 'less', return distances less than the cutoff value, otherwise return distances greater than or equal to the cutoff value.
1264        :param cutoff: Cutoff value for filtering distances.
1265        :return: List of filtered distance values.
1266        """
1267        disulfide_list = self.data
1268        distances = filtered_distances = []
1269
1270        match distance_type:
1271            case "sg":
1272                distances = [ds.sg_distance for ds in disulfide_list]
1273            case "ca":
1274                distances = [ds.ca_distance for ds in disulfide_list]
1275            case _:
1276                raise ValueError("Invalid distance_type. Must be 'sg' or 'ca'.")
1277
1278        if cutoff == -1.0:
1279            return distances
1280
1281        if comparison == "greater":
1282            filtered_distances = [d for d in distances if d > cutoff]
1283        else:
1284            filtered_distances = [d for d in distances if d <= cutoff]
1285
1286        return filtered_distances

Extract and filter the distance values from the disulfide list based on the specified type and comparison.

Parameters
  • disulfide_list: List of disulfide objects.
  • distance_type: Type of distance to extract ('sg' or 'ca').
  • comparison: If 'greater', return distances strictly greater than the cutoff value; any other value (the default is 'less') returns distances less than or equal to the cutoff value.
  • cutoff: Cutoff value for filtering distances.
Returns

List of filtered distance values.

@staticmethod
def plot_distances( distances, distance_type='sg', cutoff=-1, comparison='less', theme='auto', log=True):
1288    @staticmethod
1289    def plot_distances(
1290        distances,
1291        distance_type="sg",
1292        cutoff=-1,
1293        comparison="less",
1294        theme="auto",
1295        log=True,
1296    ):
1297        """
1298        Plot the distance values as a histogram using plotly express.
1299
1300        :param distances: List of distance values.
1301        :param distance_type: Type of distance to plot ('sg' or 'ca').
1302        :param cutoff: Cutoff value for the x-axis title.
1303        :param flip: Whether to flip the comparison in the x-axis title.
1304        :param theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
1305        :param log: Whether to use a logarithmic scale for the y-axis. Default is True.
1306        """
1307
1308        set_plotly_theme(theme)
1309
1310        yaxis_type = "log" if log else "linear"
1311        flip = False if comparison == "less" else True
1312
1313        match distance_type:
1314            case "sg":
1315                column_name = "SG Distance"
1316                title = "Sγ Distance Distribution"
1317                if cutoff == -1.0:
1318                    xtitle = "Sγ-Sγ Distances, (no cutoff)"
1319                else:
1320                    xtitle = (
1321                        f"Sγ Distance < {cutoff} Å"
1322                        if not flip
1323                        else f"Sγ-Sγ Distance >= {cutoff} Å"
1324                    )
1325            case "ca":
1326                column_name = "Ca Distance"
1327                title = "Cα Distance Distribution"
1328                if cutoff == -1.0:
1329                    xtitle = "Cα-Cα Distances, (no cutoff)"
1330                else:
1331                    xtitle = (
1332                        f"Cα Distance < {cutoff} Å"
1333                        if not flip
1334                        else f"Cα-Cα Distance >= {cutoff} Å"
1335                    )
1336            case _:
1337                raise ValueError("Invalid distance_type. Must be 'sg' or 'ca'.")
1338
1339        # Convert to a Pandas DataFrame with the appropriate column name
1340        df = pd.DataFrame(distances, columns=[column_name])
1341
1342        fig = px.histogram(
1343            df,
1344            x=column_name,  # Use the column name for the x-axis
1345            nbins=NBINS,
1346            title=title,
1347        )
1348        fig.update_layout(
1349            title={"text": "Distance Distribution", "x": 0.5, "xanchor": "center"},
1350            xaxis_title=xtitle,
1351            yaxis_title="Frequency",
1352            yaxis_type=yaxis_type,
1353            bargap=0.2,
1354        )
1355        fig.show()

Plot the distance values as a histogram using plotly express.

Parameters
  • distances: List of distance values.
  • distance_type: Type of distance to plot ('sg' or 'ca').
  • cutoff: Cutoff value for the x-axis title.
  • comparison: 'less' (the default) labels the x-axis as "< cutoff"; any other value flips the label to ">= cutoff".
  • theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
  • log: Whether to use a logarithmic scale for the y-axis. Default is True.
def plot_deviation_scatterplots(self, verbose=False, theme='auto'):
1357    def plot_deviation_scatterplots(self, verbose=False, theme="auto"):
1358        """
1359        Plot scatter plots for Bondlength_Deviation, Angle_Deviation, Ca_Distance, and Sg_Distance
1360        with the row index as the x-axis.
1361
1362        :param verbose: If True, display additional information during processing, defaults to False.
1363        :type verbose: bool
1364        :param theme: The theme to use for the plot ('auto', 'light', or 'dark'), defaults to 'auto'.
1365        :type theme: str
1366        """
1367        set_plotly_theme(theme)
1368        dotsize = 2
1369
1370        df = self.create_deviation_dataframe(verbose=verbose)
1371
1372        fig = px.scatter(
1373            df, x=df.index, y="Bondlength_Deviation", title="Bondlength Deviation"
1374        )
1375        fig.update_layout(xaxis_title="Row Index", yaxis_title="Bondlength Deviation")
1376        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1377        fig.show()
1378
1379        fig = px.scatter(df, x=df.index, y="Angle_Deviation", title="Angle Deviation")
1380        fig.update_layout(xaxis_title="Row Index", yaxis_title="Angle Deviation")
1381        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1382        fig.show()
1383
1384        fig = px.scatter(df, x=df.index, y="Ca_Distance", title="Cα Distance")
1385        fig.update_layout(xaxis_title="Row Index", yaxis_title="Cα Distance")
1386        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1387        fig.show()
1388
1389        fig = px.scatter(df, x=df.index, y="Sg_Distance", title="Sg Distance")
1390        fig.update_layout(xaxis_title="Row Index", yaxis_title="Sg Distance")
1391        fig.update_traces(marker=dict(size=dotsize))  # Adjust the size as needed
1392        fig.show()

Plot scatter plots for Bondlength_Deviation, Angle_Deviation, Ca_Distance, and Sg_Distance with the row index as the x-axis.

Parameters
  • verbose: If True, display additional information during processing, defaults to False.
  • theme: The theme to use for the plot ('auto', 'light', or 'dark'), defaults to 'auto'.
def plot_deviation_histograms(self, verbose=False, theme='auto', log=True) -> None:
1394    def plot_deviation_histograms(self, verbose=False, theme="auto", log=True) -> None:
1395        """
1396        Plot histograms for Bondlength_Deviation, Angle_Deviation, and Ca_Distance.
1397
1398        This function creates and displays histograms for the bond length deviation,
1399        bond angle deviation from the disulfide list. The histograms
1400        are displayed on a logarithmic scale for the y-axis.
1401
1402        :param verbose: Whether to display a progress bar.
1403        :type verbose: bool
1404        :param theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
1405        :param log: Whether to use a logarithmic scale for the y-axis. Default is True.
1406        """
1407
1408        set_plotly_theme(theme)
1409        if log:
1410            yaxis_type = "log"
1411        else:
1412            yaxis_type = "linear"
1413
1414        df = self.create_deviation_dataframe(verbose=verbose)
1415
1416        fig = px.histogram(
1417            df,
1418            x="Bondlength_Deviation",
1419            nbins=NBINS,
1420            title="Bond Length Deviation (Å)",
1421        )
1422
1423        fig.update_layout(
1424            title={"text": "Bond Length Deviation", "x": 0.5, "xanchor": "center"},
1425            xaxis_title="Bond Length Deviation (Å)",
1426            yaxis_title="Frequency",
1427            yaxis_type=yaxis_type,
1428        )
1429        fig.show()
1430
1431        fig2 = px.histogram(
1432            df, x="Angle_Deviation", nbins=NBINS, title="Bond Angle Deviation, (°)"
1433        )
1434        fig2.update_layout(
1435            title={"text": "Bond Angle Deviation", "x": 0.5, "xanchor": "center"},
1436            xaxis_title="Bond Angle Deviation (°)",
1437            yaxis_title="Frequency",
1438            yaxis_type=yaxis_type,
1439        )
1440
1441        fig2.show()
1442
1443        return

Plot histograms for Bondlength_Deviation and Angle_Deviation.

This function creates and displays histograms of the bond length deviation and the bond angle deviation for the disulfide list. The y-axis is logarithmic by default and linear when log is False.

Parameters
  • verbose: Whether to display a progress bar.
  • theme: The plotly theme to use. Default is 'auto', which will use the current system theme.
  • log: Whether to use a logarithmic scale for the y-axis. Default is True.
def filter_deviation_df_by_cutoffs( self, length_cutoff=10.0, angle_cutoff=100.0, ca_cutoff=1000.0, sg_cutoff=10.0, minimum_distance=0.0) -> pandas.core.frame.DataFrame:
1445    def filter_deviation_df_by_cutoffs(
1446        self,
1447        length_cutoff=10.0,
1448        angle_cutoff=100.0,
1449        ca_cutoff=1000.0,
1450        sg_cutoff=10.0,
1451        minimum_distance=0.0,
1452    ) -> pd.DataFrame:
1453        """
1454        Filter the DataFrame based on bond length, angle, Ca and Sg distance cutoffs.
1455
1456        Note: The default values are set to high values to allow all structures to pass the filter.
1457
1458        :param df: DataFrame containing the deviations.
1459        :type df: pd.DataFrame
1460        :param length_cutoff: Cutoff value for Bond Length Deviation.
1461        :type distance_cutoff: float
1462        :param angle_cutoff: Cutoff value for angle deviation.
1463        :type angle_cutoff: float
1464        :param ca_cutoff: Cutoff value for Ca distance.
1465        :type ca_cutoff: float
1466        :param sg_cutoff: Cutoff value for Sg distance.
1467        :type sg_cutoff: float
1468        :return: Filtered DataFrame.
1469        :rtype: pd.DataFrame
1470        """
1471        df = self.create_deviation_dataframe()
1472
1473        filtered_df = df[
1474            (df["Bondlength_Deviation"] <= length_cutoff)
1475            & (df["Angle_Deviation"] <= angle_cutoff)
1476            & (df["Ca_Distance"] >= minimum_distance)
1477            & (df["Ca_Distance"] <= ca_cutoff)
1478            & (df["Sg_Distance"] >= minimum_distance)
1479            & (df["Sg_Distance"] <= sg_cutoff)
1480        ]
1481        return filtered_df

Filter the DataFrame based on bond length, angle, Ca and Sg distance cutoffs.

Note: The default values are set to high values to allow all structures to pass the filter.

Parameters
  • df: DataFrame containing the deviations.
  • length_cutoff: Cutoff value for Bond Length Deviation.
  • angle_cutoff: Cutoff value for angle deviation.
  • ca_cutoff: Cutoff value for Ca distance.
  • sg_cutoff: Cutoff value for Sg distance.
Returns

Filtered DataFrame.

def bad_filter_deviation_df_by_cutoffs( self, length_cutoff=0.0, angle_cutoff=0.0, ca_cutoff=0.0, sg_cutoff=0.0, minimum_distance=0.0) -> pandas.core.frame.DataFrame:
1483    def bad_filter_deviation_df_by_cutoffs(
1484        self,
1485        length_cutoff=0.0,
1486        angle_cutoff=0.0,
1487        ca_cutoff=0.0,
1488        sg_cutoff=0.0,
1489        minimum_distance=0.0,
1490    ) -> pd.DataFrame:
1491        """
1492        Return the DataFrame objects that are GREATER than the cutoff based on distance,
1493        angle, Ca and Sg distance cutoffs. Used to get the bad structures.
1494
1495        Note: The default values are set to low values to allow all structures to pass the filter.
1496
1497        :param df: DataFrame containing the deviations.
1498        :type df: pd.DataFrame
1499        :param length_cutoff: Cutoff value for Bond Length Deviation.
1500        :type length_cutoff: float
1501        :param angle_cutoff: Cutoff value for angle deviation.
1502        :type angle_cutoff: float
1503        :param ca_cutoff: Cutoff value for Ca distance.
1504        :type ca_cutoff: float
1505        :return: Filtered DataFrame.
1506        :rtype: pd.DataFrame
1507        """
1508        df = self.create_deviation_dataframe()
1509
1510        filtered_df = df[
1511            (df["Bondlength_Deviation"] > length_cutoff)
1512            & (df["Angle_Deviation"] > angle_cutoff)
1513            & (df["Ca_Distance"] > ca_cutoff)
1514            & (df["Ca_Distance"] < minimum_distance)
1515            & (df["Sg_Distance"] > sg_cutoff)
1516            & (df["Sg_Distance"] < minimum_distance)
1517        ]
1518        return filtered_df

Return the DataFrame objects that are GREATER than the cutoff based on distance, angle, Ca and Sg distance cutoffs. Used to get the bad structures.

Note: The default values are set to low values to allow all structures to pass the filter.

Parameters
  • df: DataFrame containing the deviations.
  • length_cutoff: Cutoff value for Bond Length Deviation.
  • angle_cutoff: Cutoff value for angle deviation.
  • ca_cutoff: Cutoff value for Ca distance.
Returns

Filtered DataFrame.

def calculate_torsion_statistics(self) -> tuple:
1520    def calculate_torsion_statistics(self) -> tuple:
1521        """
1522        Calculate and return the torsion and distance statistics for the DisulfideList.
1523
1524        This method builds a DataFrame containing torsional parameters, Cα-Cα distance,
1525        energy, and phi-psi angles for the DisulfideList. It then calculates the mean
1526        and standard deviation for the torsional and distance parameters.
1527
1528        :return: A tuple containing two DataFrames:
1529                - tor_stats: DataFrame with mean and standard deviation for torsional parameters.
1530                - dist_stats: DataFrame with mean and standard deviation for distance parameters.
1531        :rtype: tuple (pd.DataFrame, pd.DataFrame)
1532        """
1533
1534        df = self.torsion_df
1535
1536        tor_cols = ["chi1", "chi2", "chi3", "chi4", "chi5", "torsion_length"]
1537        dist_cols = ["ca_distance", "cb_distance", "sg_distance", "energy", "rho"]
1538        tor_stats = {}
1539        dist_stats = {}
1540
1541        def circular_mean(series):
1542            """
1543            Calculate the circular mean of a series of angles.
1544
1545            This function converts the input series of angles from degrees to radians,
1546            computes the mean of the sine and cosine of these angles, and then converts
1547            the result back to degrees.
1548
1549            :param series: A sequence of angles in degrees.
1550            :type series: array-like
1551            :return: The circular mean of the input angles in degrees.
1552            :rtype: float
1553            """
1554            radians = np.deg2rad(series)
1555            sin_mean = np.sin(radians).mean()
1556            cos_mean = np.cos(radians).mean()
1557            return np.rad2deg(np.arctan2(sin_mean, cos_mean))
1558
1559        for col in tor_cols[:5]:
1560            tor_stats[col] = {"mean": circular_mean(df[col]), "std": df[col].std()}
1561
1562        tor_stats["torsion_length"] = {
1563            "mean": df["torsion_length"].mean(),
1564            "std": df["torsion_length"].std(),
1565        }
1566
1567        for col in dist_cols:
1568            dist_stats[col] = {"mean": df[col].mean(), "std": df[col].std()}
1569
1570        tor_stats = pd.DataFrame(tor_stats, columns=tor_cols)
1571        dist_stats = pd.DataFrame(dist_stats, columns=dist_cols)
1572
1573        return tor_stats, dist_stats

Calculate and return the torsion and distance statistics for the DisulfideList.

This method builds a DataFrame containing torsional parameters, Cα-Cα distance, energy, and phi-psi angles for the DisulfideList. It then calculates the mean and standard deviation for the torsional and distance parameters.

Returns

A tuple containing two DataFrames: - tor_stats: DataFrame with mean and standard deviation for torsional parameters. - dist_stats: DataFrame with mean and standard deviation for distance parameters.

def load_disulfides_from_id( pdb_id: str, pdb_dir=PosixPath('/Users/egs/PDB/good'), verbose=False, quiet=True, dbg=False, cutoff=-1.0, sg_cutoff=-1.0) -> DisulfideList:
def load_disulfides_from_id(
    pdb_id: str,
    pdb_dir=MODEL_DIR,
    verbose=False,
    quiet=True,
    dbg=False,
    cutoff=-1.0,
    sg_cutoff=-1.0,
) -> DisulfideList:
    """
    Loads the Disulfides by PDB ID and returns a DisulfideList of Disulfide objects.
    Assumes the file is downloaded in the pdb_dir path.

    The file ``pdb{pdb_id}.ent`` inside ``pdb_dir`` is parsed with
    ``extract_ssbonds_and_atoms``; one Disulfide is built per SSBond pair.
    Pairs whose proximal and distal residues are identical on the same chain
    are skipped, as are pairs for which no Disulfide can be initialized.

    :param pdb_id: The name of the PDB entry.
    :param pdb_dir: Path to the PDB files, defaults to MODEL_DIR. This is: PDB_DIR/good and are
                    the pre-parsed PDB files that have been scanned by the DisulfideDownloader program.
    :param verbose: Print info while parsing.
    :param quiet: Suppress non-error logging output while the disulfides are built.
                  The module logger's previous level is restored afterwards, even
                  if building raises.
    :param dbg: Enable debug logging.
    :param cutoff: Ca distance cutoff for filtering disulfides; applied only when > 0.
    :param sg_cutoff: SG distance cutoff for filtering disulfides; applied only when > 0.
    :return: A deep copy of the DisulfideList of Disulfide objects initialized from
             the file, or None when the parser reports no SSBonds.

    Example:

    PDB_DIR defaults to os.getenv('PDB').
    To load the Disulfides from the PDB ID 5rsa we'd use the following:

    >>> from proteusPy.DisulfideList import DisulfideList, load_disulfides_from_id
    >>> from proteusPy.ProteusGlobals import DATA_DIR
    >>> SSlist = DisulfideList([],'5rsa')
    >>> SSlist = load_disulfides_from_id('5rsa', pdb_dir=DATA_DIR, verbose=False)
    >>> SSlist
    [<Disulfide 5rsa_26A_84A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_40A_95A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_58A_110A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_65A_72A, Source: 5rsa, Resolution: 2.0 Å>]
    """

    from proteusPy.Disulfide import Initialize_Disulfide_From_Coords
    from proteusPy.ssparser import extract_ssbonds_and_atoms

    resolution = -1.0
    structure_fname = os.path.join(pdb_dir, f"pdb{pdb_id}.ent")

    if verbose:
        mess = f"Parsing structure: {pdb_id}:"
        _logger.info(mess)

    SSList = DisulfideList([], pdb_id, resolution)

    ssbond_atom_list, num_ssbonds, errors = extract_ssbonds_and_atoms(
        structure_fname, verbose=verbose
    )

    if num_ssbonds == 0:
        mess = f"->{pdb_id} has no SSBonds."
        if verbose:
            print(mess)
        _logger.warning(mess)
        return None

    # Remember the caller's logging level so quiet mode can be undone exactly.
    previous_level = _logger.level
    if quiet:
        _logger.setLevel(logging.ERROR)

    try:
        if verbose:
            mess = f"{pdb_id} has {num_ssbonds} SSBonds, found: {errors} errors"
            _logger.info(mess)

        resolution = ssbond_atom_list["resolution"]

        # Counts only the pairs that reach the construction step (used in logs).
        i = 1
        for pair in ssbond_atom_list["pairs"]:
            chain1_id, proximal = pair["proximal"][0], pair["proximal"][1]
            chain2_id, distal = pair["distal"][0], pair["distal"][1]
            proximal_secondary = pair["prox_secondary"]
            distal_secondary = pair["dist_secondary"]

            if dbg:
                mess = f"Proximal: {proximal} {chain1_id} Distal: {distal} {chain2_id}"
                _logger.debug(mess)

            proximal_int = int(proximal)
            distal_int = int(distal)

            if proximal == distal:
                if verbose:
                    mess = (
                        f"SSBond record has (proximal == distal): "
                        f"{pdb_id} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}."
                    )
                    _logger.error(mess)

                # Same residue number on the same chain is a self reference.
                if chain1_id == chain2_id:
                    mess = (
                        f"SSBond record has self reference, skipping: "
                        f"{pdb_id} <{proximal} {chain1_id}> <{distal} {chain2_id}>"
                    )
                    _logger.error(mess)
                    continue

            if verbose:
                mess = (
                    f"SSBond: {i}: {pdb_id}: {proximal} {chain1_id} - {distal} {chain2_id}"
                )
                _logger.info(mess)

            new_ss = Initialize_Disulfide_From_Coords(
                ssbond_atom_list,
                pdb_id,
                chain1_id,
                chain2_id,
                proximal_int,
                distal_int,
                resolution,
                proximal_secondary,
                distal_secondary,
                verbose=verbose,
                quiet=quiet,
                dbg=dbg,
            )

            if new_ss is not None:
                SSList.append(new_ss)
                if verbose:
                    mess = f"Initialized Disulfide: {pdb_id} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}."
                    _logger.info(mess)
            else:
                mess = f"Cannot initialize Disulfide: {pdb_id} <{proximal} {chain1_id}> <{distal} {chain2_id}>"
                _logger.error(mess)

            i += 1
    finally:
        # Restore the level even when parsing or construction raised.
        if quiet:
            _logger.setLevel(previous_level)

    num_ssbonds = len(SSList)

    if cutoff > 0:
        SSList = SSList.filter_by_distance(cutoff)
        delta = num_ssbonds - len(SSList)
        if delta:
            _logger.error(
                "Filtered %d -> %d SSBonds by Ca distance, %s, delta is: %d",
                num_ssbonds,
                len(SSList),
                pdb_id,
                delta,
            )
        num_ssbonds = len(SSList)

    if sg_cutoff > 0:
        SSList = SSList.filter_by_sg_distance(sg_cutoff)
        delta = num_ssbonds - len(SSList)
        if delta:
            _logger.error(
                "Filtered %d -> %d SSBonds by Sγ distance, %s, delta is: %d",
                num_ssbonds,
                len(SSList),
                pdb_id,
                delta,
            )

    return copy.deepcopy(SSList)

Loads the Disulfides by PDB ID and returns a DisulfideList of Disulfide objects. Assumes the file is downloaded in the pdb_dir path.

Parameters
  • pdb_id: The name of the PDB entry.
  • pdb_dir: Path to the PDB files, defaults to MODEL_DIR. This is: PDB_DIR/good and are the pre-parsed PDB files that have been scanned by the DisulfideDownloader program.
  • verbose: Print info while parsing.
  • quiet: Suppress non-error logging output.
  • dbg: Enable debug logging.
  • cutoff: Distance cutoff for filtering disulfides.
  • sg_cutoff: SG distance cutoff for filtering disulfides.
Returns

A DisulfideList of Disulfide objects initialized from the file.

Example:

PDB_DIR defaults to os.getenv('PDB'). To load the Disulfides from the PDB ID 5rsa we'd use the following:

>>> from proteusPy.DisulfideList import DisulfideList, load_disulfides_from_id
>>> from proteusPy.ProteusGlobals import DATA_DIR
>>> SSlist = DisulfideList([],'5rsa')
>>> SSlist = load_disulfides_from_id('5rsa', pdb_dir=DATA_DIR, verbose=False)
>>> SSlist
[<Disulfide 5rsa_26A_84A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_40A_95A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_58A_110A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_65A_72A, Source: 5rsa, Resolution: 2.0 Å>]
def extract_disulfide( pdb_filename: str, verbose=False, quiet=True, pdbdir='/Users/egs/PDB/') -> DisulfideList:
def extract_disulfide(
    pdb_filename: str, verbose=False, quiet=True, pdbdir=PDB_DIR
) -> DisulfideList:
    """
    Read the PDB file represented by `pdb_filename` and return a `DisulfideList`
    containing the Disulfide bonds found.

    The PDB ID is parsed from the base name of `pdb_filename`, which must have
    the form ``pdb<id>.ent``; the disulfides are then loaded by calling
    `load_disulfides_from_id` with that ID and `pdbdir`.

    :param pdb_filename:   The filename of the PDB file to read. Any leading
                           directory components are ignored when extracting the ID.
    :param verbose:        Display more messages (default: False).
    :param quiet:          Turn off DisulfideConstruction warnings (default: True).
    :param pdbdir:         Path to PDB files (default: PDB_DIR).
    :return:               A `DisulfideList` containing the Disulfide bonds found,
                           or an empty `DisulfideList` created with the PDB ID
                           when none are found.
    :rtype:                DisulfideList
    :raises ValueError:    If the base name of `pdb_filename` does not follow
                           the ``pdb<id>.ent`` pattern.
    """

    def extract_id_from_filename(filename: str) -> str:
        """
        Extract the ID from a filename formatted as 'pdb{id}.ent'.

        :param filename: The filename (optionally with a directory path) to
            extract the ID from.
        :type filename: str
        :return: The extracted ID.
        :rtype: str
        :raises ValueError: If the base name is not of the form 'pdb{id}.ent'.
        """
        basename = os.path.basename(filename)
        # Validate AND slice the base name: slicing the full path would return
        # a piece of the directory whenever a path is supplied.
        if basename.startswith("pdb") and basename.endswith(".ent"):
            # Strip the leading "pdb" and the trailing ".ent".
            return basename[3:-4]

        # Doubled braces emit a literal "{id}" instead of formatting the builtin id().
        mess = f"Filename {filename} does not follow the expected format 'pdb{{id}}.ent'"
        raise ValueError(mess)

    pdbid = extract_id_from_filename(pdb_filename)

    _sslist = load_disulfides_from_id(
        pdbid, verbose=verbose, quiet=quiet, pdb_dir=pdbdir
    )

    # Test for None first so len() is never called on a missing result.
    if _sslist is None or len(_sslist) == 0:
        _logger.error("Can't find SSBonds: %s", pdbid)
        return DisulfideList([], pdbid)

    return _sslist

Read the PDB file represented by pdb_filename and return a DisulfideList containing the Disulfide bonds found.

Parameters
  • pdb_filename: The filename of the PDB file to read.
  • verbose: Display more messages (default: False).
  • quiet: Turn off DisulfideConstruction warnings (default: True).
  • pdbdir: Path to PDB files (default: PDB_DIR).
Returns
           A `DisulfideList` containing the Disulfide bonds found.