pax_global_header00006660000000000000000000000064150665535510014525gustar00rootroot0000000000000052 comment=b98de953e8ae870308e7efaf0ff1403ffdd9bd4d python-modelcif-1.5/000077500000000000000000000000001506655355100144735ustar00rootroot00000000000000python-modelcif-1.5/.appveyor.yml000066400000000000000000000013431506655355100171420ustar00rootroot00000000000000environment: # For Python versions available on Appveyor, see # https://www.appveyor.com/docs/windows-images-software/#python matrix: - {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017, PYTHON: "C:\\Python36-x64"} - {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019, PYTHON: "C:\\Python38"} - {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019, PYTHON: "C:\\Python38-x64"} - {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019, PYTHON: "C:\\Python39-x64"} install: - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - "python.exe -m pip install codecov coverage nose" - "python.exe -m pip install -r requirements.txt" build: off test_script: - "nosetests --with-coverage --cover-branches" on_success: - "codecov" python-modelcif-1.5/.codecov.yml000066400000000000000000000000211506655355100167070ustar00rootroot00000000000000ignore: - test python-modelcif-1.5/.github/000077500000000000000000000000001506655355100160335ustar00rootroot00000000000000python-modelcif-1.5/.github/workflows/000077500000000000000000000000001506655355100200705ustar00rootroot00000000000000python-modelcif-1.5/.github/workflows/codeql-analysis.yml000066400000000000000000000034741506655355100237130ustar00rootroot00000000000000# For most projects, this workflow file will not need changing; you simply need # to commit it to your repository. # # You may wish to alter this file to override the set of languages analyzed, # or to provide custom queries or build logic. # # ******** NOTE ******** # We have attempted to detect the languages in your repository. 
Please check # the `language` matrix defined below to confirm you have the correct set of # supported CodeQL languages. # name: "CodeQL" on: push: branches: [ main ] pull_request: # The branches below must be a subset of the branches above branches: [ main ] schedule: - cron: '27 17 * * 5' jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: [ 'python' ] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] # Learn more: # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed steps: - name: Checkout repository uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. 
# queries: ./path/to/local/query, your-org/your-repo/queries@main - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v2 python-modelcif-1.5/.github/workflows/linter.yml000066400000000000000000000007561506655355100221200ustar00rootroot00000000000000name: Lint Code Base # # Documentation: # https://help.github.com/en/articles/workflow-syntax-for-github-actions # on: [push] jobs: build: name: Lint Code Base runs-on: ubuntu-latest steps: - name: Checkout Code uses: actions/checkout@v4 - name: Lint Code Base uses: docker://github/super-linter:v2.1.0 env: VALIDATE_ALL_CODEBASE: false DEFAULT_BRANCH: main VALIDATE_PYTHON: false VALIDATE_BASH: true python-modelcif-1.5/.github/workflows/testpy.yml000066400000000000000000000016511506655355100221460ustar00rootroot00000000000000name: build on: [push, pull_request] jobs: build: strategy: fail-fast: false matrix: os: [ubuntu-latest] python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] include: - os: macos-latest python-version: '3.10' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install coverage pytest-cov flake8 pip install -r requirements.txt - name: Test run: | pytest --cov=modelcif --cov-branch --cov-report=xml --cov-append -v . 
flake8 --ignore E402,W503,W504 - uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} python-modelcif-1.5/.gitignore000066400000000000000000000022561506655355100164700ustar00rootroot00000000000000.DS_Store # vim swapfiles .*.swp # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # dotenv .env # virtualenv .venv venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .pytest_cache python-modelcif-1.5/.pylintrc000066400000000000000000000002001506655355100163300ustar00rootroot00000000000000[MASTER] init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))" python-modelcif-1.5/.readthedocs.yaml000066400000000000000000000020061506655355100177200ustar00rootroot00000000000000# Read the Docs configuration file for Sphinx projects # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the OS, Python version and other tools you might need build: os: ubuntu-22.04 tools: 
python: "3.11" # You can also specify other tool versions: # nodejs: "20" # rust: "1.70" # golang: "1.20" # Build documentation in the "docs/" directory with Sphinx sphinx: configuration: docs/conf.py # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs # builder: "dirhtml" # Fail on all warnings to avoid broken references # fail_on_warning: true # Optionally build your docs in additional formats such as PDF and ePub # formats: # - pdf # - epub # Optional but recommended, declare the Python requirements required # to build your documentation # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html python: install: - requirements: docs/requirements.txt python-modelcif-1.5/CITATION.cff000066400000000000000000000034461506655355100163740ustar00rootroot00000000000000cff-version: 1.2.0 message: "If you use this software, please cite it as below." authors: - family-names: "Webb" given-names: "Benjamin" title: "python-modelcif" url: "https://github.com/ihmwg/python-modelcif" preferred-citation: type: article authors: - family-names: "Vallat" given-names: "Brinda" - family-names: "Tauriello" given-names: "Gerardo" - family-names: "Bienert" given-names: "Stefan" - family-names: "Haas" given-names: "Juergen" - family-names: "Webb" given-names: "Benjamin M." - family-names: "Žídek" given-names: "Augustin" - family-names: "Zheng" given-names: "Wei" - family-names: "Peisach" given-names: "Ezra" - family-names: "Piehl" given-names: "Dennis W." - family-names: "Anischanka" given-names: "Ivan" - family-names: "Sillitoe" given-names: "Ian" - family-names: "Tolchard" given-names: "James" - family-names: "Varadi" given-names: "Mihaly" - family-names: "Baker" given-names: "David" - family-names: "Orengo" given-names: "Christine" - family-names: "Zhang" given-names: "Yang" - family-names: "Hoch" given-names: "Jeffrey C." 
- family-names: "Kurisu" given-names: "Genji" - family-names: "Patwardhan" given-names: "Ardan" - family-names: "Velankar" given-names: "Sameer" - family-names: "Burley" given-names: "Stephen K." - family-names: "Sali" given-names: "Andrej" - family-names: "Schwede" given-names: "Torsten" - family-names: "Berman" given-names: "Helen M." - family-names: "Westbrook" given-names: "John D." doi: "10.1016/j.jmb.2023.168021" journal: "J Mol Biol" month: 7 start: 168021 title: "ModelCIF: An Extension of PDBx/mmCIF Data Representation for Computed Structure Models" volume: 435 issue: 14 year: 2023 python-modelcif-1.5/ChangeLog.rst000066400000000000000000000215471506655355100170650ustar00rootroot000000000000001.5 - 2025-09-17 ================ - The ``pdbx_nonpoly_scheme`` and ``pdbx_poly_seq_scheme`` tables are now read from and written to mmCIF or BinaryCIF files. This allows for files containing waters to be correctly processed (#52). - A ``pyproject.toml`` file is now provided for compatibility with modern versions of pip (#50). - Bugfix: the ``is_primary`` attribute of :class:`modelcif.reference.TargetReference` is now correctly set when reading files containing a ``_struct_ref`` table but no ``_ma_target_ref_db_details`` table (#51). 1.4 - 2025-06-11 ================ - Revision information and information on how the data in the file can be used are now read from or written to mmCIF or BinaryCIF files. See :attr:`System.revisions` and :attr:`System.data_usage` respectively. - New classes have been added to :mod:`modelcif.alignment`, :mod:`modelcif.qa_metric`, and :mod:`modelcif.protocol` to support all sequence identity denominators (#47), QA metric types (#45), and protocol step types (#44) respectively, as defined in the ModelCIF dictionary. - The new :class:`modelcif.alignment.Local` and :class:`modelcif.alignment.Multiple` classes allow for both local alignments and multiple sequence alignments to be described (#43). 
- Bugfix: sequence identity and alignment score (``identity`` and ``score`` arguments to :class:`modelcif.alignment.Pair`) are now optional, to match the mmCIF dictionary (#49). - Bugfix: information in ``_ma_template_non_poly.details`` is now read from mmCIF or BinaryCIF files (#48). - Bugfix: files containing empty or missing ``_ma_qa_metric.description`` can now be read (#46). 1.3 - 2025-01-14 ================ - The new :class:`modelcif.CustomTemplate` class allows for custom templates (that have not been deposited in a database such as PDB) to be referenced, together with their atomic coordinates (#1). - Model quality scores can now be defined that act on single features or pairs of features using the :class:`modelcif.qa_metric.Feature` and :class:`modelcif.qa_metric.FeaturePairwise` classes, respectively. Features can be defined as groups of atoms, residues, or asyms (#38). - The :class:`modelcif.associated.QAMetricsFile` class should now be used to reference files that contain model quality scores. The old name (LocalPairwiseQAScoresFile) is deprecated. This allows for all types of QA scores, not just local pairwise scores, to be stored in a separate file. - Sanity checks when writing out a file can now be disabled if desired, using the new ``check`` argument to :func:`modelcif.dumper.write`. - :class:`modelcif.reference.TargetReference` now takes an ``is_primary`` argument which can be used to denote the most pertinent sequence database reference. - Information on model groups (:class:`modelcif.model.ModelGroup`) is now written to the new ``ma_model_group`` and ``ma_model_group_link`` mmCIF tables, instead of ``ma_model_list``, to match the latest ModelCIF dictionary. Old-style information in ``ma_model_list`` will still be used when reading a file if these new tables are missing. 
1.2 - 2024-10-23 ================ - Data that have been split over multiple mmCIF or BinaryCIF files can now be combined into a single :class:`modelcif.System` object using the new ``add_to_system`` argument to :func:`modelcif.reader.read` (#10). - A new example, ``associated.py``, has been added to demonstrate reading in data that has been split into multiple "associated" mmCIF files using :class:`modelcif.associated.CIFFile`. 1.1 - 2024-09-27 ================ - The new class :class:`modelcif.model.NotModeledResidueRange` allows for the annotation of residue ranges that were explicitly not modeled. Any residue marked as not-modeled in all models will be excluded from the ``pdbx_poly_seq_scheme`` table. - The ``util/make-mmcif.py`` script is now included in the installed package, so can be run if desired with ``python3 -m modelcif.util.make_mmcif``. - The ``make_mmcif`` utility script will now automatically add any missing :class:`modelcif.model.NotModeledResidueRange` objects for not-modeled residue ranges. 1.0 - 2024-06-20 ================ - Reference information in the ``struct_ref`` mmCIF table is now supported in addition to the ModelCIF-specific tables such as ``ma_target_ref_db_details``. :class:`modelcif.reference.TargetReference` now inherits from ``ihm.reference.Sequence`` and allows for the full database sequence, plus any differences between it and the modeled sequence, to be recorded. The ``align_begin`` and ``align_end`` arguments are now deprecated (#34). 0.9 - 2023-10-02 ================ - Bugfix: :class:`modelcif.SoftwareGroup` now allows for parameters to be associated with each piece of software in the group, rather than with the group as a whole (#33). 0.8 - 2023-08-04 ================ - :class:`modelcif.associated.File` now takes an optional ``data`` argument to allow describing any modeling input/output that is stored in that file. - RPM packages are now provided for Fedora and RedHat Enterprise Linux. 
0.7 - 2023-01-25 ================ - More examples have been added to demonstrate interconversion between mmCIF and BinaryCIF, and to validate mmCIF files. - A utility script ``util/make-mmcif.py`` has been added which can add minimal ModelCIF-related tables to an mmCIF file, to add in deposition. - The reader is now more robust when handling files that are not ModelCIF compliant (#31). - The ``exptl`` table is no longer written to output mmCIF files, to conform with wwPDB's recommendation. Instead, the ``struct.pdbx_structure_determination_methodology`` data item denotes that the model is computational (#29). 0.6 - 2022-08-03 ================ - :class:`ihm.ChemComp` now allows for custom chemical components to be defined in a chemical component dictionary (CCD) outside of the wwPDB CCD, such as the ModelArchive CCD, or in the file itself using descriptors such as SMILES or InChI in the :mod:`modelcif.descriptor` module. - The ``ma_struct_assembly`` category is no longer written out to mmCIF files, as this is deprecated by ModelCIF (all models are required to have the same composition). - Templates can now be described in AlphaFoldDB or PubChem using new :class:`modelcif.reference.TemplateReference` subclasses. - HHblits e-values can now be used as alignment scores, using :class:`modelcif.alignment.HHblitsEValue`. - Bugfix: :class:`modelcif.associated.CIFFile` now writes local files (if requested via ``categories`` or ``copy_categories``) even if it is placed inside a :class:`modelcif.associated.ZipFile` (#26). 0.5 - 2022-05-10 ================ - A new class :class:`modelcif.ReferenceDatabase` allows describing collections of sequences that were used as part of the modeling protocol. - Lists of ints or floats can now be given as software parameters to the :class:`modelcif.SoftwareParameter` class. 
0.4 - 2022-04-14 ================ - Sequence information for templates is now only written to template-specific categories in the mmCIF/BinaryCIF, not to the entity, entity_poly etc. tables, to properly comply with the ModelCIF dictionary. - :class:`modelcif.Template` now takes a ``entity_id`` argument which can be used to provide the entity ID (if known) of the template in its own mmCIF file. - External files (e.g. alignments, or quality scores) can now be referenced from the main file (using the :mod:`modelcif.associated` module). Selected CIF categories can automatically be written to these external files instead of the main file, in either mmCIF or BinaryCIF format (see :class:`modelcif.associated.CIFFile`). - Non-polymer models can now be linked to their template using the :class:`modelcif.NonPolymerFromTemplate` class. - Add classes for the PLDDT, PTM, and IpTM quality metrics. - :class:`modelcif.reference.TargetReference` now supports the version and CRC64 checksum of the reference sequence. 0.3 - 2022-03-21 ================ - Add a package to conda-forge so the library can be installed using ``conda install -c conda-forge modelcif`` - :class:`modelcif.Template` now takes a ``strand_id`` argument which can be used to provide the author-provided (e.g. PDB) chain ID. - Non-polymers can now be used as templates. 0.2 - 2022-01-27 ================ - Minor packaging and documentation improvements. - Add a basic "theoretical model" exptl category to output files. - Bugfix: fix output of alignments with an empty list of pairs. 0.1 - 2022-01-26 ================ - First stable release. This provides support for single-chain single-template models using the ModelCIF extension dictionary, and will read and write mmCIF and BinaryCIF files that are compliant with the PDBx and :class:`modelcif.alignment.HHblitsEValue`. 
python-modelcif-1.5/LICENSE000066400000000000000000000020671506655355100155050ustar00rootroot00000000000000MIT License Copyright (c) 2018-2025 IHM Working Group Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
python-modelcif-1.5/MANIFEST.in000066400000000000000000000000741506655355100162320ustar00rootroot00000000000000include ChangeLog.rst include LICENSE include examples/*.py python-modelcif-1.5/README.md000066400000000000000000000045441506655355100157610ustar00rootroot00000000000000[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5908678.svg)](https://doi.org/10.5281/zenodo.5908678) [![docs](https://readthedocs.org/projects/python-modelcif/badge/)](https://python-modelcif.readthedocs.org/) [![conda package](https://img.shields.io/conda/vn/conda-forge/modelcif.svg)](https://anaconda.org/conda-forge/modelcif) [![pypi package](https://badge.fury.io/py/modelcif.svg)](https://badge.fury.io/py/modelcif) [![Linux Build Status](https://github.com/ihmwg/python-modelcif/workflows/build/badge.svg)](https://github.com/ihmwg/python-modelcif/actions?query=workflow%3Abuild) [![Windows Build Status](https://ci.appveyor.com/api/projects/status/5o28oe477ii8ur4h?svg=true)](https://ci.appveyor.com/project/benmwebb/python-modelcif) [![codecov](https://codecov.io/gh/ihmwg/python-modelcif/branch/main/graph/badge.svg)](https://codecov.io/gh/ihmwg/python-modelcif) This is a Python package to assist in handling [mmCIF](http://mmcif.wwpdb.org/) and [BinaryCIF](https://github.com/molstar/BinaryCIF) files compliant with the [ModelCIF](https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index/) extension. It works with Python 3.6 or later. Please [see the documentation](https://python-modelcif.readthedocs.org/) or some [worked examples](https://github.com/ihmwg/python-modelcif/tree/main/examples) for more details. 
# Installation with conda or pip If you are using [Anaconda Python](https://www.anaconda.com/), install with ``` conda install -c conda-forge modelcif ``` On a Fedora or RedHat Enterprise Linux box, install with ``` dnf copr enable salilab/salilab; dnf install python3-modelcif ``` Alternatively, install with pip: ``` pip install modelcif ``` # Installation from source code To build and install from a clone of the GitHub repository, first build and install version 2.6 or later of the [python-ihm](https://github.com/ihmwg/python-ihm) module. Then run: ``` python setup.py build python setup.py install ``` If you want to read or write [BinaryCIF](https://github.com/molstar/BinaryCIF) files, you will also need the Python [msgpack](https://github.com/msgpack/msgpack-python) package. # Testing There are a number of testcases in the `test` directory. Each one can be run like a normal Python script to test the library. They can also be all run at once using [nose](https://nose.readthedocs.io/en/latest/) or [pytest](https://docs.pytest.org/en/latest/). python-modelcif-1.5/docs/000077500000000000000000000000001506655355100154235ustar00rootroot00000000000000python-modelcif-1.5/docs/Makefile000066400000000000000000000011371506655355100170650ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = Python-IHM SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)python-modelcif-1.5/docs/alignment.rst000066400000000000000000000013621506655355100201350ustar00rootroot00000000000000.. 
highlight:: rest .. _alignment_module: The :mod:`modelcif.alignment` Python module =========================================== .. automodule:: modelcif.alignment .. autoclass:: Identity :members: .. autoclass:: ShorterSequenceIdentity .. autoclass:: AlignedPositionsIdentity .. autoclass:: AlignedResiduePairsIdentity .. autoclass:: MeanSequenceIdentity .. autoclass:: Pair :members: .. autoclass:: AlignmentMode :members: .. autoclass:: Global :members: .. autoclass:: Local :members: .. autoclass:: AlignmentType :members: .. autoclass:: Pairwise :members: .. autoclass:: Multiple :members: .. autoclass:: Score :members: .. autoclass:: BLASTEValue :members: .. autoclass:: HHblitsEValue :members: python-modelcif-1.5/docs/associated.rst000066400000000000000000000005531506655355100202770ustar00rootroot00000000000000.. highlight:: rest .. _associated_module: The :mod:`modelcif.associated` Python module ============================================ .. automodule:: modelcif.associated .. autoclass:: Repository :members: .. autoclass:: File :members: .. autoclass:: CIFFile :members: .. autoclass:: QAMetricsFile :members: .. autoclass:: ZipFile :members: python-modelcif-1.5/docs/changes.rst000066400000000000000000000001501506655355100175610ustar00rootroot00000000000000.. _changes: .. currentmodule:: modelcif Change history ************** .. include:: ../ChangeLog.rst python-modelcif-1.5/docs/conf.py000066400000000000000000000122331506655355100167230ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Python-ModelCIF documentation build configuration file, created by # sphinx-quickstart on Thu Mar 1 14:05:33 2018. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. 
# If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('..')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx'] # Link to IHM docs or Python standard library intersphinx_mapping = {'ihm': ('https://python-ihm.readthedocs.io/en/latest/', None), 'python': ('https://docs.python.org/3', None)} # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # General information about the project. project = u'Python-ModelCIF' copyright = u'2021-2025, Benjamin Webb' author = u'Benjamin Webb' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = u'' # The full version, including alpha/beta/rc tags. release = u'' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # html_sidebars = {} # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'Python-ModelCIFdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ (master_doc, 'Python-ModelCIF.tex', u'Python-ModelCIF Documentation', u'Benjamin Webb', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'python-ma', u'Python-ModelCIF Documentation', [author], 1) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'Python-ModelCIF', u'Python-ModelCIF Documentation', author, 'Python-ModelCIF', 'One line description of project.', 'Miscellaneous'), ] # Warn about broken links to classes, etc. nitpicky = True python-modelcif-1.5/docs/data.rst000066400000000000000000000003351506655355100170670ustar00rootroot00000000000000.. highlight:: rest .. _data_module: The :mod:`modelcif.data` Python module ====================================== .. automodule:: modelcif.data .. autoclass:: Data :members: .. autoclass:: DataGroup :members: python-modelcif-1.5/docs/descriptor.rst000066400000000000000000000007051506655355100203350ustar00rootroot00000000000000.. highlight:: rest .. _descriptor_module: The :mod:`modelcif.descriptor` Python module ============================================ .. automodule:: modelcif.descriptor .. autoclass:: Descriptor :members: .. autoclass:: CanonicalSMILES :members: .. autoclass:: IsomericSMILES :members: .. autoclass:: IUPACName :members: .. autoclass:: InChI :members: .. autoclass:: InChIKey :members: .. autoclass:: PubChemCID :members: python-modelcif-1.5/docs/dumper.rst000066400000000000000000000003251506655355100174510ustar00rootroot00000000000000.. highlight:: rest .. _dumper_module: The :mod:`modelcif.dumper` Python module ======================================== .. automodule:: modelcif.dumper .. 
autofunction:: write .. autoclass:: ModelCIFVariant python-modelcif-1.5/docs/index.rst000066400000000000000000000013101506655355100172570ustar00rootroot00000000000000Python-ModelCIF documentation ============================= This is a Python package to assist in handling mmCIF files compliant with the ModelCIF extension. The documentation below documents the library API. For complete worked examples, see `the examples directory at GitHub `_. Contents ======== .. toctree:: :maxdepth: 2 introduction usage limitations changes API Reference: .. toctree:: :maxdepth: 1 main reference alignment data protocol model qa_metric associated descriptor dumper reader Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` python-modelcif-1.5/docs/introduction.rst000066400000000000000000000033711506655355100207020ustar00rootroot00000000000000Introduction ************ This package provides a mechanism to describe the generation of a theoretical model (for example, via comparative or homology model) a set of Python objects. This includes, if applicable - the templates(s) used for the modeling; - the alignment between the template(s) and target sequence; - the protocol used to generate models, such as template search, modeling, and model selection; - the actual coordinates of output models; - grouping of multiple models; - quality scores for models and/or alignments. Once created, this set of Python objects can be written to an mmCIF file that is compliant with the `ModelCIF extension `_ to the `PDBx/mmCIF dictionary `_, suitable for deposition in a repository such as `ModelArchive `_. The files can be viewed in any regular PDBx mmCIF viewer, such as `UCSF ChimeraX `_ (although most viewers to date will only show the model coordinates, not the ModelCIF-specific metadata). This package leverages functionality provided by the `python-ihm `_ package, which functions similarly, building a Python hierarchy and then reading/writing mmCIF files. 
(However, python-ihm is used for integrative models, and reads or writes files compliant with the `integrative/hybrid modeling `_ extension dictionary.) In particular, many python-ihm classes and modules are used in this library, for example the :class:`ihm.Grant` and :class:`ihm.Citation` classes for handling grant or citation information, or the :mod:`ihm.dictionary` module for validating files against the mmCIF dictionary itself. python-modelcif-1.5/docs/limitations.rst000066400000000000000000000025041506655355100205120ustar00rootroot00000000000000.. _limitations: .. currentmodule:: modelcif Limitations *********** By design, the library maps the PDBx/ModelCIF data model to its own hierarchy of Python objects. This hierarchy does not cover every possible mmCIF category; thus, the library does not necessarily preserve file contents (e.g. if a file is read in and then a new file is written out, categories or data items from the original file not understood by the library will be missing in the new file). In particular, many PDBx categories pertaining to experimentally-determined structures are ignored. Also, the following ModelCIF categories are currently not supported: - ``ma_template_customized`` - ``ma_template_coord`` - ``ma_coevolution_seq_db_ref`` - ``ma_coevolution_msa`` - ``ma_coevolution_msa_details`` - ``ma_restraints`` - ``ma_distance_restraints`` - ``ma_angle_restraints`` - ``ma_dihedral_restraints`` - ``ma_restraints_group`` - ``ma_poly_template_library_details`` - ``ma_poly_template_library_list`` - ``ma_poly_template_library_components`` Note that currently the library is only well-tested with single-chain homology models generated from a single template (such as those in the `ModBase database `_). Please `open an issue `_ if library support is lacking. python-modelcif-1.5/docs/main.rst000066400000000000000000000021271506655355100171030ustar00rootroot00000000000000.. highlight:: rest .. 
_main_module: The :mod:`modelcif` Python module ================================= .. automodule:: modelcif .. autoclass:: System :members: .. autoclass:: Database :members: .. autoclass:: Software :members: .. autoclass:: SoftwareGroup :members: .. autoclass:: SoftwareWithParameters :members: .. autoclass:: SoftwareParameter :members: .. autoclass:: Entity :members: .. autoclass:: AsymUnit :members: .. autoclass:: WaterAsymUnit :members: .. autoclass:: NonPolymerFromTemplate :members: .. autoclass:: Residue :members: .. autoclass:: Assembly :members: .. autoclass:: AsymUnitRange :members: .. autoclass:: Transformation :members: .. autoclass:: TemplateSegment :members: .. autoclass:: Template :members: .. autoclass:: CustomTemplate :members: .. autoclass:: TemplateAtom :members: .. autoclass:: ReferenceDatabase :members: .. autoclass:: Feature :members: .. autoclass:: AtomFeature :members: .. autoclass:: PolyResidueFeature :members: .. autoclass:: EntityInstanceFeature :members: python-modelcif-1.5/docs/make.bat000066400000000000000000000014561506655355100170360ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build set SPHINXPROJ=Python-IHM if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd python-modelcif-1.5/docs/model.rst000066400000000000000000000005501506655355100172550ustar00rootroot00000000000000.. 
highlight:: rest .. _model_module: The :mod:`modelcif.model` Python module ======================================= .. automodule:: modelcif.model .. autoclass:: Atom :members: .. autoclass:: Model :members: .. autoclass:: HomologyModel .. autoclass:: AbInitioModel .. autoclass:: NotModeledResidueRange .. autoclass:: ModelGroup :members: python-modelcif-1.5/docs/protocol.rst000066400000000000000000000011661506655355100200220ustar00rootroot00000000000000.. highlight:: rest .. _protocol_module: The :mod:`modelcif.protocol` Python module ========================================== .. automodule:: modelcif.protocol .. autoclass:: Step :members: .. autoclass:: TemplateSearchStep :members: .. autoclass:: TemplateSelectionStep :members: .. autoclass:: TargetTemplateAlignmentStep :members: .. autoclass:: CoevolutionMSAStep :members: .. autoclass:: ContactPredictionStep :members: .. autoclass:: ModelingStep :members: .. autoclass:: ModelSelectionStep :members: .. autoclass:: ModelRefinementStep :members: .. autoclass:: Protocol :members: python-modelcif-1.5/docs/qa_metric.rst000066400000000000000000000014441506655355100201240ustar00rootroot00000000000000.. highlight:: rest .. _qa_metric_module: The :mod:`modelcif.qa_metric` Python module =========================================== .. automodule:: modelcif.qa_metric .. autoclass:: MetricMode :members: .. autoclass:: Global :members: .. autoclass:: Local :members: .. autoclass:: LocalPairwise :members: .. autoclass:: Feature :members: .. autoclass:: FeaturePairwise :members: .. autoclass:: MetricType :members: .. autoclass:: ZScore .. autoclass:: Energy .. autoclass:: Distance .. autoclass:: NormalizedScore .. autoclass:: PAE .. autoclass:: ContactProbability .. autoclass:: PLDDT .. autoclass:: PLDDT01 .. autoclass:: PLDDTAllAtom .. autoclass:: PLDDTAllAtom01 .. autoclass:: PLDDTToPolymer .. autoclass:: PTM .. autoclass:: IpTM .. 
autoclass:: Boolean python-modelcif-1.5/docs/reader.rst000066400000000000000000000003241506655355100174160ustar00rootroot00000000000000.. highlight:: rest .. _reader_module: The :mod:`modelcif.reader` Python module ======================================== .. automodule:: modelcif.reader .. autofunction:: read .. autoclass:: ModelCIFVariant python-modelcif-1.5/docs/reference.rst000066400000000000000000000006571506655355100201230ustar00rootroot00000000000000.. highlight:: rest .. _reference_module: The :mod:`modelcif.reference` Python module =========================================== .. automodule:: modelcif.reference .. autoclass:: TargetReference :members: .. autoclass:: UniProt .. autoclass:: Alignment :members: .. autoclass:: SeqDif :members: .. autoclass:: TemplateReference :members: .. autoclass:: PDB .. autoclass:: AlphaFoldDB .. autoclass:: PubChem python-modelcif-1.5/docs/requirements.txt000066400000000000000000000000141506655355100207020ustar00rootroot00000000000000ihm >= 0.27 python-modelcif-1.5/docs/usage.rst000066400000000000000000000127251506655355100172700ustar00rootroot00000000000000Usage ***** Usage of the library for output consists of first creating a hierarchy of Python objects that together describe the system, and then dumping that hierarchy to an mmCIF or BinaryCIF file. For complete worked examples, see the `ModBase example `_ or the `ligands example `_. The top level of the hierarchy is the :class:`modelcif.System`. All other objects are referenced from a System object (either directly or via another object that is referenced by the System). System architecture =================== The architecture of the system is described with a number of classes: - :class:`modelcif.Entity` describes each unique sequence (used in the target model, in one or more templates, or both). - :class:`modelcif.AsymUnit` describes each asymmetric unit (chain) in the target model. 
For example, a homodimer would consist of two asymmetric units, both pointing to the same entity, while a heterodimer contains two entities. - Similarly, :class:`modelcif.Template` describes a chain used as a template. - :class:`modelcif.Assembly` groups asymmetric units, or parts of them. Assemblies are used to describe which parts of the system were modeled. - A variety of classes in the :mod:`modelcif.alignment` module can be used to describe alignments between the target and one or more templates. Modeling protocol ================= :class:`modelcif.protocol.Protocol` objects describe how models were generated from the input data. A protocol can consist of :class:`multiple steps <modelcif.protocol.Step>`, such as template search, alignment, modeling, and model selection. These objects also describe what was used as input and what was generated on output by each step, as one or more :class:`modelcif.data.Data` objects. Model coordinates ================= :class:`modelcif.model.Model` objects give the actual coordinates of the final generated models. These point to the :class:`~modelcif.Assembly` of what was modeled. Quality scores can also be assigned to each model (see the :mod:`modelcif.qa_metric` module) or to individual residues or pairs of residues. Models can also be grouped together for any purpose using the :class:`modelcif.model.ModelGroup` class. Metadata ======== Metadata can also be added to the system, such as - :attr:`modelcif.System.citations`: publication(s) that describe this modeling or the methods used in it. - :class:`modelcif.Software`: software packages used at any stage in the modeling. - :attr:`modelcif.System.grants`: funding support for the modeling. - :class:`modelcif.reference.TemplateReference` or :class:`modelcif.reference.TargetReference`: information on a template structure, or a target sequence. 
Residue numbering ================= The library keeps track of several numbering schemes to reflect the reality of the data used in modeling: - *Internal numbering*. Residues are always numbered sequentially starting at 1 in an :class:`~modelcif.Entity`. All references to residues or residue ranges in the library use this numbering. - *Author-provided numbering*. If a different numbering scheme is used by the authors, for example to correspond to the numbering of the original sequence that is modeled, this can be given as an author-provided numbering for one or more asymmetric units. See the ``auth_seq_id_map`` parameter to :class:`~modelcif.AsymUnit`. (The mapping between author-provided and internal numbering is given in the ``pdbx_poly_seq_scheme`` table in the mmCIF file.) Output ====== Once the hierarchy of classes is complete, it can be freely inspected or modified. All the classes are simple lightweight Python objects, generally with the relevant data available as member variables. The complete hierarchy can be written out to an mmCIF or BinaryCIF file using the :func:`modelcif.dumper.write` function. Input ===== Hierarchies of classes can also be read from mmCIF or BinaryCIF files. This is done using the :func:`modelcif.reader.read` function, which returns a list of :class:`modelcif.System` objects. Format conversion ================= The library can be employed to easily convert a ModelCIF file between mmCIF and BinaryCIF format by simply reading in one format and then writing in another. See the `convert_bcif example `_. Conversion from legacy PDB format to mmCIF or BinaryCIF is not generally possible because PDB format has no defined standard for including information about modeling protocols, alignments, and so on. This extra information must be deduced from other sources, for example custom PDB REMARK records or separate files, and provided to the library. 
For reference, a script that uses the library to convert `ModBase `_ models from PDB format to mmCIF can be `seen here `_. Validation ========== The library is designed to generate files that are consistent with the `PDBx `_ and `ModelCIF `_ dictionaries by construction. However, the library can also be used to validate ModelCIF (or other mmCIF/BinaryCIF files) if desired; see the `validator example `_. python-modelcif-1.5/examples/000077500000000000000000000000001506655355100163115ustar00rootroot00000000000000python-modelcif-1.5/examples/README.md000066400000000000000000000027211506655355100175720ustar00rootroot00000000000000These examples demonstrate some of the functionality of the python-modelcif software. See [the documentation](https://python-modelcif.readthedocs.io/) for more information. - [convert\_bcif.py](convert_bcif.py) converts an mmCIF file containing ModelCIF data to [BinaryCIF](https://github.com/molstar/BinaryCIF) format. - [ligands.py](ligands.py) demonstrates writing an mmCIF file of a typical single-template homology or comparative model, including a ligand. - [mkmodbase.py](mkmodbase.py) demonstrates using the library to make an mmCIF file of a typical single-template single-chain homology or comparative model, similar to those generated by [MODELLER](https://salilab.org/modeller/) and deposited in the [ModBase database](https://modbase.compbio.ucsf.edu/). - [associated.py](associated.py) demonstrates reading a deposition from [ModelArchive](https://www.modelarchive.org/) that has been split into multiple mmCIF files. - [validate\_mmcif.py](validate_mmcif.py) demonstrates the use of the library's validator to validate a user-provided input mmCIF file against the [ModelCIF dictionary](https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index/). 
- [validate\_modbase.py](validate_modbase.py) demonstrates downloading an mmCIF structure from the [ModBase database](https://modbase.compbio.ucsf.edu/) and validating it against the [ModelCIF dictionary](https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index/) for compliance. python-modelcif-1.5/examples/associated.py000066400000000000000000000037501506655355100210070ustar00rootroot00000000000000# This example demonstrates reading ModelCIF "associated" files. # Some repositories, such as ModelArchive, split the deposited data into # multiple mmCIF files, putting some of the quality score information not # into the main mmCIF file but into a separate "associated" file. # These associated files are referenced in the main file # (see System.repositories) so we can programmatically download and # extract them. # This example requires Python 3. import urllib.request import zipfile import tempfile import shutil import modelcif.reader # Get any associated files containing pairwise QA scores def _get_zip_scores_files(s): for repo in s.repositories: for f in repo.files: if isinstance(f, modelcif.associated.ZipFile): for zf in f.files: if isinstance( zf, modelcif.associated.QAMetricsFile): yield zf, f, repo # Download entry ma-bak-cepc-0944 directly from ModelArchive url = "https://www.modelarchive.org/doi/10.5452/ma-bak-cepc-0944.cif" with urllib.request.urlopen(url) as fh: s, = modelcif.reader.read(fh) # Get any referenced associated files containing QA scores. 
For ModelArchive, # these are stored in an mmCIF file that is then compressed into a zip file for scores, archive, repo in _get_zip_scores_files(s): url = repo.get_url(archive) # Download the referenced zip file directly from ModelArchive with urllib.request.urlopen(repo.get_url(archive)) as f_url: with tempfile.NamedTemporaryFile() as f_zip: shutil.copyfileobj(f_url, f_zip) # Extract the scores file from the zip file with zipfile.ZipFile(f_zip) as zf: with zf.open(scores.path) as f_scores: # Add scores in the file to our existing System modelcif.reader.read(f_scores, add_to_system=s) for mg in s.model_groups: for m in mg: print("Model %s contains %d QA metrics" % (m, len(m.qa_metrics))) python-modelcif-1.5/examples/convert_bcif.py000066400000000000000000000007421506655355100213310ustar00rootroot00000000000000# This example demonstrates using the library to convert an mmCIF file # containing ModelCIF data to BinaryCIF format. # Import used classes. import modelcif import modelcif.dumper import modelcif.reader # Read in an existing mmCIF file: with open('input/ligands.cif') as fh: systems = modelcif.reader.read(fh, format='mmCIF') # Write a new BinaryCIF file containing the same data: with open('ligands.bcif', 'wb') as fh: modelcif.dumper.write(fh, systems, format='BCIF') python-modelcif-1.5/examples/input/000077500000000000000000000000001506655355100174505ustar00rootroot00000000000000python-modelcif-1.5/examples/input/ligands.cif000066400000000000000000000201401506655355100215510ustar00rootroot00000000000000data_model _entry.id model _struct.entry_id model _struct.pdbx_model_details . 
_struct.pdbx_structure_determination_methodology computational _struct.title 'Ligand example' _audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/557bda7/base/mmcif_ma-core.dic _audit_conform.dict_name mmcif_ma.dic _audit_conform.dict_version 1.4.1 # loop_ _chem_comp.id _chem_comp.type _chem_comp.name _chem_comp.formula _chem_comp.formula_weight _chem_comp.ma_provenance ALA 'L-peptide linking' ALANINE 'C3 H7 N O2' 89.094 'CCD Core' ASN 'L-peptide linking' ASPARAGINE 'C4 H8 N2 O3' 132.119 'CCD Core' ASP 'L-peptide linking' 'ASPARTIC ACID' 'C4 H7 N O4' 133.103 'CCD Core' CYS 'L-peptide linking' CYSTEINE 'C3 H7 N O2 S' 121.154 'CCD Core' ILE 'L-peptide linking' ISOLEUCINE 'C6 H13 N O2' 131.175 'CCD Core' LYS 'L-peptide linking' LYSINE 'C6 H15 N2 O2 1' 147.198 'CCD Core' PHE 'L-peptide linking' PHENYLALANINE 'C9 H11 N O2' 165.192 'CCD Core' SER 'L-peptide linking' SERINE 'C3 H7 N O3' 105.093 'CCD Core' SF4 non-polymer 'IRON/SULFUR CLUSTER' 'Fe4 S4' 351.620 'CCD Core' THR 'L-peptide linking' THREONINE 'C4 H9 N O3' 119.120 'CCD Core' TYR 'L-peptide linking' TYROSINE 'C9 H11 N O3' 181.191 'CCD Core' VAL 'L-peptide linking' VALINE 'C5 H11 N O2' 117.148 'CCD Core' # # loop_ _entity.id _entity.type _entity.src_method _entity.pdbx_description _entity.formula_weight _entity.pdbx_number_of_molecules _entity.details 1 polymer man 'Model subunit' 1230.346 1 . 2 non-polymer man 'IRON/SULFUR CLUSTER' 351.620 1 . # # loop_ _entity_poly.entity_id _entity_poly.type _entity_poly.nstd_linkage _entity_poly.nstd_monomer _entity_poly.pdbx_strand_id _entity_poly.pdbx_seq_one_letter_code _entity_poly.pdbx_seq_one_letter_code_can 1 polypeptide(L) no no A AYVINDSCIA AYVINDSCIA # # loop_ _pdbx_entity_nonpoly.entity_id _pdbx_entity_nonpoly.name _pdbx_entity_nonpoly.comp_id _pdbx_entity_nonpoly.ma_model_mode 2 'IRON/SULFUR CLUSTER' SF4 implicit # # loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id _entity_poly_seq.hetero 1 1 ALA . 1 2 TYR . 
1 3 VAL . 1 4 ILE . 1 5 ASN . 1 6 ASP . 1 7 SER . 1 8 CYS . 1 9 ILE . 1 10 ALA . # # loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 'Model subunit A' B 2 'Model subunit B' # # loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.mon_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.auth_seq_num _pdbx_poly_seq_scheme.pdb_mon_id _pdbx_poly_seq_scheme.auth_mon_id _pdbx_poly_seq_scheme.pdb_strand_id _pdbx_poly_seq_scheme.pdb_ins_code A 1 1 ALA 1 1 ALA ALA A . A 1 2 TYR 2 2 TYR TYR A . A 1 3 VAL 3 3 VAL VAL A . A 1 4 ILE 4 4 ILE ILE A . A 1 5 ASN 5 5 ASN ASN A . A 1 6 ASP 6 6 ASP ASP A . A 1 7 SER 7 7 SER SER A . A 1 8 CYS 8 8 CYS CYS A . A 1 9 ILE 9 9 ILE ILE A . A 1 10 ALA 10 10 ALA ALA A . # # loop_ _pdbx_nonpoly_scheme.asym_id _pdbx_nonpoly_scheme.entity_id _pdbx_nonpoly_scheme.mon_id _pdbx_nonpoly_scheme.ndb_seq_num _pdbx_nonpoly_scheme.pdb_seq_num _pdbx_nonpoly_scheme.auth_seq_num _pdbx_nonpoly_scheme.auth_mon_id _pdbx_nonpoly_scheme.pdb_strand_id _pdbx_nonpoly_scheme.pdb_ins_code B 2 SF4 1 1 1 SF4 B . # # loop_ _ma_data.id _ma_data.name _ma_data.content_type _ma_data.content_type_other_details 1 'Template polymer' 'template structure' . 2 'Template ligand' 'template structure' . 3 'Model subunit' target . 4 'IRON/SULFUR CLUSTER' target . 5 'Modeling alignment' 'target-template alignment' . 6 'Best scoring model' 'model coordinates' . 
# # loop_ _ma_data_group.ordinal_id _ma_data_group.group_id _ma_data_group.data_id 1 1 5 2 2 6 # # loop_ _ma_target_entity.entity_id _ma_target_entity.data_id _ma_target_entity.origin 1 3 designed 2 4 designed # # loop_ _ma_target_entity_instance.asym_id _ma_target_entity_instance.entity_id _ma_target_entity_instance.details A 1 'Model subunit A' B 2 'Model subunit B' # # loop_ _ma_template_trans_matrix.id _ma_template_trans_matrix.rot_matrix[1][1] _ma_template_trans_matrix.rot_matrix[2][1] _ma_template_trans_matrix.rot_matrix[3][1] _ma_template_trans_matrix.rot_matrix[1][2] _ma_template_trans_matrix.rot_matrix[2][2] _ma_template_trans_matrix.rot_matrix[3][2] _ma_template_trans_matrix.rot_matrix[1][3] _ma_template_trans_matrix.rot_matrix[2][3] _ma_template_trans_matrix.rot_matrix[3][3] _ma_template_trans_matrix.tr_vector[1] _ma_template_trans_matrix.tr_vector[2] _ma_template_trans_matrix.tr_vector[3] 1 1.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 1.000000 0 0 0 # # loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' polymer 1 1 A A . 1 A 2 2 'reference database' non-polymer 1 2 B B . 
1 B # # loop_ _ma_template_poly.template_id _ma_template_poly.seq_one_letter_code _ma_template_poly.seq_one_letter_code_can 1 AFVVTDNCIKCK AFVVTDNCIKCK # # loop_ _ma_template_poly_segment.id _ma_template_poly_segment.template_id _ma_template_poly_segment.residue_number_begin _ma_template_poly_segment.residue_number_end 1 1 1 12 # # loop_ _ma_template_non_poly.template_id _ma_template_non_poly.comp_id _ma_template_non_poly.details 2 SF4 'IRON/SULFUR CLUSTER' # # loop_ _ma_template_ref_db_details.template_id _ma_template_ref_db_details.db_name _ma_template_ref_db_details.db_name_other_details _ma_template_ref_db_details.db_accession_code _ma_template_ref_db_details.db_version_date 1 PDB . 5fd1 . 2 PDB . 5fd1 . # # loop_ _ma_target_template_poly_mapping.id _ma_target_template_poly_mapping.template_segment_id _ma_target_template_poly_mapping.target_asym_id _ma_target_template_poly_mapping.target_seq_id_begin _ma_target_template_poly_mapping.target_seq_id_end 1 1 A 1 10 # # loop_ _ma_alignment_info.alignment_id _ma_alignment_info.data_id _ma_alignment_info.software_group_id _ma_alignment_info.alignment_length _ma_alignment_info.alignment_type _ma_alignment_info.alignment_mode 1 5 . 12 'target-template pairwise alignment' global # # loop_ _ma_alignment_details.ordinal_id _ma_alignment_details.alignment_id _ma_alignment_details.template_segment_id _ma_alignment_details.target_asym_id _ma_alignment_details.score_type _ma_alignment_details.score_type_other_details _ma_alignment_details.score_value _ma_alignment_details.percent_sequence_identity _ma_alignment_details.sequence_identity_denominator _ma_alignment_details.sequence_identity_denominator_other_details 1 1 1 A 'BLAST e-value' . 1e-15 45.000 'Length of the shorter sequence' . 
# # loop_ _ma_alignment.ordinal_id _ma_alignment.alignment_id _ma_alignment.target_template_flag _ma_alignment.sequence 1 1 1 AYVINDSC--IA 2 1 2 AFVVTDNCIKCK # # loop_ _ma_protocol_step.ordinal_id _ma_protocol_step.protocol_id _ma_protocol_step.step_id _ma_protocol_step.method_type _ma_protocol_step.step_name _ma_protocol_step.details _ma_protocol_step.software_group_id _ma_protocol_step.input_data_group_id _ma_protocol_step.output_data_group_id 1 1 1 modeling . . . 1 2 # # loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 6 'Homology model' . # # loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.label_asym_id _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.label_entity_id _atom_site.auth_asym_id _atom_site.B_iso_or_equiv _atom_site.pdbx_PDB_model_num ATOM 1 C CA . ALA 1 1 ? A 1.000 2.000 3.000 . 1 A . 1 ATOM 2 C CA . TYR 2 2 ? A 4.000 5.000 6.000 . 1 A . 1 ATOM 3 C CA . VAL 3 3 ? A 7.000 8.000 9.000 . 1 A . 1 HETATM 4 FE FE . SF4 . 1 ? B 10.000 10.000 10.000 . 2 B . 1 # # loop_ _atom_type.symbol C FE # python-modelcif-1.5/examples/ligands.py000066400000000000000000000076051506655355100203140ustar00rootroot00000000000000# This example demonstrates writing an mmCIF file of a typical # single-template homology or comparative model, including a ligand. # # This is very similar to the mkmodbase.py example; see that example for # more details. 
# Import used classes import modelcif import modelcif.model import modelcif.dumper import modelcif.reference import modelcif.protocol import modelcif.alignment from modelcif.alignment import ShorterSequenceIdentity as SequenceIdentity import ihm system = modelcif.System(title='Ligand example') # Describe the amino acid (polymer) sequences as Entity objects, for both # template and model: template_e = modelcif.Entity('AFVVTDNCIKCK', description='Template subunit') model_e = modelcif.Entity('AYVINDSCIA', description='Model subunit') # For non-polymers (e.g. ligands) we need to describe the chemistry of the # ligand as a chemical component object, then create an Entity using that # component. We only need to do this once because the ligand is the same # in both template and model: sf4 = ihm.NonPolymerChemComp("SF4", name='IRON/SULFUR CLUSTER', formula='Fe4 S4') ligand_e = modelcif.Entity([sf4], description='IRON/SULFUR CLUSTER') # Create a Template for each chain (amino acids in chain A, ligand in chain B) # and point to the original PDB, 5fd1: s = modelcif.reference.PDB('5fd1') templateA = modelcif.Template( entity=template_e, asym_id='A', model_num=1, name="Template polymer", transformation=modelcif.Transformation.identity(), references=[s]) templateB = modelcif.Template( entity=ligand_e, asym_id='B', model_num=1, name='Template ligand', transformation=modelcif.Transformation.identity(), references=[s]) # Define the model assembly, as two AsymUnits. NonPolymerFromTemplate is a # subclass of AsymUnit that additionally notes the Template from which it # was derived. 
In this case we state that the ligand was simply copied from # the template into the target (explicit=False): asymA = modelcif.AsymUnit(model_e, details='Model subunit A', id='A') asymB = modelcif.NonPolymerFromTemplate(template=templateB, explicit=False, details='Model subunit B', id='B') modeled_assembly = modelcif.Assembly((asymA, asymB), name='Modeled assembly') # For the amino acid chain, add the modeling alignment, just as in the # mkmodbase.py example: class Alignment(modelcif.alignment.Global, modelcif.alignment.Pairwise): pass p = modelcif.alignment.Pair( template=templateA.segment("AFVVTDNCIKCK", 1, 12), target=asymA.segment("AYVINDSC--IA", 1, 10), score=modelcif.alignment.BLASTEValue(1e-15), identity=SequenceIdentity(45.0)) aln = Alignment(name="Modeling alignment", pairs=[p]) system.alignments.append(aln) # Add model coordinates, similarly to the mkmodbase.py example. # Note that nonpolymers are not "sequences" and so seq_id=None. atoms = [('A', 1, 'C', 'CA', 1., 2., 3.), ('A', 2, 'C', 'CA', 4., 5., 6.), ('A', 3, 'C', 'CA', 7., 8., 9.), ('B', None, 'FE', 'FE', 10., 10., 10.)] class MyModel(modelcif.model.HomologyModel): asym_unit_map = {'A': asymA, 'B': asymB} def get_atoms(self): for asym, seq_id, type_symbol, atom_id, x, y, z in atoms: yield modelcif.model.Atom( asym_unit=self.asym_unit_map[asym], type_symbol=type_symbol, seq_id=seq_id, atom_id=atom_id, x=x, y=y, z=z, het=seq_id is None) # Add the model and modeling protocol to the file and write them out: model = MyModel(assembly=modeled_assembly, name='Best scoring model') model_group = modelcif.model.ModelGroup([model], name='All models') system.model_groups.append(model_group) protocol = modelcif.protocol.Protocol() protocol.steps.append(modelcif.protocol.ModelingStep( input_data=aln, output_data=model)) system.protocols.append(protocol) with open('output.cif', 'w') as fh: modelcif.dumper.write(fh, [system]) 
python-modelcif-1.5/examples/mkmodbase.py000066400000000000000000000217771506655355100206430ustar00rootroot00000000000000# This example demonstrates using the library to make an mmCIF file of a # typical single-template single-chain homology or comparative model, similar # to those generated by MODELLER (https://salilab.org/modeller/) and deposited # in the ModBase database (https://modbase.compbio.ucsf.edu/) # For a more complete (but less documented) script to convert a complete # ModBase PDB file into a corresponding mmCIF or BinaryCIF file, see # https://github.com/salilab/modbase_utils/blob/main/modbase_pdb_to_cif.py # Import used classes. import modelcif import modelcif.protocol import modelcif.model import modelcif.dumper import modelcif.reference import modelcif.qa_metric import modelcif.alignment # Different methods measure "sequence identity" in different ways, so import # the class that matches the way Modeller understands it (number of identical # aligned residues, divided by the length of the shorter sequence) from modelcif.alignment import ShorterSequenceIdentity as SequenceIdentity import ihm.citations import modelcif.reader # First, we create a system, which contains everything we know about the # modeling. A single mmCIF file can contain multiple Systems, but in most # cases we use just one: system = modelcif.System(title='S54091 hypothetical protein YPR070w') # List the authors of this file (here these are the ModBase authors) system.authors.extend(('Pieper U', 'Webb B', 'Narayanan E', 'Sali A')) # Describe the software that was used in the modeling modpipe_software = modelcif.Software( name='ModPipe', classification='comparative modeling', location='https://salilab.org/modpipe/', type='program', version='SVN.r1703', description='Comparative modeling pipeline') # Every object we create must ultimately be linked to the System, which # maintains simple lists for each type of object. 
For example, there is a # list system.software (like system.authors above) which can be used for # any Software object not referenced by any other object. But in this case # we're going to use these Software objects further on in the script, so # don't need to explicitly add them here. modeller_software = modelcif.Software( name='MODELLER', classification='comparative modeling', location='https://salilab.org/modeller/', type='program', version='SVN', citation=ihm.citations.modeller, description='Comparative modeling by satisfaction of spatial restraints') # Next, we define "entities", unique sequences in the system, as Entity # objects. First, the template sequence: template_e = modelcif.Entity('DMACDTFIKCC', description='Template subunit') # Next, the target (model) sequence, together with a link to the reference # sequence (in UniProt): s = modelcif.reference.UniProt(code='MED1_YEAST', accession='Q12321', sequence='DSYVETLDCC') model_e = modelcif.Entity('DSYVETLDCC', description='Model subunit', references=[s]) # Next, we define asymmetric units for everything we modeled. # These roughly correspond to chains in a traditional PDB file. Multiple # asymmetric units may map to the same entity (for example if there are # several copies of a given protein). asymA = modelcif.AsymUnit(model_e, details='Model subunit A', id='A') # Next, we group asymmetric units into assemblies. modeled_assembly = modelcif.Assembly((asymA,), name='Modeled assembly') # In a similar fashion, we declare a Template for each chain that we used # as a template structure, with a link to the reference structure database # (PDB). s = modelcif.reference.PDB('3nc1') template = modelcif.Template( entity=template_e, asym_id='A', model_num=1, name="Template Structure", transformation=modelcif.Transformation.identity(), references=[s]) # Now, we describe the alignment between target and template. # python-modelcif provides various subclasses to use here. 
All ModBase structures # use a simple pairwise global alignment between target and template, so # declare a suitable class: class Alignment(modelcif.alignment.Global, modelcif.alignment.Pairwise): pass # An alignment consists of a list of aligned target-template segments. # Here we provide the residue ranges and the actual alignment, including gaps, # between the two, together with the sequence identity and any score available # for the alignment (here we have the BLAST e-value): p = modelcif.alignment.Pair( template=template.segment("DMACDTFIK", 1, 9), target=asymA.segment("DSYV-ETLD", 1, 8), score=modelcif.alignment.BLASTEValue(1e-15), identity=SequenceIdentity(45.0)) aln = Alignment(name="Modeling alignment", software=modpipe_software, pairs=[p]) # Alignments aren't used by any objects; they should be added directly # to the System: system.alignments.append(aln) # For the actual model coordinates, we must subclass a suitable class and # override the get_atoms() method to return a list of Atom objects. This design # avoids having a separate copy of every atom in memory. # Modeller models are comparative or homology models, so we subclass # HomologyModel. 
For the purposes of this example, we just return a simple # static list of atoms: atoms = [('A', 1, 'C', 'CA', 1., 2., 3.), ('A', 2, 'C', 'CA', 4., 5., 6.), ('A', 3, 'C', 'CA', 7., 8., 9.), ('A', 4, 'C', 'CA', 10., 11., 12.)] class MyModel(modelcif.model.HomologyModel): # Map our asym unit names to ModelCIF asym_unit objects: asym_unit_map = {'A': asymA} def get_atoms(self): for asym, seq_id, type_symbol, atom_id, x, y, z in atoms: yield modelcif.model.Atom( asym_unit=self.asym_unit_map[asym], type_symbol=type_symbol, seq_id=seq_id, atom_id=atom_id, x=x, y=y, z=z) # Link the model to the Assembly that describes all subunits model = MyModel(assembly=modeled_assembly, name='Best scoring model') # Next, we describe the modeling protocol: protocol = modelcif.protocol.Protocol() protocol.steps.append(modelcif.protocol.TemplateSearchStep( name='ModPipe Seq-Prf (0001)', software=modpipe_software, input_data=model_e, output_data=aln)) protocol.steps.append(modelcif.protocol.ModelingStep( software=modeller_software, input_data=aln, output_data=model)) protocol.steps.append(modelcif.protocol.ModelSelectionStep( software=modpipe_software, input_data=model, output_data=model)) # Protocols aren't used by any other objects; they should be added directly # to the System: system.protocols.append(protocol) # We can also attach quality scores to our model(s). To do this we must # first define the scores by creating subclasses using a MetricMode # (e.g. global, per-residue) and a MetricType (e.g. distance, z-score). # Here we define the quality scores used by the ModPipe pipeline that is used # by ModBase. 
Note that one score (MPQS) uses a custom metric type, while # another (zDOPE) is a simple global z-score: class MPQSMetricType(modelcif.qa_metric.MetricType): """composite score, values >1.1 are considered reliable""" class MPQS(modelcif.qa_metric.Global, MPQSMetricType): """ModPipe Quality Score""" software = modpipe_software class zDOPE(modelcif.qa_metric.Global, modelcif.qa_metric.ZScore): """Normalized DOPE""" software = modeller_software class TSVModRMSD(modelcif.qa_metric.Global, modelcif.qa_metric.Distance): """TSVMod predicted RMSD (MSALL)""" software = None class TSVModNO35(modelcif.qa_metric.Global, modelcif.qa_metric.NormalizedScore): """TSVMod predicted native overlap (MSALL)""" software = None # Add qa metrics to the model model.qa_metrics.extend((MPQS(0.853452), zDOPE(0.31), TSVModRMSD(12.996), TSVModNO35(0.143))) # All ModBase QA metrics are global, but the library also supports per-residue # or pairwise (between two residues) scores. Here's a fictional example for a # z-score on the 4th residue of the first chain in the model, and a distance # score between the 1st and 3rd residues: class SomeLocalScore(modelcif.qa_metric.Local, modelcif.qa_metric.ZScore): """A per-residue z-score""" software = None class SomePairScore(modelcif.qa_metric.LocalPairwise, modelcif.qa_metric.Distance): """A distance score between two residues""" software = None model.qa_metrics.append(SomeLocalScore(asymA.residue(4), -0.1)) model.qa_metrics.append(SomePairScore(asymA.residue(1), asymA.residue(3), 1.0)) # Models should be grouped together using ModelGroup and then added to the # top-level System. Here we only have a single model in the group: model_group = modelcif.model.ModelGroup([model], name='All models') system.model_groups.append(model_group) # Once the system is complete, we can write it out to an mmCIF file: with open('output.cif', 'w') as fh: modelcif.dumper.write(fh, [system]) # We can also *read* an mmCIF file and create a set of Python objects from it. 
# Here we read in the file we just created: with open('output.cif') as fh: s, = modelcif.reader.read(fh) for t in s.templates: print(t.name, "-".join(c.id for c in t.entity.sequence)) for e in s.entities: print(e.description, "-".join(c.id for c in e.sequence)) python-modelcif-1.5/examples/validate_mmcif.py000077500000000000000000000016111506655355100216310ustar00rootroot00000000000000# This example demonstrates the use of the Python IHM library's validator # to validate a user-provided input mmCIF file. # See also validate_modbase.py for a more detailed example. import sys import ihm.dictionary import urllib.request if len(sys.argv) != 2: print("Usage: %s input.cif" % sys.argv[0], file=sys.stderr) sys.exit(1) fname = sys.argv[1] # Read in the ModelCIF and PDBx dictionary from https://mmcif.wwpdb.org/ with urllib.request.urlopen( 'https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ma.dic') as fh: pdbx_mcif = ihm.dictionary.read(fh) # Validate the mmCIF file assuming it is UTF8 encoded. # See validate_modbase.py for code to fallback to ASCII for non-UTF8 files. with open(fname, encoding='UTF-8') as fh: pdbx_mcif.validate(fh) # Similarly, to validate a BinaryCIF file, use: # with open(fname, 'rb') as fh: # pdbx_mcif.validate(fh, format='BCIF') python-modelcif-1.5/examples/validate_modbase.py000066400000000000000000000027571506655355100221610ustar00rootroot00000000000000# This example demonstrates the use of the Python IHM library's validator. # A structure is downloaded from the ModBase database and checked against # the ModelCIF dictionary for compliance. This validator can be used # to perform basic integrity checking against any mmCIF dictionary. # See also validate_mmcif.py for a simpler script to validate a # user-provided mmCIF file. import io import ihm.dictionary import urllib.request # Read in the ModelCIF dictionary from wwPDB as a Dictionary object. 
# Note that the ModelCIF dictionary also includes the PDBx dictionary, # so we don't need to read that in separately fh = urllib.request.urlopen( 'https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ma.dic') pdbx_mc = ihm.dictionary.read(fh) fh.close() # Validate a structure against PDBx+ModelCIF. # A correct structure here should result in no output; an invalid structure # will result in a ValidatorError Python exception. # Here, a structure from ModBase (which should be valid) is used. acc = 'P21812' cif = urllib.request.urlopen('https://salilab.org/modbase/retrieve' '?databaseID=%s&format=mmcif' % acc).read() # The encoding for mmCIF files isn't strictly defined, so first try UTF-8 # and if that fails, strip out any non-ASCII characters. This ensures that # we handle accented characters in string fields correctly. try: fh = io.StringIO(cif.decode('utf-8')) except UnicodeDecodeError: fh = io.StringIO(cif.decode('ascii', errors='ignore')) pdbx_mc.validate(fh) python-modelcif-1.5/make-release.sh000077500000000000000000000026161506655355100173720ustar00rootroot00000000000000#!/bin/bash -e # First, do # - Check spelling with # codespell . --skip '*.cif' -L assertIn # - Update AuditConformDumper to match latest MA dictionary if necessary # - Run util/validate-outputs.py to make sure all example outputs validate # (cd util; PYTHONPATH=.. python3 ./validate-outputs.py) # - Run util/check-db-entries.py to check against some real archive structures # (cd util; PYTHONPATH=.. python3 check-db-entries.py) # - If we need a newer python-ihm, update the version requirement in # requirements.txt, setup.py, pyproject.toml, util/python-modelcif.spec, # and README.md. 
# - Update ChangeLog.rst and util/python-modelcif.spec with the release number # - Update release number in modelcif/__init__.py, setup.py and pyproject.toml # - Commit, tag, and push # - Make release on GitHub # - Upload the release tarball from # https://github.com/ihmwg/python-modelcif/releases to Zenodo as a new release # - Make sure there are no extraneous .py files (setup.py will include them # in the pypi package) VERSION=$(python3 setup.py --version) python3 setup.py sdist echo "Now use 'twine upload dist/modelcif-${VERSION}.tar.gz' to publish the release on PyPi" echo "Then, update the conda-forge and COPR packages to match." echo "For COPR, use dist/modelcif-${VERSION}.tar.gz together with util/python-modelcif.spec" echo "For conda-forge, make sure the correct version of python-ihm is required" python-modelcif-1.5/modelcif/000077500000000000000000000000001506655355100162555ustar00rootroot00000000000000python-modelcif-1.5/modelcif/__init__.py000066400000000000000000000766131506655355100204030ustar00rootroot00000000000000import itertools import warnings import ihm from ihm import Entity, AsymUnit, Software, Assembly, Residue # noqa: F401 from ihm import WaterAsymUnit, AsymUnitRange, _remove_identical # noqa: F401 import modelcif.data __version__ = '1.5' class System: """Top-level class representing a complete modeled system. :param str title: Longer text description of the system. :param str id: Unique identifier for this system in the mmCIF file. :param database: If this system is part of an official database (e.g. SwissModel, ModBase), details of the database identifiers. :type database: :class:`Database` :param str model_details: Detailed description of the system, like an abstract. The system contains a number of simple flat lists of various objects, for example :attr:`alignments`. After constructing objects they should usually be added to these lists so that a hierarchy of classes is formed and is ultimately written out to mmCIF/BinaryCIF. 
After reading a file the resulting ``System`` object will also populate these lists. Most objects do not need to be explicitly added to the system since they are referenced by other objects. For example :class:`Template` objects are not usually added to the system because they are added to alignments which in turn are added to the system. If however an "orphan" Template is desired (not part of an alignment) the system does maintain an appropriate list (``System.templates`` in this case) to which it can be added. """ structure_determination_methodology = "computational" def __init__(self, title=None, id='model', database=None, model_details=None): self.id, self.title = id, title self.database = database self.model_details = model_details #: List of plain text comments. These will be added to the top of #: the mmCIF file. self.comments = [] #: List of all authors of this system, as a list of strings (last name #: followed by initials, e.g. "Smith AJ"). When writing out a file, #: if this list is empty, all authors from the first citation #: (see :attr:`citations` and :class:`ihm.Citation`) are used instead. self.authors = [] #: List of all grants that supported this work. See :class:`ihm.Grant`. self.grants = [] #: List of all citations. By convention the first citation describes #: the system itself. See :class:`ihm.Citation`. self.citations = [] #: Revision/update history. See :class:`ihm.Revision`. self.revisions = [] #: Information on usage of the data. See :class:`ihm.DataUsage`. self.data_usage = [] #: All groups of models. See :class:`~modelcif.model.ModelGroup`. self.model_groups = [] #: All modeling protocols. #: See :class:`~modelcif.protocol.Protocol`. self.protocols = [] #: All modeling alignments. #: See :mod:`modelcif.alignment`. self.alignments = [] #: Any additional files with extra data about this system. #: See :class:`modelcif.associated.Repository`. 
self.repositories = [] self.entities = [] self.asym_units = [] self.templates = [] self.template_segments = [] self.template_transformations = [] self.data = [] self.data_groups = [] self.software = [] self.software_groups = [] self.assemblies = [] self._orphan_chem_comps = [] # Mapping from ID to QA metric classes self._qa_by_id = {} def _all_models(self): """Iterate over all Models in the system""" # todo: raise an error if a model is present in multiple groups? seen_models = set() for group in self._all_model_groups(): for model in group: if model in seen_models: continue seen_models.add(model) yield group, model def _before_write(self): # Populate flat lists to contain all referenced objects only once # We must populate these in the correct order to get all objects self.assemblies = list(_remove_identical(self._all_assemblies())) self.asym_units = list(_remove_identical(self._all_asym_units())) self.alignments = list(_remove_identical(self.alignments)) self.template_segments = list( _remove_identical(self._all_template_segments())) self.templates = list(_remove_identical(self._all_templates())) self.entities = list(_remove_identical(self._all_entities())) self.template_transformations = list(_remove_identical( self._all_template_transformations())) self.data_groups = list(_remove_identical( self._all_data_groups())) self.data = list(_remove_identical( self._all_data())) self.model_groups = list(_remove_identical(self.model_groups)) self.software_groups = list(_remove_identical( self._all_software_groups())) self.software = list(_remove_identical( self._all_ref_software())) self._add_missing_reference_sequence() def _add_missing_reference_sequence(self): """If any TargetReference has no sequence, use that of the Entity""" for e in self.entities: for r in e.references: if r.sequence is None: r.sequence = "".join(comp.code_canonical for comp in e.sequence) def _check_after_write(self): pass def _all_template_segments(self): return itertools.chain( 
self.template_segments, (p.template for aln in self.alignments for p in aln.pairs)) def _all_templates(self): return itertools.chain( self.templates, (x.template for x in self.template_segments), (x.template for x in self.asym_units if isinstance(x, NonPolymerFromTemplate))) def _all_template_transformations(self): return itertools.chain( self.template_transformations, (x.transformation for x in self.templates)) def _all_citations(self): """Iterate over all Citations in the system. This includes all Citations referenced from other objects, plus any referenced from the top-level system. Duplicates are filtered out.""" return _remove_identical(itertools.chain( self.citations, (software.citation for software in self._all_software() if software.citation))) def _all_software(self): """Utility method used by ihm.dumper to get all Software. To initially populate this list from all Software referenced in the system, use _all_ref_software() instead.""" return self.software def _all_ref_software(self): """Iterate over all Software in the system. This includes all Software referenced from other objects, plus any referenced from the top-level system. Duplicates may be present.""" def _all_software_in_groups(): for sg in self.software_groups: if isinstance(sg, Software): yield sg else: for s in sg: if isinstance(s, SoftwareWithParameters): yield s.software else: yield s def _all_entities(): return itertools.chain( self.entities, (t.entity for t in self.templates)) def _all_descriptor_software(): comps = frozenset(comp for e in _all_entities() for comp in e.sequence) for comp in comps: if hasattr(comp, 'descriptors') and comp.descriptors: for desc in comp.descriptors: if desc.software: yield desc.software return (itertools.chain( self.software, _all_software_in_groups(), _all_descriptor_software())) def _all_assemblies(self): """Iterate over all Assemblies in the system. This includes all Assemblies referenced from other objects, plus any orphaned Assemblies. 
Duplicates may be present.""" return itertools.chain( self.assemblies, (model.assembly for group, model in self._all_models() if model.assembly)) def _all_asym_units(self): def _all_asym_in_assemblies(): for asmb in self.assemblies: for a in asmb: yield a.asym if hasattr(a, 'asym') else a return itertools.chain( self.asym_units, _all_asym_in_assemblies()) def _all_entities(self): return itertools.chain( self.entities, (asym.entity for asym in self.asym_units)) def _all_model_groups(self, only_in_states=True): return self.model_groups def _all_data(self): def _all_data_in_groups(): for dg in self.data_groups: if isinstance(dg, list): for data in dg: yield data def _all_data_in_files(): for repo in self.repositories: for f in repo.files: if f.data: yield f.data if hasattr(f, 'files'): for subf in f.files: if subf.data: yield subf.data return itertools.chain( self.data, self.templates, self.entities, self.alignments, (model for group, model in self._all_models()), _all_data_in_groups(), _all_data_in_files()) def _all_data_groups(self): """Return all DataGroup (or singleton Data) objects""" return itertools.chain( self.data_groups, (step.input_data for p in self.protocols for step in p.steps if step.input_data), (step.output_data for p in self.protocols for step in p.steps if step.output_data)) def _all_software_groups(self): """Return all SoftwareGroup (or singleton Software) objects""" return itertools.chain( self.software_groups, (aln.software for aln in self.alignments if aln.software), (step.software for p in self.protocols for step in p.steps if step.software), (metric.software for group, model in self._all_models() for metric in model.qa_metrics if metric.software)) def _all_features(self): """Return all Feature objects""" for _, model in self._all_models(): for m in model.qa_metrics: if hasattr(m, '_all_features'): for f in m._all_features: yield f # Provide ma-specific docs for Entity Entity.__doc__ = """Represent a unique molecular sequence. 
This can be used both for template sequences (in which case the Entity is then used in a :class:`Template` object) or for target (model) sequences (where it is used in a :class:`AsymUnit` object). (Note that template sequence Entity objects are not written out to the entity, entity_poly etc. tables in the mmCIF/BinaryCIF file by default. Instead, sequence information is captured in template-specific categories.) :param sequence sequence: The primary sequence, as a sequence of :class:`ihm.ChemComp` objects, and/or codes looked up in `alphabet`. See `ihm.Entity `_ for examples. :param alphabet: The mapping from code to chemical components to use (it is not necessary to instantiate this class). :type alphabet: :class:`ihm.Alphabet` :param str description: A short text name for the sequence. :param str details: Longer text describing the sequence. :param source: The method by which the sample for this entity was produced. :type source: :class:`ihm.source.Source` :param references: For a target (model) sequence, information about this entity stored in external databases (for example the sequence in UniProt). For references to structure databases for templates, see :class:`Template` instead. :type references: sequence of :class:`reference.TargetReference` objects See `ihm.Entity `_ for more information. """ # noqa: E501 # Provide ma-specific docs for Software Software.__doc__ = """Software used as part of the modeling protocol. :param str name: The name of the software. :param str classification: The major function of the software, for example 'model building', 'sample preparation', 'data collection'. :param str description: A longer text description of the software. :param str location: Place where the software can be found (e.g. URL). :param str type: Type of software (program/package/library/other). :param str version: The version used. :param citation: Publication describing the software. 
:type citation: :class:`ihm.Citation` Generally these objects are added to groups (see :class:`SoftwareGroup`) which can then be used to describe the software used in various parts of the modeling (``Software`` objects can also be used any place :class:`SoftwareGroup` are accepted, in which case they will act as if a group containing only a single member was used). See also :attr:`System.software`. """ # Provide ma-specific docs for Assembly Assembly.__doc__ = """A collection of parts of the system that were modeled together. :param sequence elements: Initial set of parts of the system. :param str name: Short text name of this assembly. :param str description: Longer text that describes this assembly. This is implemented as a simple list of asymmetric units (or parts of them), i.e. a list of :class:`AsymUnit` and/or :class:`AsymUnitRange` objects. An Assembly is typically passed to the :class:`modelcif.model.Model` constructor. Note that the ModelCIF dictionary has deprecated the corresponding ``ma_struct_assembly`` category, so any name or description of the assembly will not be written to the mmCIF file. The ModelCIF dictionary requires that all models have the same composition. """ class Database: """Information about a System that is part of an official database. If a :class:`System` is part of an official database (e.g. SwissModel, ModBase), this class contains details of the database identifiers. It should be passed to the :class:`System` constructor. :param str id: Abbreviated name of the database (e.g. PDB) :param str code: Identifier from the database (e.g. 1abc) """ def __init__(self, id, code): self.id, self.code = id, code class SoftwareGroup(list): """A number of :class:`Software` and/or :class:`SoftwareWithParameters` objects that are grouped together. 
This class can be used to group together multiple :class:`Software` objects if multiple pieces of software were used together to generate a single alignment (see :class:`modelcif.alignment.AlignmentMode`), to run a modeling step (see :class:`modelcif.protocol.Step`), or to calculate a model quality score (see :mod:`modelcif.qa_metric`). It behaves like a regular Python list. :class:`SoftwareWithParameters` allows including both a piece of software, and the parameters with which it was used, in the group. :param sequence elements: Initial set of :class:`Software` and/or :class:`SoftwareWithParameters` objects. """ def __init__(self, elements=(), parameters=None): super(SoftwareGroup, self).__init__(elements) if parameters: warnings.warn( "Parameters for SofwareGroup are ignored. To specify " "parameters for a piece of software, use the " "SoftwareWithParameters class.") self.parameters = [] if parameters is None else parameters class SoftwareWithParameters: """A piece of software and the parameters with which it was used. See :class:`SoftwareGroup`. :param software: The software that was used. :type software: :class:`modelcif.Software` :param sequence parameters: sequence of parameters for the software, as :class:`SoftwareParameter` objects. """ def __init__(self, software, parameters=None): self.software = software self.parameters = [] if parameters is None else parameters # Pass Software-specific fields through name = property(lambda self: self.software.name) classification = property(lambda self: self.software.classification) description = property(lambda self: self.software.description) location = property(lambda self: self.software.location) type = property(lambda self: self.software.type) version = property(lambda self: self.software.version) citation = property(lambda self: self.software.citation) class SoftwareParameter: """A single parameter given to software used in modeling. See :class:`SoftwareWithParameters`, :class:`SoftwareGroup`. 
:param str name: A short name for this parameter. :param value: Parameter value. :type value: ``int``, ``float``, ``str``, ``bool``, list of ``int``, or list of ``float``. :param str description: A longer description of the parameter. """ def __init__(self, name, value, description=None): self.name, self.value = name, value self.description = description def __repr__(self): return ("" % (self.name, self.value)) class Transformation: """Rotation and translation applied to an object. These objects are generally used to record the transformation that was applied to a :class:`Template` to generate the starting structure used in modeling. :param rot_matrix: Rotation matrix (as a 3x3 array of floats) that places the object in its final position. :param tr_vector: Translation vector (as a 3-element float list) that places the object in its final position. """ def __init__(self, rot_matrix, tr_vector): self.rot_matrix, self.tr_vector = rot_matrix, tr_vector """Return the identity transformation. :return: A new identity Transformation. :rtype: :class:`Transformation` """ @classmethod def identity(cls): if not hasattr(cls, '_identity_obj'): cls._identity_obj = cls( [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]], [0., 0., 0.]) return cls._identity_obj class TemplateSegment: """An aligned part of a template (see :class:`modelcif.alignment.Pair`). Usually these objects are created from a :class:`Template` using :meth:`Template.segment`, e.g. to get a segment covering residues 1 through 3 in `tmpl` use:: tmpl = modelcif.Template(entity, ...) 
seg = tmpl.segment('--ACG', 1, 3) """ def __init__(self, template, gapped_sequence, seq_id_begin, seq_id_end): self.template = template self.gapped_sequence = gapped_sequence self.seq_id_range = (seq_id_begin, seq_id_end) class _TemplateBase(modelcif.data.Data): """Base class for all templates; use Template or CustomTemplate""" data_content_type = "template structure" def __init__(self, entity, asym_id, model_num, transformation, name=None, strand_id=None, entity_id=None): super(_TemplateBase, self).__init__(name) self.entity = entity self.asym_id, self.model_num = asym_id, model_num self.transformation = transformation self._strand_id = strand_id self.entity_id = entity_id def segment(self, gapped_sequence, seq_id_begin, seq_id_end): """Get an object representing the alignment of part of this sequence. :param str gapped_sequence: Sequence of the segment, including gaps. :param int seq_id_begin: Start of the segment. :param int seq_id_end: End of the segment. """ # todo: cache so we return the same object for same parameters return TemplateSegment(self, gapped_sequence, seq_id_begin, seq_id_end) seq_id_range = property(lambda self: self.entity.seq_id_range, doc="Sequence range") template = property(lambda self: self) strand_id = property(lambda self: self._strand_id or self.asym_id, doc="PDB or author-provided strand/chain ID") class Template(_TemplateBase): """A single database chain that was used as a template structure for modeling. After creating a polymer template, use :meth:`segment` to denote the part of its sequence used in any modeling alignments (see :class:`modelcif.alignment.Pair`). Non-polymer templates do not have alignments, and should instead be passed to one or more :class:`NonPolymerFromTemplate` objects. Template objects can also be used as inputs or outputs in modeling protocol steps; see :class:`modelcif.protocol.Step`. This class is intended for templates that were taken from reference databases such as PDB. 
For a non-deposited "custom" template, use the :class:`CustomTemplate` class instead. :param entity: The sequence of the chain. :type entity: :class:`Entity` :param str asym_id: The asym or chain ID in the template structure. :param int model_num: The model number of the template structure. :param transformation: Rotation and translation applied to the original template structure to get the starting model used in modeling. :type transformation: :class:`Transformation` :param str name: A short name for this template. :param references: A list of pointers to reference databases (such as PDB) from which the template structure was taken. :type references: list of :class:`modelcif.reference.TemplateReference` objects :param str strand_id: PDB or "author-provided" strand/chain ID. If not specified, it will be the same as the regular asym_id. :param str entity_id: If known, the ID of the entity for this template in its own mmCIF file. """ def __init__(self, entity, asym_id, model_num, transformation, name=None, references=[], strand_id=None, entity_id=None): super(Template, self).__init__( entity=entity, asym_id=asym_id, model_num=model_num, transformation=transformation, name=name, strand_id=strand_id, entity_id=entity_id) self.references = [] self.references.extend(references) class CustomTemplate(_TemplateBase): """A chain that was used as a template structure for modeling. This class is intended for templates that have not been deposited in a database such as PDB (for deposited templates, use the :class:`Template` class instead). The coordinates of the atoms in these "custom" templates will be included in the mmCIF file; see the :attr:`atoms` member. :param str details: Information on how the template was created. See :class:`Template` for a description of the other parameters. 
""" def __init__(self, entity, asym_id, model_num, transformation, name=None, strand_id=None, entity_id=None, details=None): super(CustomTemplate, self).__init__( entity=entity, asym_id=asym_id, model_num=model_num, transformation=transformation, name=name, strand_id=strand_id, entity_id=entity_id) self.details = details #: Coordinates of all atoms as :class:`TemplateAtom` objects self.atoms = [] class TemplateAtom: """Coordinates of a single atom in a custom template. This provides the coordinates for a template that has not been deposited in a database. See :class:`CustomTemplate` for more information. These objects are added to the :attr:`CustomTemplate.atoms` list. :param int seq_id: The sequence ID of the residue represented by this atom. This should generally be a number starting at 1 for any polymer chain, water, or oligosaccharide. For ligands, a seq_id is not needed (as a given asym can only contain a single ligand), so either 1 or None can be used. :param str atom_id: The name of the atom in the residue :param str type_symbol: Element name :param float x: x coordinate of the atom :param float y: y coordinate of the atom :param float z: z coordinate of the atom :param bool het: True for HETATM sites, False (default) for ATOM :param float biso: Temperature factor or equivalent (if applicable) :param float occupancy: Fraction of the atom type present (if applicable) :param float charge: Formal charge (if applicable) :param int auth_seq_id: Author-provided sequence ID (if applicable; this is optional for polymers but required for ligands). 
:param str auth_atom_id: Author-provided atom name (if needed) :param str auth_comp_id: Author-provided residue name (if needed) """ # Reduce memory usage __slots__ = ['seq_id', 'atom_id', 'type_symbol', 'x', 'y', 'z', 'het', 'biso', 'occupancy', 'charge', 'auth_seq_id', 'auth_atom_id', 'auth_comp_id'] def __init__(self, seq_id, atom_id, type_symbol, x, y, z, het=False, biso=None, occupancy=None, charge=None, auth_seq_id=None, auth_atom_id=None, auth_comp_id=None): self.seq_id, self.atom_id = seq_id, atom_id self.type_symbol = type_symbol self.x, self.y, self.z = x, y, z self.het, self.biso = het, biso self.occupancy, self.charge = occupancy, charge self.auth_seq_id = auth_seq_id self.auth_atom_id, self.auth_comp_id = auth_atom_id, auth_comp_id class NonPolymerFromTemplate(AsymUnit): """A non-polymer (e.g. ligand) in the model that is modeled from a non-polymer template. These objects act just like :class:`AsymUnit` and should be added to :class:`Assembly`. To represent a non-polymer that is modeled without a template, just use a regular :class:`AsymUnit`. :param template: The non-polymer template used to model this non-polymer. :type template: :class:`Template` :param bool explicit: True iff the conformation of the template is allowed to change (e.g. bond relaxation, flexible fitting) during the modeling, or False if the template is treated as a single rigid body. For the other parameters, see :class:`AsymUnit`. """ def __init__(self, template, explicit, details=None, auth_seq_id_map=0, id=None, strand_id=None): super(NonPolymerFromTemplate, self).__init__( template.entity, details=details, auth_seq_id_map=auth_seq_id_map, id=id, strand_id=strand_id) self.template, self.explicit = template, explicit class ReferenceDatabase(modelcif.data.Data): """A reference database used in the modeling. This is typically a sequence database used for template search, alignments, etc. These objects are passed as input or output to :class:`modelcif.protocol.Step`. 
class Feature:
    """Base class for selecting parts of the system.

    This class should not be used itself; instead, see
    :class:`AtomFeature`, :class:`PolyResidueFeature`, and
    :class:`EntityInstanceFeature`.

    Generally it is expected that the entities selected by a given
    feature are all of the same type. For example, a feature should
    not select both a ligand and a polymer.

    Features are typically used in QA metrics, passed to
    :class:`modelcif.qa_metric.Feature` or
    :class:`modelcif.qa_metric.FeaturePairwise` objects.
    """
    details = None
    type = ihm.unknown

    def _get_entity_type(self, check=False):
        # The base class selects nothing, so the type is unknown
        return ihm.unknown

    def _check_entity_types(self, types, check):
        """Reduce a set of entity types to a single type name.

        :param types: The distinct entity types selected by the feature.
        :param bool check: If True, raise ValueError when the feature
               selects nothing or selects more than one type.
        :return: The single type if exactly one was given,
                 otherwise ``'other'``.
        """
        num_types = len(types)
        if check:
            if num_types > 1:
                raise ValueError(
                    "Feature %r selects entities of multiple types: %s"
                    % (self, list(types)))
            if num_types == 0:
                raise ValueError("Feature %r doesn't select anything" % self)
        if num_types == 1:
            return list(types)[0]
        return 'other'


class AtomFeature(Feature):
    """Selection of one or more atoms from the system.
    See :class:`Feature` for more information.

    Note that currently support for atom features in python-modelcif
    is rather rudimentary. They must be selected by their "id", not by
    the Atom Python object.

    :param sequence atoms: A list of atom indices (usually integers).
    :param str details: Additional text describing this feature.
    """
    type = 'atom'

    def __init__(self, atoms, details=None):
        self.atoms = atoms
        self.details = details

    def _get_entity_type(self, check=False):
        # We currently can't tell what type entity the atom IDs refer to
        return 'other'

    def _signature(self):
        return tuple(self.atoms)


class PolyResidueFeature(Feature):
    """Selection of one or more polymer residues from the system.
    See :class:`Feature` for more information.

    :param sequence residues: A list of :class:`Residue` objects.
    :param str details: Additional text describing this feature.
    """
    type = 'residue'

    def __init__(self, residues, details=None):
        self.residues = residues
        self.details = details

    def _get_entity_type(self, check=False):
        # Collect the distinct types of the entities the residues belong to
        entity_types = frozenset(res.entity.type for res in self.residues)
        return self._check_entity_types(entity_types, check)

    def _signature(self):
        return tuple(self.residues)


class EntityInstanceFeature(Feature):
    """Selection of one or more asyms from the system.
    See :class:`Feature` for more information.

    :param sequence asym_units: A list of :class:`AsymUnit` objects.
    :param str details: Additional text describing this feature.
    """
    type = 'entity instance'

    def __init__(self, asym_units, details=None):
        self.asym_units = asym_units
        self.details = details

    def _get_entity_type(self, check=False):
        # Collect the distinct types of the entities behind each asym
        entity_types = frozenset(asym.entity.type
                                 for asym in self.asym_units)
        return self._check_entity_types(entity_types, check)

    def _signature(self):
        return tuple(self.asym_units)
class Identity:
    """Percent sequence identity between the template sequence and
    the target sequence being modeled.

    Use the correct subclass that corresponds to the denominator used
    when calculating the identity, for example
    :class:`ShorterSequenceIdentity`, or if the denominator is not covered
    here, subclass this class and provide a docstring to describe the
    denominator, e.g.::

        class CustomIdentity(Identity):
            "my custom sequence identity denominator"

    :param float value: The percent sequence identity value.
    """
    denominator = "Other"

    def __init__(self, value):
        self.value = value

    def _get_other_details(self):
        """Return the first docstring line of a custom subclass.

        Only subclasses that keep the default "Other" denominator are
        described this way; for this class itself, or for a custom
        subclass that has no docstring, None is returned.
        """
        if (type(self) is not Identity
                and self.denominator == Identity.denominator):
            # Class docstrings are not inherited, so a subclass declared
            # without one has __doc__ set to None; don't fail on it.
            doc = self.__doc__
            if doc:
                return doc.split('\n')[0]
        return None

    other_details = property(
        _get_other_details,
        doc="More information about a custom sequence identity denominator. "
            "By default it is the first line of the docstring.")
class Pair:
    """A single pairwise alignment between a single target and template
    chain. See :class:`AlignmentMode`. An alignment consists of one or
    more of these pairs.

    :param template: The template segment that is aligned, i.e. the seq_id
           range for the template and the sequence (including gaps) of
           one-letter codes.
    :type template: :class:`modelcif.TemplateSegment`
    :param target: The target segment that is aligned.
    :type target: output from :meth:`ihm.AsymUnit.segment`
    :param identity: The sequence identity between target and template,
           if known.
    :type identity: :class:`Identity`
    :param score: A measure of the quality of the alignment, if known.
    :type score: :class:`Score`
    """

    def __init__(self, template, target, identity=None, score=None):
        # The two aligned segments
        self.template = template
        self.target = target
        # Optional quality measures for this pair
        self.score = score
        self.identity = identity
class Score:
    """Base class for a quality score for a given target-template alignment.
    Usually a derived class such as :class:`BLASTEValue` is used,
    and passed to :class:`Pair` objects.

    A custom score type can be described by subclassing this class and
    providing a docstring, whose first line is reported as
    ``other_details``.

    :param float value: The actual score value.
    """
    type = "Other"

    def __init__(self, value):
        self.value = value

    def _get_other_details(self):
        """Return the first docstring line of a custom subclass.

        Only subclasses that keep the default "Other" type are described
        this way; for this class itself, or for a custom subclass that
        has no docstring, None is returned.
        """
        if type(self) is not Score and self.type == Score.type:
            # Class docstrings are not inherited, so __doc__ may be None
            doc = self.__doc__
            if doc:
                return doc.split('\n')[0]
        return None

    # The alignment dumper reads ``score.other_details`` for every score,
    # so the attribute must exist on custom subclasses too, in the same
    # way that Identity provides it.
    other_details = property(
        _get_other_details,
        doc="More information about a custom score type. "
            "By default it is the first line of the docstring.")


class BLASTEValue(Score):
    """BLAST e-value for an alignment.
    See :class:`Score` for more details."""
    type = "BLAST e-value"
    other_details = None


class HHblitsEValue(Score):
    """E-value computed by HHblits for an alignment.
    See :class:`Score` for more details."""
    type = "HHblits e-value"
    other_details = None
class File:
    """A single associated file. These objects can be added to
    a :class:`Repository` or a :class:`ZipFile`.

    :param str path: File name.
    :param str details: Any additional information about the file.
    :param data: If available, the data (e.g. sequence, structure,
           alignment) that are stored in the file.
    :type data: :class:`~modelcif.data.Data`
    """
    file_type = 'file'
    file_content = 'other'
    file_format = 'other'

    def __init__(self, path, details=None, data=None):
        self.path, self.details, self.data = path, details, data


class CIFFile(File):
    """An associated file in mmCIF or BinaryCIF format.
    See :class:`File` for more details.

    :param str path: File name that will be used to construct URLs in
           the main mmCIF file (see :class:`Repository` or
           :class:`ZipFile`).
    :param str details: Any additional information about the file.
    :param data: If available, the data (e.g. sequence, structure,
           alignment) that are stored in the file.
    :type data: :class:`~modelcif.data.Data`
    :param list categories: If given, any mmCIF category names in this
           list are written out to ``local_path`` by
           :func:`modelcif.dumper.write` instead of to the primary
           file handle. If not given, a new empty list is used.
    :param list copy_categories: If given, any mmCIF category names in
           this list are written out to both ``local_path`` by
           :func:`modelcif.dumper.write` and the primary file handle.
           If not given, a new empty list is used.
    :param str entry_id: Unique identifier for the associated file,
           if written (by specifying ``categories`` or
           ``copy_categories``).
    :param str entry_details: A comment to be added to the associated
           file, if written (by specifying ``categories`` or
           ``copy_categories``).
    :param str local_path: File name that will be used for ``categories``
           or ``copy_categories``. If not given, it defaults to the same
           as ``path``. (The file is always written directly to the local
           disk, even if this object is placed inside a :class:`ZipFile`.)
    :param bool binary: If False (the default), any output file is
           written in mmCIF format; if True, the file is written in
           BinaryCIF.
    """
    _binary_ff_map = {True: 'bcif', False: 'cif'}

    file_format = property(lambda self: self._binary_ff_map[self.binary],
                           doc="Format of the file (BinaryCIF or mmCIF)")

    def __init__(self, path, details=None, categories=None,
                 copy_categories=None, entry_id='model', entry_details=None,
                 local_path=None, binary=False, data=None):
        super(CIFFile, self).__init__(path, details, data)
        # Give every instance its own list. A literal [] default would be
        # created once and shared by all instances, so appending to one
        # file's categories would silently change every other file's.
        self.categories = [] if categories is None else categories
        self.copy_categories = ([] if copy_categories is None
                                else copy_categories)
        self.id = entry_id
        self.entry_details = entry_details
        self.local_path = local_path or path
        self.binary = binary


class QAMetricsFile(CIFFile):
    """An associated file in CIF format containing QA metrics.
    See :class:`CIFFile` for more details.
    """
    file_content = 'QA metrics'


# Map old class name to new equivalent
class LocalPairwiseQAScoresFile(QAMetricsFile):
    def __init__(self, *args, **keys):
        warnings.warn("LocalPairwiseQAScoresFile is deprecated. "
                      "Use QAMetricsFile instead.", stacklevel=2)
        super(LocalPairwiseQAScoresFile, self).__init__(*args, **keys)


class ZipFile(File):
    """An associated archive file in zip format, containing other files.
    See :class:`File` for more details.

    :param list files: A list of the :class:`File` objects contained
           within this archive. Note that an archive cannot contain
           another archive. If not given, a new empty list is used.
    """
    file_type = 'archive'
    file_content = 'archive with multiple files'
    file_format = 'zip'

    def __init__(self, path, details=None, files=None, data=None):
        super(ZipFile, self).__init__(path, details, data)
        # A fresh list per archive; a shared [] default would make every
        # ZipFile created without ``files`` contain the same members.
        self.files = [] if files is None else files
class Descriptor:
    """Base class for all descriptors.

    This class is generally not used directly; instead, a subclass
    such as :class:`IUPACName` or :class:`InChI` is employed.

    :param str value: The actual name or identifier describing
           the chemistry.
    :param str details: Additional details about this descriptor.
    :param software: The software used to generate the descriptor, if any.
    :type software: :class:`modelcif.Software`
    """

    def __init__(self, value, details=None, software=None):
        self.value = value
        self.details = details
        self.software = software

    def __repr__(self):
        # Show the concrete subclass name together with the wrapped value
        cls_name = self.__class__.__name__
        value_repr = repr(self.value)
        return "<%s(%s)>" % (cls_name, value_repr)
class _AuditConformDumper(Dumper):
    """Write the ``_audit_conform`` category, giving the name, version
    and location of the ``mmcif_ma.dic`` dictionary."""

    # The %s placeholder is filled with a git revision of the
    # ModelCIF repository
    URL = ("https://raw.githubusercontent.com/ihmwg/ModelCIF/%s/dist/"
           + "mmcif_ma.dic")

    def dump(self, system, writer):
        with writer.category("_audit_conform") as lp:
            # Update to match the version of the ModelCIF dictionary
            # we support:
            dict_location = self.URL % "80e1e22"
            lp.write(dict_name="mmcif_ma.dic",
                     dict_version="1.4.7",
                     dict_location=dict_location)
def dump(self, system, writer):
    """Write one ``_chem_comp`` row per distinct chemical component.

    Components are gathered from the sequences of every entity returned
    by ``self._get_entities``, de-duplicated, and written in order of
    their ``id``.
    """
    comps = frozenset(
        comp for e in self._get_entities(system) for comp in e.sequence)
    by_id = operator.attrgetter('id')
    column_names = ["id", "type", "name", "formula", "formula_weight",
                    "ma_provenance"]
    with writer.loop("_chem_comp", column_names) as lp:
        for comp in sorted(comps, key=by_id):
            lp.write(id=comp.id,
                     type=comp.type,
                     name=comp.name,
                     formula=comp.formula,
                     formula_weight=comp.formula_weight,
                     ma_provenance=self._get_provenance(comp))
class _TargetEntityDumper(Dumper):
    """Write the ``_ma_target_entity`` and ``_ma_target_entity_instance``
    categories for the system's entities and asym units."""

    def dump(self, system, writer):
        self._dump_entities(system, writer)
        self._dump_instances(system, writer)

    def _dump_entities(self, system, writer):
        # One row per entity; an entity with no references is reported
        # as "designed"
        columns = ["entity_id", "data_id", "origin"]
        with writer.loop("_ma_target_entity", columns) as lp:
            for e in system.entities:
                lp.write(entity_id=e._id, data_id=e._data_id,
                         origin="reference database" if e.references
                         else "designed")

    def _dump_instances(self, system, writer):
        # One row per asym unit, pointing back to its entity
        columns = ["asym_id", "entity_id", "details"]
        with writer.loop("_ma_target_entity_instance", columns) as lp:
            for asym in system.asym_units:
                lp.write(asym_id=asym._id, entity_id=asym.entity._id,
                         details=asym.details)
def dump_parameters(self, system, writer):
    """Write the ``_ma_software_parameter`` category.

    One row is written for every parameter in every parameter group
    collected in ``self._param_groups``. List or tuple values are
    converted by ``self._handle_list``; scalar values are written as-is
    with a data type name chosen from their Python type.

    :param system: Unused here; the groups were collected beforehand.
    :param writer: The CIF writer to write the loop to.
    """
    parameter_id = itertools.count(1)
    type_map = {int: "integer", float: "float", str: "string",
                bool: "boolean"}
    with writer.loop(
            "_ma_software_parameter",
            ["parameter_id", "group_id", "data_type", "name", "value",
             "description"]) as lp:
        for g in self._param_groups:
            group_id = self._param_group_id[id(g)]
            for p in g:
                if isinstance(p.value, (list, tuple)):
                    data_type, value = self._handle_list(p.value)
                else:
                    # Fall back to the data type *name* "string" for any
                    # other Python type. (The fallback used to be the
                    # ``str`` class itself, which is not a valid
                    # data_type value.)
                    data_type = type_map.get(type(p.value), "string")
                    value = p.value
                lp.write(parameter_id=next(parameter_id),
                         group_id=group_id, data_type=data_type,
                         name=p.name, value=value,
                         description=p.description)
def finalize(self, system):
    """Assign 1-based IDs to templates, template segments and alignments,
    each numbered in the order it appears in ``system``."""
    for template_id, tmpl in enumerate(system.templates, 1):
        tmpl._id = template_id
    for segment_id, segment in enumerate(system.template_segments, 1):
        # Cannot use _id since segment might also be a complete template
        # (with _id = template id)
        segment._segment_id = segment_id
    for alignment_id, aln in enumerate(system.alignments, 1):
        aln._id = alignment_id
def _get_canon(self, entity):
    """Get the canonical sequence for an entity as a string"""
    canonical_codes = (comp.code_canonical for comp in entity.sequence)
    # Split into lines to get tidier CIF output
    return "\n".join(_prettyprint_seq(canonical_codes, 70))
writer.loop( "_ma_template_coord", ["template_id", "group_PDB", "ordinal_id", "type_symbol", "label_atom_id", "label_comp_id", "label_seq_id", "label_asym_id", "auth_seq_id", "auth_asym_id", "auth_atom_id", "auth_comp_id", "Cartn_x", "Cartn_y", "Cartn_z", "occupancy", "label_entity_id", "B_iso_or_equiv", "formal_charge"]) as lp: for tmpl in system.templates: if not isinstance(tmpl, modelcif.CustomTemplate): continue e = tmpl.entity for atom in tmpl.atoms: lp.write(template_id=tmpl._id, group_PDB='HETATM' if atom.het else 'ATOM', ordinal_id=next(ordinal), type_symbol=atom.type_symbol, label_atom_id=atom.atom_id, label_comp_id=e.sequence[atom.seq_id - 1].id, label_seq_id=atom.seq_id, label_asym_id=tmpl.asym_id, auth_seq_id=atom.auth_seq_id, auth_asym_id=tmpl.strand_id, auth_atom_id=atom.auth_atom_id, auth_comp_id=atom.auth_comp_id, Cartn_x=atom.x, Cartn_y=atom.y, Cartn_z=atom.z, occupancy=atom.occupancy, label_entity_id=tmpl.entity_id, B_iso_or_equiv=atom.biso, formal_charge=atom.charge) def dump_target_template_poly_mapping(self, system, writer): ordinal = itertools.count(1) with writer.loop("_ma_target_template_poly_mapping", ["id", "template_segment_id", "target_asym_id", "target_seq_id_begin", "target_seq_id_end"]) as lp: for a in system.alignments: for p in a.pairs: lp.write( id=next(ordinal), template_segment_id=p.template._segment_id, target_asym_id=p.target.asym._id, target_seq_id_begin=p.target.seq_id_range[0], target_seq_id_end=p.target.seq_id_range[1]) def dump_info(self, system, writer): with writer.loop( "_ma_alignment_info", ["alignment_id", "data_id", "software_group_id", "alignment_length", "alignment_type", "alignment_mode"]) as lp: for a in system.alignments: if a.pairs: align_len = max(len(s.gapped_sequence) for pair in a.pairs for s in (pair.template, pair.target)) else: align_len = None lp.write(alignment_id=a._id, data_id=a._data_id, software_group_id=a.software._group_id if a.software else None, alignment_type=a.type, alignment_mode=a.mode, 
alignment_length=align_len, alignment_type_other_details=a.other_details) def dump_details(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_alignment_details", ["ordinal_id", "alignment_id", "template_segment_id", "target_asym_id", "score_type", "score_type_other_details", "score_value", "percent_sequence_identity", "sequence_identity_denominator", "sequence_identity_denominator_other_details"]) as lp: for a in system.alignments: for s in a.pairs: if s.identity is None: denom = od = identity = None else: denom = s.identity.denominator od = s.identity.other_details identity = s.identity.value if s.score is None: score_type = score_other_details = score_value = None else: score_type = s.score.type score_other_details = s.score.other_details score_value = s.score.value lp.write(ordinal_id=next(ordinal), alignment_id=a._id, template_segment_id=s.template._segment_id, target_asym_id=s.target.asym._id, score_type=score_type, score_type_other_details=score_other_details, score_value=score_value, percent_sequence_identity=identity, sequence_identity_denominator=denom, sequence_identity_denominator_other_details=od) def dump_sequences(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_alignment", ["ordinal_id", "alignment_id", "target_template_flag", "sequence"]) as lp: for a in system.alignments: # todo: don't duplicate sequences for s in a.pairs: # 1=target, 2=template lp.write(ordinal_id=next(ordinal), alignment_id=a._id, target_template_flag=1, sequence=s.target.gapped_sequence) lp.write(ordinal_id=next(ordinal), alignment_id=a._id, target_template_flag=2, sequence=s.template.gapped_sequence) class _ProtocolDumper(Dumper): def finalize(self, system): # Assign IDs to protocols and steps for np, p in enumerate(system.protocols): p._id = np + 1 for ns, s in enumerate(p.steps): s._id = ns + 1 def dump(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_protocol_step", ['ordinal_id', 'protocol_id', 
'step_id', 'method_type', 'step_name', 'details', 'software_group_id', 'input_data_group_id', 'output_data_group_id']) as lp: for p in system.protocols: for s in p.steps: lp.write(ordinal_id=next(ordinal), protocol_id=p._id, step_id=s._id, method_type=s.method_type, step_name=s.name, details=s.details, software_group_id=s.software._group_id if s.software else None, input_data_group_id=s.input_data._data_group_id if s.input_data else None, output_data_group_id=s.output_data._data_group_id if s.output_data else None) class _ModelDumper(ihm.dumper._ModelDumperBase): def dump(self, system, writer): self.dump_model_list(system, writer) self.dump_model_groups(system, writer) seen_types = self.dump_atoms(system, writer, add_ihm=False) self.dump_atom_type(seen_types, system, writer) def dump_model_list(self, system, writer): with writer.loop("_ma_model_list", ["ordinal_id", "model_name", "data_id", "model_type", "model_type_other_details"]) as lp: for group, model in sorted(system._all_models(), key=lambda x: x[1]._id): lp.write(ordinal_id=model._id, model_name=model.name, data_id=model._data_id, model_type=model.model_type, model_type_other_details=model.other_details) def dump_model_groups(self, system, writer): self.dump_model_group_summary(system, writer) self.dump_model_group_link(system, writer) def dump_model_group_summary(self, system, writer): with writer.loop("_ma_model_group", ["id", "name", "details"]) as lp: for group in system.model_groups: # ihm.model.ModelGroup only supports details after v1.8 lp.write(id=group._id, name=group.name, details=group.details if hasattr(group, 'details') else None) def dump_model_group_link(self, system, writer): with writer.loop("_ma_model_group_link", ["group_id", "model_id"]) as lp: for group in system.model_groups: for model_id in sorted(set(model._id for model in group)): lp.write(model_id=model_id, group_id=group._id) class _AssociatedDumper(Dumper): def finalize(self, system): file_id = itertools.count(1) 
in_archive_file_id = itertools.count(1) for repo in system.repositories: for f in repo.files: f._id = next(file_id) if hasattr(f, 'files'): for af in f.files: if hasattr(af, 'files'): raise ValueError( "An archive cannot contain another archive") af._id = next(in_archive_file_id) def dump(self, system, writer): self.dump_files(system, writer) self.dump_archive_files(system, writer) def dump_files(self, system, writer): with writer.loop( "_ma_entry_associated_files", ["id", "entry_id", "file_url", "file_type", "file_format", "file_content", "details", "data_id"]) as lp: for repo in system.repositories: for f in repo.files: lp.write(id=f._id, entry_id=system.id, file_url=repo.get_url(f), file_type=f.file_type, file_format=f.file_format, file_content=f.file_content, details=f.details, data_id=f.data._data_id if f.data else None) def dump_archive_files(self, system, writer): with writer.loop( "_ma_associated_archive_file_details", ["id", "archive_file_id", "file_path", "file_format", "file_content", "description", "data_id"]) as lp: for repo in system.repositories: for f in repo.files: if not hasattr(f, 'files'): continue for af in f.files: lp.write(id=af._id, archive_file_id=f._id, file_path=af.path, file_format=af.file_format, file_content=af.file_content, description=af.details, data_id=af.data._data_id if af.data else None) class _FeatureDumper(Dumper): def finalize(self, system): seen_features = {} self._features_by_id = [] for f in system._all_features(): util._remove_id(f) for f in system._all_features(): util._assign_id(f, seen_features, self._features_by_id, seen_obj=f._signature()) def dump(self, system, writer): self.dump_list(writer) self.dump_atom(writer) self.dump_residue(writer) self.dump_instance(writer) def dump_list(self, writer): with writer.loop("_ma_feature_list", ["feature_id", "feature_type", "entity_type", "details"]) as lp: for f in self._features_by_id: lp.write(feature_id=f._id, feature_type=f.type, 
entity_type=f._get_entity_type(check=self._check), details=f.details) def dump_atom(self, writer): ordinal = itertools.count(1) with writer.loop("_ma_atom_feature", ["ordinal_id", "feature_id", "atom_id"]) as lp: for f in self._features_by_id: if not isinstance(f, modelcif.AtomFeature): continue for a in f.atoms: lp.write(ordinal_id=next(ordinal), feature_id=f._id, atom_id=a) def dump_residue(self, writer): ordinal = itertools.count(1) with writer.loop("_ma_poly_residue_feature", ["ordinal_id", "feature_id", "label_asym_id", "label_seq_id", "label_comp_id"]) as lp: for f in self._features_by_id: if not isinstance(f, modelcif.PolyResidueFeature): continue for r in f.residues: seq = r.entity.sequence lp.write(ordinal_id=next(ordinal), feature_id=f._id, label_asym_id=r.asym._id, label_seq_id=r.seq_id, label_comp_id=seq[r.seq_id - 1].id) def dump_instance(self, writer): ordinal = itertools.count(1) with writer.loop("_ma_entity_instance_feature", ["ordinal_id", "feature_id", "label_asym_id"]) as lp: for f in self._features_by_id: if not isinstance(f, modelcif.EntityInstanceFeature): continue for a in f.asym_units: lp.write(ordinal_id=next(ordinal), feature_id=f._id, label_asym_id=a._id) class _QAMetricDumper(Dumper): def finalize(self, system): # Get all metric classes used by all systems seen_metric_classes = set() self._metric_classes_by_id = [] metric_id = itertools.count(1) for group, model in system._all_models(): for m in model.qa_metrics: cls = type(m) if cls not in seen_metric_classes: seen_metric_classes.add(cls) cls._id = next(metric_id) # We need an instance of the class in case name or # description are provided by property() self._metric_classes_by_id.append(m) def dump(self, system, writer): self.dump_metric_types(system, writer) self.dump_metric_global(system, writer) self.dump_metric_local(system, writer) self.dump_metric_pairwise(system, writer) self.dump_metric_feature(system, writer) self.dump_metric_feature_pairwise(system, writer) def 
dump_metric_types(self, system, writer): with writer.loop( "_ma_qa_metric", ["id", "name", "description", "type", "mode", "type_other_details", "software_group_id"]) as lp: for m in self._metric_classes_by_id: lp.write(id=m._id, name=m.name, description=m.description, type=m.type, mode=m.mode, type_other_details=m.other_details, software_group_id=m.software._group_id if m.software else None) def dump_metric_global(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_qa_metric_global", ["ordinal_id", "model_id", "metric_id", "metric_value"]) as lp: for group, model in system._all_models(): for m in model.qa_metrics: if not isinstance(m, modelcif.qa_metric.Global): continue lp.write(ordinal_id=next(ordinal), model_id=model._id, metric_id=m._id, metric_value=m.value) def dump_metric_local(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_qa_metric_local", ["ordinal_id", "model_id", "label_asym_id", "label_seq_id", "label_comp_id", "metric_id", "metric_value"]) as lp: for group, model in system._all_models(): for m in model.qa_metrics: if not isinstance(m, modelcif.qa_metric.Local): continue seq = m.residue.asym.entity.sequence lp.write(ordinal_id=next(ordinal), model_id=model._id, label_asym_id=m.residue.asym._id, label_seq_id=m.residue.seq_id, label_comp_id=seq[m.residue.seq_id - 1].id, metric_id=m._id, metric_value=m.value) def dump_metric_pairwise(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_qa_metric_local_pairwise", ["ordinal_id", "model_id", "label_asym_id_1", "label_seq_id_1", "label_comp_id_1", "label_asym_id_2", "label_seq_id_2", "label_comp_id_2", "metric_id", "metric_value"]) as lp: for group, model in system._all_models(): for m in model.qa_metrics: if not isinstance(m, modelcif.qa_metric.LocalPairwise): continue seq1 = m.residue1.asym.entity.sequence seq2 = m.residue2.asym.entity.sequence lp.write(ordinal_id=next(ordinal), model_id=model._id, label_asym_id_1=m.residue1.asym._id, 
label_seq_id_1=m.residue1.seq_id, label_comp_id_1=seq1[m.residue1.seq_id - 1].id, label_asym_id_2=m.residue2.asym._id, label_seq_id_2=m.residue2.seq_id, label_comp_id_2=seq2[m.residue2.seq_id - 1].id, metric_id=m._id, metric_value=m.value) def dump_metric_feature(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_qa_metric_feature", ["ordinal_id", "model_id", "feature_id", "metric_id", "metric_value"]) as lp: for group, model in system._all_models(): for m in model.qa_metrics: if not isinstance(m, modelcif.qa_metric.Feature): continue lp.write(ordinal_id=next(ordinal), model_id=model._id, feature_id=m.feature._id, metric_id=m._id, metric_value=m.value) def dump_metric_feature_pairwise(self, system, writer): ordinal = itertools.count(1) with writer.loop( "_ma_qa_metric_feature_pairwise", ["ordinal_id", "model_id", "feature_id_1", "feature_id_2", "metric_id", "metric_value"]) as lp: for group, model in system._all_models(): for m in model.qa_metrics: if not isinstance(m, modelcif.qa_metric.FeaturePairwise): continue lp.write(ordinal_id=next(ordinal), model_id=model._id, feature_id_1=m.feature1._id, feature_id_2=m.feature2._id, metric_id=m._id, metric_value=m.value) class _CopyWriter: """Context manager to write loop or category to two mmCIF/BinaryCIF files""" def __init__(self, w1, w2): self.w1, self.w2 = w1, w2 def write(self, *args, **keys): self.w1.write(*args, **keys) self.w2.write(*args, **keys) def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): # This may not correctly handle exceptions raised within the loop self.w1.__exit__(exc_type, exc_value, traceback) self.w2.__exit__(exc_type, exc_value, traceback) class _SystemWriter: """Utility class which normally just passes through to the default ``base_writer``, but outputs selected categories to associated files.""" def __init__(self, base_writer, category_map, copy_category_map): self._base_writer = base_writer self.category_map = category_map 
self.copy_category_map = copy_category_map def category(self, category): w = self.copy_category_map.get(category) if w: return _CopyWriter(w.category(category), self._base_writer.category(category)) else: w = self.category_map.get(category, self._base_writer) return w.category(category) def loop(self, category, keys): w = self.copy_category_map.get(category) if w: return _CopyWriter(w.loop(category, keys), self._base_writer.loop(category, keys)) else: w = self.category_map.get(category, self._base_writer) return w.loop(category, keys) def end_block(self): # Flush and close all file handles of associated files for w in self.category_map.values(): if not hasattr(w, 'fh'): continue w.flush() w.fh.close() del w.fh # Just pass through to base writer object def flush(self): return self._base_writer.flush() def start_block(self, name): return self._base_writer.start_block(name) def write_comment(self, comment): return self._base_writer.write_comment(comment) class ModelCIFVariant(Variant): """Used to select typical PDBx/ModelCIF file output. 
See :func:`write` and :class:`ihm.dumper.Variant`.""" _dumpers = [ ihm.dumper._EntryDumper, # must be first ihm.dumper._StructDumper, ihm.dumper._CommentDumper, _AuditConformDumper, _DatabaseDumper, ihm.dumper._CitationDumper, ihm.dumper._SoftwareDumper, _SoftwareGroupDumper, ihm.dumper._AuditAuthorDumper, ihm.dumper._AuditRevisionDumper, ihm.dumper._DataUsageDumper, ihm.dumper._GrantDumper, _ChemCompDumper, _ChemCompDescriptorDumper, ihm.dumper._EntityDumper, ihm.dumper._EntitySrcGenDumper, ihm.dumper._EntitySrcNatDumper, ihm.dumper._EntitySrcSynDumper, ihm.dumper._StructRefDumper, _TargetRefDBDumper, ihm.dumper._EntityPolyDumper, _EntityNonPolyDumper, ihm.dumper._EntityPolySeqDumper, ihm.dumper._StructAsymDumper, ihm.dumper._PolySeqSchemeDumper, ihm.dumper._NonPolySchemeDumper, _DataDumper, _DataGroupDumper, _DataRefDBDumper, _TargetEntityDumper, _TemplateTransformDumper, _AlignmentDumper, _ProtocolDumper, _ModelDumper, _AssociatedDumper, _FeatureDumper, _QAMetricDumper] def get_dumpers(self): return [d() for d in self._dumpers] def get_system_writer(self, system, writer_class, writer): # Get a Writer-like object which outputs selected categories to # associated files (the rest use the default writer) category_map = {} copy_category_map = {} def _all_repo_files(r): for f in r.files: yield f if hasattr(f, 'files'): for subf in f.files: yield subf for r in system.repositories: for f in _all_repo_files(r): if (not hasattr(f, 'categories') or (not f.categories and not f.copy_categories)): continue if f.binary: w = ihm.format_bcif.BinaryCifWriter( open(f.local_path, 'wb')) else: w = ihm.format.CifWriter(open(f.local_path, 'w')) # Write header information to the associated file dumpers = (ihm.dumper._EntryDumper(), _EntryLinkDumper()) # We are passing the File object to the dumpers here where # they expect a System object, but the interfaces are similar # enough, so we don't need a facade object. 
for d in dumpers: d.finalize(f) for d in dumpers: d.dump(f, w) for c in f.categories: # Allow for categories with or without leading underscore category_map['_' + c.lstrip('_').lower()] = w for c in f.copy_categories: copy_category_map['_' + c.lstrip('_').lower()] = w if category_map or copy_category_map: return _SystemWriter(writer, category_map, copy_category_map) else: # If no categories, we can just use the base writer return writer def write(fh, systems, format='mmCIF', dumpers=[], variant=ModelCIFVariant, check=True): """Write out all `systems` to the file handle `fh`. See :func:`ihm.dumper.write` for more information. The function here behaves similarly but writes out files compliant with the ModelCIF extension directory rather than IHM.""" return ihm.dumper.write(fh, systems, format, dumpers, variant, check=check) python-modelcif-1.5/modelcif/model.py000066400000000000000000000120251506655355100177270ustar00rootroot00000000000000import ihm.representation from ihm.model import Atom, ModelGroup # noqa: F401 import modelcif.data from ihm.util import _check_residue_range # Provide ma-specific docs for Atom Atom.__doc__ = """Coordinates of part of the model represented by an atom. See :meth:`Model.get_atoms` for more details. :param asym_unit: The asymmetric unit that this atom represents :type asym_unit: :class:`modelcif.AsymUnit` :param int seq_id: The residue index represented by this atom (can be None for HETATM sites) :param str atom_id: The name of the atom in the residue :param str type_symbol: Element name :param float x: x coordinate of the atom :param float y: y coordinate of the atom :param float z: z coordinate of the atom :param bool het: True for HETATM sites, False (default) for ATOM :param float biso: Temperature factor or equivalent (if applicable) :param float occupancy: Fraction of the atom type present (if applicable) """ # Provide ma-specific docs for ModelGroup ModelGroup.__doc__ = """A set of related models. See :class:`Model`. 
It is implemented as a simple list of the models. These objects are typically stored directly in the system; see :attr:`modelcif.System.model_groups`. :param elements: Initial set of models in the group. :param str name: Descriptive name for the group. """ class Model(modelcif.data.Data): """Base class for coordinates of a single structure. Use a subclass such as :class:`HomologyModel` or :class:`AbInitioModel`, or represent a custom model type by creating a new subclass and providing a docstring to describe it, e.g.:: class CustomModel(Model): "custom model type" :param assembly: The :class:`modelcif.AsymUnit` objects that make up this model. :type assembly: :class:`modelcif.Assembly` :param str name: Short name for this model. """ data_content_type = 'model coordinates' model_type = "Other" def __init__(self, assembly, name=None): modelcif.data.Data.__init__(self, name) self.assembly = assembly # Assume everything is atomic for ModelCIF models self.representation = ihm.representation.Representation( [ihm.representation.AtomicSegment(seg, rigid=False) for seg in assembly]) self._atoms = [] #: List of residue ranges that were explicitly not modeled. See #: :class:`NotModeledResidueRange`. self.not_modeled_residue_ranges = [] #: Quality scores for the model or part of it (a simple list of #: metric objects; see :mod:`modelcif.qa_metric`) self.qa_metrics = [] def _get_other_details(self): if (type(self) is not Model and self.model_type == Model.model_type): return self.__doc__.split('\n')[0] other_details = property( _get_other_details, doc="More information about a custom model type. " "By default it is the first line of the docstring.") def get_atoms(self): """Yield :class:`Atom` objects that represent this model. The default implementation simply iterates over an internal list of atoms, but this is not very memory-efficient, particularly if the atoms are already stored somewhere else, e.g. in the software's own data structures. 
It is recommended to subclass and provide a more efficient implementation. For example, `the modbase_pdb_to_cif script `_ uses a custom ``MyModel`` subclass that creates Atom objects on the fly from PDB ATOM or HETATM lines. """ # noqa: E501 for a in self._atoms: yield a def add_atom(self, atom): self._atoms.append(atom) class HomologyModel(Model): """Coordinates of a single structure generated using homology or comparative modeling. See :class:`Model` for a description of the parameters. """ model_type = "Homology model" other_details = None class AbInitioModel(Model): """Coordinates of a single structure generated using ab initio modeling. See :class:`Model` for a description of the parameters. """ model_type = "Ab initio model" other_details = None class NotModeledResidueRange: """A range of residues that were explicitly not modeled. See :attr:`Model.not_modeled_residue_ranges`. These ranges are not explicitly stored in the mmCIF file, but they will be excluded from the ``pdbx_poly_seq_scheme`` table. :param asym_unit: The asymmetric unit to which the residues belong. :type asym_unit: :class:`~modelcif.AsymUnit` :param int seq_id_begin: Starting residue in the range. :param int seq_id_end: Ending residue in the range. """ def __init__(self, asym_unit, seq_id_begin, seq_id_end): self.asym_unit = asym_unit self.seq_id_begin, self.seq_id_end = seq_id_begin, seq_id_end _check_residue_range((seq_id_begin, seq_id_end), asym_unit.entity) python-modelcif-1.5/modelcif/protocol.py000066400000000000000000000063441506655355100204770ustar00rootroot00000000000000"""Classes for handling modeling protocols. """ class Step: """A single step in a :class:`Protocol`. This class describes a generic step in a modeling protocol. In most cases, a more specific subclass should be used, such as :class:`TemplateSearchStep`, :class:`ModelingStep`, or :class:`ModelSelectionStep`. :param input_data: Any objects that this step takes as input. 
Any individual :class:`modelcif.data.Data` object (such as a template structure, target sequence, alignment, or model coordinates) can be given here, or a group of such objects (as a :class:`modelcif.data.DataGroup` object) can be passed. :type input_data: :class:`modelcif.data.DataGroup` or :class:`modelcif.data.Data` :param output_data: Any objects that this step creates as output, similarly to ``input_data``. :type output_data: :class:`modelcif.data.DataGroup` or :class:`modelcif.data.Data` :param str name: A short name for this step. :param str details: Longer description of this step. :param software: The software that was employed in this modeling step. :type software: :class:`modelcif.Software` or :class:`modelcif.SoftwareGroup` """ method_type = "other" def __init__(self, input_data, output_data, name=None, details=None, software=None): self.input_data, self.output_data = input_data, output_data self.name, self.details, self.software = name, details, software class TemplateSearchStep(Step): """A modeling protocol step that searches for templates. See :class:`Step` for more details.""" method_type = "template search" class TemplateSelectionStep(Step): """A modeling protocol step that selects one or more templates. See :class:`Step` for more details.""" method_type = "template selection" class TargetTemplateAlignmentStep(Step): """A modeling protocol step that creates a target-template alignment. See :class:`Step` for more details.""" method_type = "target-template alignment" class CoevolutionMSAStep(Step): """A modeling protocol step that creates a coevolution multiple sequence alignment. See :class:`Step` for more details.""" method_type = "coevolution MSA" class ContactPredictionStep(Step): """A modeling protocol step that predicts contacts. See :class:`Step` for more details.""" method_type = "contact prediction" class ModelingStep(Step): """A modeling protocol step that generates model coordinates. 
See :class:`Step` for more details.""" method_type = "modeling" class ModelSelectionStep(Step): """A modeling protocol step that filters candidates to select models. See :class:`Step` for more details.""" method_type = "model selection" class ModelRefinementStep(Step): """A modeling protocol step that refines models. See :class:`Step` for more details.""" method_type = "model refinement" class Protocol: """A modeling protocol. Each protocol consists of a number of protocol steps.""" def __init__(self): #: All modeling steps (:class:`Step` objects) self.steps = [] python-modelcif-1.5/modelcif/qa_metric.py000066400000000000000000000214431506655355100205770ustar00rootroot00000000000000"""Classes to annotate models with quality scores. To use, first declare a class for the desired score by deriving from both a subclass of :class:`MetricMode` (which defines the part of the system the metric applies to) and a subclass of :class:`MetricType` (which describes the meaning of the score value). Set the ``software`` attribute to point to the software used to calculate the metric (as a :class:`modelcif.SoftwareGroup` or :class:`modelcif.Software` object). For example to declare a global distance score:: class MyScore(modelcif.qa_metric.Global, modelcif.qa_metric.Distance): "My distance-based quality score" software = modelcif.Software(...) The name and description of the score in the mmCIF file will be taken from the name and docstring of the Python class, unless the :attr:`MetricMode.name` or :attr:`MetricMode.description` attributes are overridden in the subclass. QA metric objects should be added to :attr:`modelcif.model.Model.qa_metrics`. """ class MetricMode: """Base class for the mode of a quality metric. Use a derived class such as :class:`Global`, :class:`Local`, :class:`LocalPairwise`, :class:`Feature`, or :class:`FeaturePairwise` for declaring a new score. """ name = property(lambda x: type(x).__name__, doc="Short name of this score. 
By default it is just the " "class name, but this can be overridden in subclasses " "(for example to create names containing spaces).") description = property(lambda x: x.__doc__.split("\n")[0], doc="Longer text description of this score. By " "default it is the first line of the " "docstring.") class Global(MetricMode): """A score that is calculated per-model. :param float value: The score value (see :class:`MetricType`). """ mode = "global" def __init__(self, value): self.value = value def __repr__(self): return "<%s(value=%r)>" % (type(self).__name__, self.value) class Local(MetricMode): """A score that is calculated on a single residue. :param residue: The residue that is scored. :type residue: :class:`modelcif.Residue` :param float value: The score value (see :class:`MetricType`). """ mode = "local" def __init__(self, residue, value): self.residue = residue self.value = value def __repr__(self): return "<%s(residue=%r, value=%r)>" % (type(self).__name__, self.residue, self.value) class LocalPairwise(MetricMode): """A score that is calculated between two residues. :param residue1: The first residue that is scored. :type residue1: :class:`modelcif.Residue` :param residue2: The second residue that is scored. :type residue2: :class:`modelcif.Residue` :param float value: The score value (see :class:`MetricType`). """ mode = "local-pairwise" def __init__(self, residue1, residue2, value): self.residue1 = residue1 self.residue2 = residue2 self.value = value def __repr__(self): return ("<%s(residue1=%r, residue2=%r, value=%r)>" % (type(self).__name__, self.residue1, self.residue2, self.value)) class Feature(MetricMode): """A score that is calculated on a single feature. :param feature: The feature that is scored. :type feature: :class:`modelcif.Feature` :param float value: The score value (see :class:`MetricType`). 
""" mode = "per-feature" def __init__(self, feature, value): self.feature = feature self.value = value def __repr__(self): return "<%s(feature=%r, value=%r)>" % (type(self).__name__, self.feature, self.value) _all_features = property(lambda self: (self.feature,)) class FeaturePairwise(MetricMode): """A score that is calculated between two features. :param feature1: The first feature that is scored. :type feature1: :class:`modelcif.Feature` :param feature2: The second feature that is scored. :type feature2: :class:`modelcif.Feature` :param float value: The score value (see :class:`MetricType`). """ mode = "per-feature-pair" def __init__(self, feature1, feature2, value): self.feature1 = feature1 self.feature2 = feature2 self.value = value def __repr__(self): return ("<%s(feature1=%r, feature2=%r, value=%r)>" % (type(self).__name__, self.feature1, self.feature2, self.value)) _all_features = property(lambda self: (self.feature1, self.feature2)) class MetricType: """Base class for the type of a quality metric. Generally a derived class such as :class:`ZScore` or :class:`Distance` is used to declare a new score, but a custom type can also be declared by deriving from this class and providing a docstring to describe the metric type:: class MPQSMetricType(modelcif.qa_metric.MetricType): "composite score, values >1.1 are reliable" """ type = "other" def _get_other_details(self): # Find most derived class of MetricType before we pulled in MetricMode # and use the first line of its docstring as other_details if self.type == MetricType.type: for base in type(self).mro(): if (issubclass(base, MetricType) and base is not MetricType and not issubclass(base, MetricMode)): return base.__doc__.split('\n')[0] other_details = property( _get_other_details, doc="More information about this metric type. By default it is the " "first line of the MetricType subclass docstring.") class ZScore(MetricType): """Score that is the number of standard deviations from optimal/best. 
See :class:`MetricType` for more information.""" type = "zscore" other_details = None class Energy(MetricType): """Energy score (the lower the energy, the better the quality). See :class:`MetricType` for more information.""" type = "energy" other_details = None class Distance(MetricType): """Distance score (the lower the distance, the better the quality). See :class:`MetricType` for more information.""" type = "distance" other_details = None class NormalizedScore(MetricType): """Normalized score ranging from 0 to 1. See :class:`MetricType` for more information.""" type = "normalized score" other_details = None class PAE(MetricType): """Score that is a predicted aligned error. See :class:`MetricType` for more information.""" type = "PAE" other_details = None class ContactProbability(MetricType): """Score that is a contact probability of a pairwise interaction. See :class:`MetricType` for more information.""" type = "contact probability" other_details = None class PLDDT(MetricType): """Predicted lDDT-CA score in [0,100] (higher score, means better accuracy). See :class:`MetricType` for more information.""" type = "pLDDT" other_details = None class PLDDT01(MetricType): """Predicted lDDT-CA score in [0,1] (higher score, means better accuracy). See :class:`MetricType` for more information.""" type = "pLDDT in [0,1]" other_details = None class PLDDTAllAtom(MetricType): """Predicted lDDT all atom score in [0,100] (higher score, means better accuracy). See :class:`MetricType` for more information.""" type = "pLDDT all-atom" other_details = None class PLDDTAllAtom01(MetricType): """Predicted lDDT all atom score in [0,1] (higher score, means better accuracy). See :class:`MetricType` for more information.""" type = "pLDDT all-atom in [0,1]" other_details = None class PLDDTToPolymer(MetricType): """Predicted lDDT with distances from each atom to CA or C1' of nearby polymer residues [0,100] (higher score, means better accuracy). 
def _get_date(iso_date_str):
    """Build a datetime.date from a string that starts with YYYY-MM-DD.

       None is passed straight through. Only the year, month and day
       fields (the first ten characters) are looked at, so anything that
       follows the date is ignored.
    """
    if iso_date_str is not None:
        # (start, stop) slice bounds of the year, month and day fields
        bounds = ((0, 4), (5, 7), (8, 10))
        year, month, day = (int(iso_date_str[start:stop])
                            for start, stop in bounds)
        return date(year, month, day)
    return None
class _SystemReader:
    """Hold the System being populated together with the ID mappers and
       scratch tables that the category handlers share while reading
       a file."""

    def __init__(self, model_class, starting_model_class, system=None):
        """Set up the mappers and lookup tables.

           :param model_class: class handed to the ``models`` mapper; also
                  compared against :class:`modelcif.model.Model` to set
                  ``default_model_class``.
           :param starting_model_class: accepted but not used by this
                  initializer.
           :param system: existing System to read into; a new
                  :class:`modelcif.System` is created if this is not given.
        """
        self.system = system or modelcif.System()

        # NOTE(review): the trailing "*(None,) * N" arguments are presumably
        # placeholder constructor arguments used when a mapper has to create
        # a new object - confirm against ihm.reader.IDMapper

        #: Mapping from ID to :class:`ihm.Software` objects
        self.software = IDMapper(self.system.software, ihm.Software,
                                 *(None,) * 4)

        #: Mapping from ID to :class:`ihm.Citation` objects
        self.citations = IDMapper(self.system.citations, ihm.Citation,
                                  *(None,) * 8)

        #: Mapping from ID to :class:`ihm.Revision` objects
        self.revisions = IDMapper(self.system.revisions, ihm.Revision,
                                  *(None,) * 4)

        #: Mapping from ID to :class:`ihm.Entity` objects
        self.entities = IDMapper(self.system.entities, _make_new_entity)

        #: Mapping from ID to :class:`ihm.source.Manipulated` objects
        self.src_gens = IDMapper(None, ihm.source.Manipulated)

        #: Mapping from ID to :class:`ihm.source.Natural` objects
        self.src_nats = IDMapper(None, ihm.source.Natural)

        #: Mapping from ID to :class:`ihm.source.Synthetic` objects
        self.src_syns = IDMapper(None, ihm.source.Synthetic)

        #: Mapping from ID to :class:`ihm.AsymUnit` objects
        self.asym_units = IDMapper(self.system.asym_units, ihm.AsymUnit, None)

        #: Mapping from ID to :class:`ihm.ChemComp` objects
        self.chem_comps = _ChemCompIDMapper(self.system._orphan_chem_comps,
                                            ihm.ChemComp, *(None,) * 3)

        self.software_groups = IDMapper(self.system.software_groups,
                                        modelcif.SoftwareGroup)

        # Placeholder Data objects seen only in ma_data, keyed by data ID
        self.default_data_by_id = {}
        # Objects (templates, models, alignments, ...) keyed by data ID
        self.data_by_id = {}

        self.data_groups = IDMapper(self.system.data_groups,
                                    modelcif.data.DataGroup)

        self.transformations = IDMapper(self.system.template_transformations,
                                        modelcif.Transformation, *(None,) * 2)

        self.templates = _TemplateIDMapper(
            self.system.templates, modelcif.Template, *(None,) * 4)

        self.template_segments = IDMapper(
            self.system.template_segments, modelcif.TemplateSegment,
            *(None,) * 4)

        # True only if the caller did not ask for a custom model class
        self.default_model_class = model_class is modelcif.model.Model

        # Flat list of every model already in the system; the models mapper
        # below is given this list to work with
        self._all_seen_models = []
        for group, model in self.system._all_models():
            self._all_seen_models.append(model)

        self.models = IDMapper(self._all_seen_models, model_class, [], None)

        self.model_groups = IDMapper(self.system.model_groups,
                                     modelcif.model.ModelGroup)

        self.assemblies = IDMapper(self.system.assemblies, modelcif.Assembly)

        self.protocols = IDMapper(self.system.protocols,
                                  modelcif.protocol.Protocol)

        self.references = _ReferenceIDMapper(None, ihm.reference.Sequence)
        self.alignments = IDMapper(None, ihm.reference.Alignment)

        self.features = _FeatureIDMapper(None, modelcif.Feature)

        # Associated file objects keyed by their ID in the file
        self.assoc_by_id = {}
        # QA metric classes keyed by metric ID (shared with the System)
        self.qa_by_id = self.system._qa_by_id

        # Tables filled in by one handler and consumed later by another;
        # each maps an ID to a list that starts out empty
        self.software_parameters = collections.defaultdict(list)
        self.alignment_pairs = collections.defaultdict(list)
        self.alignment_seqs = collections.defaultdict(list)

        # Correspondence between target and template sequence ranges
        self.target_template_poly_mapping = {}

        # Correspondence between target and template chains
        self.target_asym_for_template = {}

        # Mapping from Entity to bool ma_model_mode flag
        self.ma_model_mode_map = {}

    def finalize(self):
        """Apply final fix-ups to the system once all handlers have run."""
        # make sequence immutable (see also _make_new_entity)
        for e in self.system.entities:
            e.sequence = tuple(e.sequence)

        # If no Assembly is provided, assume each model consists of all Asyms
        # Assume everything in every Model is atomic
        for mg in self.system.model_groups:
            for m in mg:
                if not m.assembly:
                    m.assembly.extend(self.system.asym_units[:])
                m.representation = ihm.representation.Representation(
                    [ihm.representation.AtomicSegment(seg, rigid=False)
                     for seg in m.assembly])
class _EntityNonPolyHandler(Handler):
    """Read _pdbx_entity_nonpoly: give each non-polymer entity its single
       chemical component and remember its ma_model_mode flag."""
    category = '_pdbx_entity_nonpoly'

    # File value of ma_model_mode -> Python flag
    _mmmap = {'explicit': True, 'implicit': False}

    def __call__(self, entity_id, comp_id, ma_model_mode):
        entity = self.sysr.entities.get_by_id(entity_id)
        comp = self.sysr.chem_comps.get_by_id(comp_id)
        entity.sequence = (comp,)
        if ma_model_mode in (None, ihm.unknown):
            # Missing or unknown values are stored unchanged
            flag = ma_model_mode
        else:
            flag = self._mmmap.get(ma_model_mode.lower())
        self.sysr.ma_model_mode_map[entity] = flag
class _SoftwareGroupHandler(Handler):
    """Read _ma_software_group: add each software package, with its
       parameters if it has any, to its group."""
    category = '_ma_software_group'

    def __call__(self, group_id, software_id, parameter_group_id):
        group = self.sysr.software_groups.get_by_id(group_id)
        software = self.sysr.software.get_by_id(software_id)
        # None and ihm.unknown need no special treatment as keys; the
        # lookup simply gives an empty list for them
        params = self.sysr.software_parameters[parameter_group_id]
        member = (modelcif.SoftwareWithParameters(software=software,
                                                  parameters=params)
                  if params else software)
        group.append(member)
class _EnumerationMapper:
    """Look up the Python class for an mmCIF enumeration value.

       Classes are found by scanning `module` for proper subclasses of
       `base_class` and keying them on the upper-cased value of their
       `attr` class attribute. The value of that attribute on `base_class`
       itself is treated as the catch-all "other" entry.
    """

    def __init__(self, module, base_class, attr="name"):
        self._base_class = base_class
        self._attr = attr
        self._other_name = getattr(base_class, attr).upper()
        self._other_map = {}
        self._map = {
            getattr(cls, attr).upper(): cls
            for _, cls in inspect.getmembers(module, inspect.isclass)
            if issubclass(cls, base_class) and cls is not base_class}

    def get(self, name, other_det):
        """Return the class for `name`; for the catch-all name the class
           is chosen by `other_det` instead. Classes that do not exist yet
           are created on demand and cached, so repeated lookups
           (ignoring case) give the same class."""
        key = name.upper()
        known = self._map.get(key)
        if known:
            return known

        if key == self._other_name:
            # The catch-all value: other_details is the real key
            det_key = None if other_det is None else other_det.upper()
            if det_key not in self._other_map:
                class CustomType(self._base_class):
                    other_details = other_det
                    __doc__ = other_det
                self._other_map[det_key] = CustomType
            return self._other_map[det_key]

        # An enumeration value with no matching class; make one, labelled
        # with the upper-cased value, and remember it
        class ExtraType(self._base_class):
            other_details = None
        setattr(ExtraType, self._attr, key)
        self._map[key] = ExtraType
        return ExtraType
class _TransformationHandler(Handler):
    """Read _ma_template_trans_matrix: set the rotation matrix and
       translation vector of each template transformation."""
    category = '_ma_template_trans_matrix'

    def __call__(self, id, tr_vector1, tr_vector2, tr_vector3, rot_matrix11,
                 rot_matrix21, rot_matrix31, rot_matrix12, rot_matrix22,
                 rot_matrix32, rot_matrix13, rot_matrix23, rot_matrix33):
        transform = self.sysr.transformations.get_by_id(id)
        # The ihm helpers pick the matrix/vector elements out of a
        # name -> value mapping, so pass them this call's variables
        fields = locals()
        transform.rot_matrix = ihm.reader._get_matrix33(fields, 'rot_matrix')
        transform.tr_vector = ihm.reader._get_vector3(fields, 'tr_vector')
class _TemplateRefDBHandler(Handler):
    """Read _ma_template_ref_db_details: attach a database reference
       to the template it describes."""
    category = '_ma_template_ref_db_details'

    def __init__(self, *args):
        super().__init__(*args)
        # db_name -> subclass of modelcif.reference.TemplateReference
        reference_module = modelcif.reference
        self.type_map = _EnumerationMapper(
            reference_module, reference_module.TemplateReference)

    def __call__(self, template_id, db_name, db_name_other_details,
                 db_accession_code, db_version_date):
        template = self.sysr.templates.get_by_id(template_id)
        ref_class = self.type_map.get(db_name, db_name_other_details)
        version_date = _get_date(db_version_date)
        reference = ref_class(accession=db_accession_code,
                              db_version_date=version_date)
        template.references.append(reference)
def _get_align_class(type_class, mode_class, align_class_map):
    """Return the alignment class that combines the given type and mode.

       `align_class_map` is a cache keyed on (type_class, mode_class); a
       new class deriving from both is made and stored there the first
       time a combination is requested.
    """
    key = (type_class, mode_class)
    if key in align_class_map:
        return align_class_map[key]

    class Alignment(type_class, mode_class):
        pass

    align_class_map[key] = Alignment
    return align_class_map[key]
class _AlignmentHandler(Handler):
    """Read _ma_alignment: collect the sequences of each alignment."""
    category = '_ma_alignment'

    def __call__(self, alignment_id, target_template_flag, sequence):
        # Only stored here; AlignmentInfoHandler.finalize() assigns the
        # sequences to the alignment pairs later
        entry = (target_template_flag, sequence)
        self.sysr.alignment_seqs[alignment_id].append(entry)
class _TargetTemplatePolyMappingHandler(Handler):
    """Read _ma_target_template_poly_mapping: record the target residue
       range for each (template segment, target asym) pair."""
    category = '_ma_target_template_poly_mapping'

    def __call__(self, template_segment_id, target_asym_id,
                 target_seq_id_begin: int, target_seq_id_end: int):
        # Only stored here; the range is applied to the alignment pairs
        # in finalize() of AlignmentInfoHandler
        mapping = self.sysr.target_template_poly_mapping
        mapping[(template_segment_id, target_asym_id)] = (
            target_seq_id_begin, target_seq_id_end)
class _AssemblyDetailsHandler(Handler):
    """Read _ma_struct_assembly_details: set the name and description
       of an assembly."""
    category = '_ma_struct_assembly_details'

    def __call__(self, assembly_id, assembly_name, assembly_description):
        assembly = self.sysr.assemblies.get_by_id(assembly_id)
        for attr, value in (('name', assembly_name),
                            ('description', assembly_description)):
            setattr(assembly, attr, value)
class _ProtocolHandler(Handler):
    """Read _ma_protocol_step: append one step to its protocol."""
    category = '_ma_protocol_step'

    def __init__(self, *args):
        super().__init__(*args)
        # Upper-cased method_type -> subclass of modelcif.protocol.Step
        self._method_map = {
            cls.method_type.upper(): cls
            for _, cls in inspect.getmembers(modelcif.protocol,
                                             inspect.isclass)
            if issubclass(cls, modelcif.protocol.Step)
            and cls is not modelcif.protocol.Step}

    def __call__(self, protocol_id, method_type, step_name, details,
                 software_group_id, input_data_group_id,
                 output_data_group_id):
        protocol = self.sysr.protocols.get_by_id(protocol_id)
        # Unrecognized method types fall back to the generic Step class
        step_class = self._method_map.get(method_type.upper(),
                                          modelcif.protocol.Step)
        data_groups = self.sysr.data_groups
        step_args = {
            'input_data': data_groups.get_by_id(input_data_group_id),
            'output_data': data_groups.get_by_id(output_data_group_id),
            'name': step_name,
            'details': details,
            'software': self.sysr.software_groups.get_by_id_or_none(
                software_group_id),
        }
        new_step = step_class(**step_args)
        protocol.steps.append(new_step)
def _get_assoc_class(file_content, file_format, type_map, binary_type_map):
    """Pick the modelcif.associated.File subclass for an associated file.

       `type_map` and `binary_type_map` are the two mappings made by
       _get_assoc_type_maps(). Classes that take a 'binary' argument are
       returned as a callable with that argument already filled in; if
       nothing matches, the generic File class is returned.
    """
    content = file_content.upper()
    # Deprecated file_content value; use its replacement instead
    if content == 'LOCAL PAIRWISE QA SCORES':
        content = 'QA METRICS'
    key = (content, file_format.upper())

    binary_match = binary_type_map.get(key)
    if not binary_match:
        return type_map.get(key, modelcif.associated.File)

    file_class, is_binary = binary_match
    return functools.partial(file_class, binary=is_binary)
class _AssociatedArchiveHandler(Handler):
    """Read _ma_associated_archive_file_details: collect the files held
       in each archive and attach them to the archive at finalize time."""
    category = '_ma_associated_archive_file_details'

    def __init__(self, *args):
        super().__init__(*args)
        maps = _get_assoc_type_maps()
        self._type_map, self._binary_type_map = maps
        # archive file ID -> files found inside that archive
        self._archive_files = collections.defaultdict(list)

    def __call__(self, id, archive_file_id, file_path, file_format,
                 file_content, description, data_id):
        file_class = _get_assoc_class(
            file_content, file_format, self._type_map, self._binary_type_map)
        member = file_class(path=file_path, details=description, data=data_id)
        # The archive itself may not have been read yet, so just remember
        # which archive this file belongs to
        self._archive_files[archive_file_id].append(member)

    def finalize(self):
        data_by_id = self.sysr.data_by_id
        for archive_id, members in self._archive_files.items():
            archive = self.sysr.assoc_by_id.get(archive_id)
            if not archive:
                # Files that point at an unknown archive are dropped
                continue
            # Replace each stored data_id with the Data object it names
            for member in members:
                member.data = data_by_id.get(member.data)
            archive.files = members
self.sysr.asym_units.get_by_id(label_asym_id) f.asym_units.append(asym) def _make_qa_class(type_class, mode_class, p_name, p_description, p_software): """Create and return a new class to represent a QA metric""" class QA(type_class, mode_class): name = p_name __doc__ = description = p_description software = p_software QA.__name__ = p_name return QA class _QAMetricHandler(Handler): category = '_ma_qa_metric' def __init__(self, *args): super(_QAMetricHandler, self).__init__(*args) # Map mode to subclass of modelcif.qa_metric.MetricMode self._mode_map = dict( (x[1].mode.upper(), x[1]) for x in inspect.getmembers(modelcif.qa_metric, inspect.isclass) if issubclass(x[1], modelcif.qa_metric.MetricMode) and x[1] is not modelcif.qa_metric.MetricMode) # Map type to subclass of modelcif.qa_metric.MetricType # (also allow user-defined "other" classes) self._type_map = _EnumerationMapper( modelcif.qa_metric, modelcif.qa_metric.MetricType, attr="type") def __call__(self, id, name, description, type, mode, type_other_details, software_group_id): type_class = self._type_map.get(type, type_other_details) mode_class = self._mode_map.get(mode.upper(), modelcif.qa_metric.MetricMode) software = self.sysr.software_groups.get_by_id_or_none( software_group_id) self.sysr.qa_by_id[id] = _make_qa_class( type_class, mode_class, name, description, software) class _QAMetricGlobalHandler(Handler): category = '_ma_qa_metric_global' def __call__(self, model_id, metric_id, metric_value: float): model = self.sysr.models.get_by_id(model_id) metric_class = self.sysr.qa_by_id[metric_id] model.qa_metrics.append(metric_class(metric_value)) class _QAMetricLocalHandler(Handler): category = '_ma_qa_metric_local' def __call__(self, model_id, label_asym_id, label_seq_id: int, metric_id, metric_value: float): model = self.sysr.models.get_by_id(model_id) asym = self.sysr.asym_units.get_by_id(label_asym_id) residue = asym.residue(label_seq_id) metric_class = self.sysr.qa_by_id[metric_id] 
model.qa_metrics.append(metric_class(residue, metric_value)) class _QAMetricPairwiseHandler(Handler): category = '_ma_qa_metric_local_pairwise' def __call__(self, model_id, label_asym_id_1, label_seq_id_1: int, label_asym_id_2, label_seq_id_2: int, metric_id, metric_value: float): model = self.sysr.models.get_by_id(model_id) asym1 = self.sysr.asym_units.get_by_id(label_asym_id_1) residue1 = asym1.residue(label_seq_id_1) asym2 = self.sysr.asym_units.get_by_id(label_asym_id_2) residue2 = asym2.residue(label_seq_id_2) metric_class = self.sysr.qa_by_id[metric_id] model.qa_metrics.append(metric_class(residue1, residue2, metric_value)) class _QAMetricFeatureHandler(Handler): category = '_ma_qa_metric_feature' def __call__(self, model_id, feature_id, metric_id, metric_value: float): model = self.sysr.models.get_by_id(model_id) feature = self.sysr.features.get_by_id(feature_id) metric_class = self.sysr.qa_by_id[metric_id] model.qa_metrics.append(metric_class(feature, metric_value)) class _QAMetricFeaturePairwiseHandler(Handler): category = '_ma_qa_metric_feature_pairwise' def __call__(self, model_id, feature_id_1, feature_id_2, metric_id, metric_value: float): model = self.sysr.models.get_by_id(model_id) feature1 = self.sysr.features.get_by_id(feature_id_1) feature2 = self.sysr.features.get_by_id(feature_id_2) metric_class = self.sysr.qa_by_id[metric_id] model.qa_metrics.append(metric_class(feature1, feature2, metric_value)) class ModelCIFVariant(Variant): """Used to select typical PDBx/ModelCIF file input. 
See :func:`read` and :class:`ihm.reader.Variant`.""" system_reader = _SystemReader _handlers = [ ihm.reader._StructHandler, ihm.reader._SoftwareHandler, ihm.reader._CitationHandler, ihm.reader._AuditAuthorHandler, ihm.reader._AuditRevisionHistoryHandler, ihm.reader._AuditRevisionDetailsHandler, ihm.reader._AuditRevisionGroupHandler, ihm.reader._AuditRevisionCategoryHandler, ihm.reader._AuditRevisionItemHandler, ihm.reader._DataUsageHandler, ihm.reader._GrantHandler, ihm.reader._CitationAuthorHandler, _ChemCompHandler, _ChemCompDescriptorHandler, ihm.reader._EntityHandler, ihm.reader._EntitySrcNatHandler, ihm.reader._EntitySrcGenHandler, ihm.reader._EntitySrcSynHandler, ihm.reader._EntityPolyHandler, ihm.reader._EntityPolySeqHandler, _EntityNonPolyHandler, ihm.reader._StructAsymHandler, _SoftwareGroupHandler, _DatabaseHandler, _SoftwareParameterHandler, _DataHandler, _DataGroupHandler, _DataRefDBHandler, _TargetEntityHandler, ihm.reader._StructRefHandler, ihm.reader._StructRefSeqHandler, ihm.reader._StructRefSeqDifHandler, _TargetRefDBHandler, _TransformationHandler, _TemplateDetailsHandler, _TemplateRefDBHandler, _TemplatePolySegmentHandler, _TemplateCustomizedHandler, _TemplateCoordHandler, _TemplatePolyHandler, _TemplateNonPolyHandler, _AlignmentHandler, _AlignmentInfoHandler, _AlignmentDetailsHandler, _TargetTemplatePolyMappingHandler, _AssemblyHandler, _AssemblyDetailsHandler, ihm.reader._AtomSiteHandler, ihm.reader._PolySeqSchemeHandler, ihm.reader._NonPolySchemeHandler, _ModelListHandler, _ModelGroupHandler, _ModelGroupLinkHandler, _ProtocolHandler, _AssociatedHandler, _AssociatedArchiveHandler, _FeatureListHandler, _AtomFeatureHandler, _PolyResidueFeatureHandler, _EntityInstanceFeatureHandler, _QAMetricHandler, _QAMetricGlobalHandler, _QAMetricLocalHandler, _QAMetricPairwiseHandler, _QAMetricFeatureHandler, _QAMetricFeaturePairwiseHandler] def get_handlers(self, sysr): return [h(sysr) for h in self._handlers] def get_audit_conform_handler(self, sysr): return 
_AuditConformHandler(sysr) def read(fh, model_class=modelcif.model.Model, format='mmCIF', handlers=[], warn_unknown_category=False, warn_unknown_keyword=False, reject_old_file=False, variant=ModelCIFVariant, add_to_system=None): """Read data from the file handle `fh`. See :func:`ihm.reader.read` for more information. The function here behaves similarly but reads in files compliant with the ModelCIF extension directory rather than IHM. Note that if a custom ``model_class`` is provided, any models present in the file will be returned as that type, regardless of their type stated in the mmCIF file (e.g. homology model, ab initio model). (However, the ``model_type`` attribute will be set appropriately.) If the input file references any associated files, they will be listed in :attr:`modelcif.System.repositories`. The files will not be automatically downloaded or read in, but it is straightforward to do this in Python; see the `associated files example `_. :return: A list of :class:`modelcif.System` objects. """ # noqa: E501 return ihm.reader.read( fh, model_class=model_class, format=format, handlers=handlers, warn_unknown_category=warn_unknown_category, warn_unknown_keyword=warn_unknown_keyword, reject_old_file=reject_old_file, variant=variant, add_to_system=add_to_system) python-modelcif-1.5/modelcif/reference.py000066400000000000000000000162171506655355100205740ustar00rootroot00000000000000"""Classes for linking back to a sequence or structure database.""" import warnings import ihm.reference from ihm.reference import Alignment, SeqDif # noqa: F401 class TargetReference(ihm.reference.Sequence): """Point to the sequence of a target :class:`modelcif.Entity` in a sequence database. 
Typically a subclass such as :class:`UniProt` is used, although to use a custom database, make a new subclass and provide a docstring to describe the database, e.g.:: class CustomRef(TargetReference): "my custom database" Compare with :class:`modelcif.ReferenceDatabase`, which describes multiple sequences used in template searches or alignment construction; this class relates to just the modeled sequence itself. See also :attr:`alignments` to describe the correspondence between the database and entity sequences. :param str code: The name of the sequence in the database. :param str accession: The database accession. :param int align_begin: Beginning index of the sequence in the database. Deprecated; use :attr:`alignments` instead. :param int align_end: Ending index of the sequence in the database. Deprecated; use :attr:`alignments` instead. :param str isoform: Sequence isoform, if applicable. :param str ncbi_taxonomy_id: Taxonomy identifier provided by NCBI. :param str organism_scientific: Scientific name of the organism. :param sequence_version_date: Versioning date, e.g. for UniProtKB sequences this is usually the date of last modification from the DT line of an entry. :type sequence_version_date: :class:`datetime.date` or :class:`datetime.datetime` :param str sequence_crc64: The CRC64 sum of the original database sequence. :param str sequence: The complete database sequence, as a string of one-letter codes. If omitted, will default to the canonical sequence of the associated :class:`~modelcif.Entity`. :param str details: Longer text describing the sequence. :param bool is_primary: True iff this is the main input used in the modeling. 
""" name = 'Other' def __init__(self, code, accession, align_begin=None, align_end=None, isoform=None, ncbi_taxonomy_id=None, organism_scientific=None, sequence_version_date=None, sequence_crc64=None, sequence=None, details=None, is_primary=None): super(TargetReference, self).__init__( db_name=self.name, db_code=code, accession=accession, sequence=sequence, details=details) self.align_begin, self.align_end = align_begin, align_end self.isoform = isoform self.ncbi_taxonomy_id = ncbi_taxonomy_id self.organism_scientific = organism_scientific self.sequence_version_date = sequence_version_date self.sequence_crc64 = sequence_crc64 if align_begin or align_end: warnings.warn( "align_begin and align_end are deprecated, and will be " "removed in a future python-modelcif release. Specify the " "database sequence and provide one or more " "modelcif.reference.Alignment objects instead.", stacklevel=2) if sequence is None: warnings.warn( "No sequence provided. The canonical sequence of the Entity " "will be used instead.", stacklevel=2) #: All alignments between the reference and entity sequences, as #: :class:`Alignment` objects. If none are provided, a simple 1:1 #: alignment is assumed. self.alignments = [] self.is_primary = is_primary code = property(lambda self: self.db_code) def _get_other_details(self): if (type(self) is not TargetReference and self.name == TargetReference.name): return self.__doc__.split('\n')[0] other_details = property( _get_other_details, doc="More information about a custom reference type. " "By default it is the first line of the docstring.") class UniProt(TargetReference): """Point to the sequence of an :class:`modelcif.Entity` in UniProt. These objects are typically passed to the :class:`modelcif.Entity` constructor for target sequences (for templates, see :class:`TemplateReference`). See :class:`TargetReference` for a description of the parameters. 
""" name = 'UNP' other_details = None class TemplateReference: """Point to the structure of a :class:`modelcif.Template` in a structure database. These objects are typically passed to the :class:`modelcif.Template` constructor for template sequences (for target sequences, see :class:`TargetReference`). Typically a subclass such as :class:`PDB` is used, although to use a custom database, make a new subclass and provide a docstring to describe the database, e.g.:: class CustomRef(TemplateReference): "my custom database" :param str accession: The database accession. :param db_version_date: Versioning date, e.g. for PDB entries this is usually the value of ``_pdbx_audit_revision_history.revision_date``. :type db_version_date: :class:`datetime.date` or :class:`datetime.datetime` """ name = 'Other' def __init__(self, accession, db_version_date=None): self.accession = accession self.db_version_date = db_version_date def _get_other_details(self): if (type(self) is not TemplateReference and self.name == TemplateReference.name): return self.__doc__.split('\n')[0] other_details = property( _get_other_details, doc="More information about a custom reference type. " "By default it is the first line of the docstring.") class PDB(TemplateReference): """Point to the structure of a :class:`modelcif.Template` in PDB. These objects are typically passed to the :class:`modelcif.Template` constructor. See :class:`TemplateReference` for a description of the parameters. """ name = 'PDB' other_details = None class AlphaFoldDB(TemplateReference): """Point to the structure of a :class:`modelcif.Template` in AlphaFold DB. These objects are typically passed to the :class:`modelcif.Template` constructor. See :class:`TemplateReference` for a description of the parameters. """ name = 'AlphaFoldDB' other_details = None class PubChem(TemplateReference): """Point to the structure of a :class:`modelcif.Template` in PubChem. 
These objects are typically passed to the :class:`modelcif.Template` constructor. See :class:`TemplateReference` for a description of the parameters. Use the PubChem CID as the accession code. """ name = 'PubChem' other_details = None python-modelcif-1.5/modelcif/test.py000066400000000000000000000014661506655355100176150ustar00rootroot00000000000000import modelcif import modelcif.dumper import modelcif.reader import os import unittest class Tests(unittest.TestCase): def test_basic(self): """Basic install test""" system = modelcif.System(title='test system') entity_a = modelcif.Entity('AAA', description='Subunit A') entity_b = modelcif.Entity('AAAAAA', description='Subunit B') system.entities.extend((entity_a, entity_b)) # Test output in mmCIF format with open('output.cif', 'w') as fh: modelcif.dumper.write(fh, [system]) # Make sure we can read back the file with open('output.cif') as fh: sys2, = modelcif.reader.read(fh) self.assertEqual(sys2.title, 'test system') os.unlink('output.cif') if __name__ == '__main__': unittest.main() python-modelcif-1.5/modelcif/util/000077500000000000000000000000001506655355100172325ustar00rootroot00000000000000python-modelcif-1.5/modelcif/util/__init__.py000066400000000000000000000000051506655355100213360ustar00rootroot00000000000000pass python-modelcif-1.5/modelcif/util/make_mmcif.py000066400000000000000000000072731506655355100217050ustar00rootroot00000000000000#!/usr/bin/env python3 """ Add minimal ModelCIF-related tables to an mmCIF file. Given any mmCIF file as input, this script will add any missing ModelCIF-related tables and write out a new file that is minimally compliant with the ModelCIF dictionary. 
This is done by simply reading in the original file with python-modelcif and then writing it out again, so a) any data in the input file that is not understood by python-modelcif will be lost on output; and b) input files that aren't compliant with the PDBx dictionary, or that contain syntax errors or other problems, may crash or otherwise confuse python-modelcif. While a best effort is made, it is not guaranteed that the output file is valid. It is recommended that it is run through a validator such as examples/validate_mmcif.py and any errors corrected or reported as issues. """ import modelcif.reader import modelcif.dumper import modelcif.model import ihm.util import os import argparse def add_modelcif_info(s): if not s.title: s.title = 'Auto-generated system' if not s.protocols: default_protocol = modelcif.protocol.Protocol() step = modelcif.protocol.ModelingStep( name='modeling', input_data=None, output_data=None) default_protocol.steps.append(step) s.protocols.append(default_protocol) for model_group in s.model_groups: for model in model_group: # Entity description is also used by python-modelcif for # ma_data.name, which is mandatory, so it cannot be unknown/? 
for asym in model.assembly: if asym.entity.description is ihm.unknown: asym.entity.description = "target" model.not_modeled_residue_ranges.extend( _get_not_modeled_residues(model)) return s def _get_not_modeled_residues(model): """Yield NotModeledResidueRange objects for all residue ranges in the Model that are not referenced by Atom objects""" for assem in model.assembly: asym = assem.asym if hasattr(assem, 'asym') else assem if not asym.entity.is_polymeric(): continue # Make a set of all residue indices of this asym "handled" # by being modeled with Atom objects handled_residues = set() for atom in model._atoms: if atom.asym_unit is asym: handled_residues.add(atom.seq_id) # Convert set to a list of residue ranges handled_residues = ihm.util._make_range_from_list( sorted(handled_residues)) # Return not-modeled for each non-handled range for r in ihm.util._invert_ranges(handled_residues, end=assem.seq_id_range[1], start=assem.seq_id_range[0]): yield modelcif.model.NotModeledResidueRange(asym, r[0], r[1]) def get_args(): p = argparse.ArgumentParser( description="Add minimal ModelCIF-related tables to an mmCIF file.") p.add_argument("input", metavar="input.cif", help="input mmCIF file name") p.add_argument("output", metavar="output.cif", help="output mmCIF file name", default="output.cif", nargs="?") return p.parse_args() def main(): args = get_args() if (os.path.exists(args.input) and os.path.exists(args.output) and os.path.samefile(args.input, args.output)): raise ValueError("Input and output are the same file") with open(args.input) as fh: with open(args.output, 'w') as fhout: modelcif.dumper.write( fhout, [add_modelcif_info(s) for s in modelcif.reader.read(fh)]) if __name__ == '__main__': main() python-modelcif-1.5/pyproject.toml000066400000000000000000000017601506655355100174130ustar00rootroot00000000000000[build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" [project] name = "modelcif" version = "1.5" requires-python = ">=3.6" authors 
= [ {name="Ben Webb", email="ben@salilab.org"} ] readme = "README.md" description = 'Package for handling ModelCIF mmCIF and BinaryCIF files' license = "MIT" license-files = ["LICEN[CS]E*"] classifiers = [ "Programming Language :: Python :: 3", "Operating System :: OS Independent", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering", ] dependencies = ['ihm>=2.6'] [project.urls] homepage = 'https://github.com/ihmwg/python-modelcif' repository = 'https://github.com/ihmwg/python-modelcif' documentation = 'https://python-modelcif.readthedocs.io/' issues = 'https://github.com/ihmwg/python-modelcif/issues/' changelog = 'https://github.com/ihmwg/python-modelcif/blob/main/ChangeLog.rst' [tool.ruff.lint] select = ["E", "F"] ignore = ["E402"] [tool.setuptools] packages = ['modelcif', 'modelcif.util'] python-modelcif-1.5/requirements.txt000066400000000000000000000000131506655355100177510ustar00rootroot00000000000000ihm >= 2.6 python-modelcif-1.5/setup.py000077500000000000000000000017021506655355100162100ustar00rootroot00000000000000#!/usr/bin/env python try: from setuptools import setup except ImportError: from distutils.core import setup import sys VERSION = "1.5" copy_args = sys.argv[1:] with open("README.md", "r") as fh: long_description = fh.read() setup(name='modelcif', version=VERSION, script_args=copy_args, description='Package for handling ModelCIF mmCIF and BinaryCIF files', long_description=long_description, long_description_content_type="text/markdown", author='Ben Webb', author_email='ben@salilab.org', url='https://github.com/ihmwg/python-modelcif', packages=['modelcif', 'modelcif.util'], install_requires=['ihm>=2.6'], classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering", ]) 
python-modelcif-1.5/test/000077500000000000000000000000001506655355100154525ustar00rootroot00000000000000python-modelcif-1.5/test/input/000077500000000000000000000000001506655355100166115ustar00rootroot00000000000000python-modelcif-1.5/test/input/mini.cif000066400000000000000000000127301506655355100202330ustar00rootroot00000000000000data_model # _exptl.method 'model, MODELLER Version 9.24 2020/08/21 11:54:31' # _modeller.version 9.24 # loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 ? B 2 ? # loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id 1 1 VAL 1 2 GLY 1 3 GLN 1 4 GLN 1 5 TYR 1 6 SER 1 7 SER 2 1 ASP 2 2 GLU # loop_ _atom_site.group_PDB _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_asym_id _atom_site.auth_asym_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.B_iso_or_equiv _atom_site.label_entity_id _atom_site.id _atom_site.pdbx_PDB_model_num ATOM N N . VAL A A 1 2 ? 115.846 27.965 -26.370 1.000 141.830 1 1 1 ATOM C CA . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 2 1 ATOM C C . VAL A A 1 2 ? 113.517 27.504 -27.287 1.000 143.910 1 3 1 ATOM O O . VAL A A 1 2 ? 113.885 27.746 -28.441 1.000 146.600 1 4 1 ATOM C CB . VAL A A 1 2 ? 113.901 29.406 -25.683 1.000 143.750 1 5 1 ATOM C CG1 . VAL A A 1 2 ? 115.030 30.438 -25.931 1.000 144.590 1 6 1 ATOM C CG2 . VAL A A 1 2 ? 112.669 29.783 -26.486 1.000 144.500 1 7 1 ATOM N N . GLY A A 2 3 ? 112.371 26.869 -27.012 1.000 142.200 1 8 1 ATOM C CA . GLY A A 2 3 ? 111.506 26.368 -28.075 1.000 137.530 1 9 1 ATOM C C . GLY A A 2 3 ? 111.719 24.869 -28.275 1.000 135.820 1 10 1 ATOM O O . GLY A A 2 3 ? 110.768 24.093 -28.268 1.000 134.380 1 11 1 ATOM N N . GLN A A 3 4 ? 112.989 24.479 -28.392 1.000 134.310 1 12 1 ATOM C CA . GLN A A 3 4 ? 
113.468 23.113 -28.639 1.000 128.420 1 13 1 ATOM C C . GLN A A 3 4 ? 113.556 22.956 -30.163 1.000 121.240 1 14 1 ATOM O O . GLN A A 3 4 ? 113.552 23.977 -30.840 1.000 127.090 1 15 1 ATOM C CB . GLN A A 3 4 ? 112.614 22.038 -27.919 1.000 132.340 1 16 1 ATOM C CG . GLN A A 3 4 ? 113.028 21.943 -26.407 1.000 135.370 1 17 1 ATOM C CD . GLN A A 3 4 ? 112.604 20.667 -25.677 1.000 138.260 1 18 1 ATOM O OE1 . GLN A A 3 4 ? 112.836 19.543 -26.150 1.000 141.450 1 19 1 ATOM N NE2 . GLN A A 3 4 ? 112.006 20.839 -24.497 1.000 139.310 1 20 1 ATOM N N . GLN A A 4 5 ? 113.648 21.739 -30.710 1.000 124.970 1 21 1 ATOM C CA . GLN A A 4 5 ? 113.808 21.534 -32.168 1.000 117.620 1 22 1 ATOM C C . GLN A A 4 5 ? 114.778 22.519 -32.833 1.000 112.980 1 23 1 ATOM O O . GLN A A 4 5 ? 114.677 23.727 -32.677 1.000 116.850 1 24 1 ATOM C CB . GLN A A 4 5 ? 112.456 21.545 -32.905 1.000 121.870 1 25 1 ATOM C CG . GLN A A 4 5 ? 111.763 20.153 -32.917 1.000 123.750 1 26 1 ATOM C CD . GLN A A 4 5 ? 110.863 19.874 -34.145 1.000 123.650 1 27 1 ATOM O OE1 . GLN A A 4 5 ? 110.040 20.712 -34.537 1.000 122.500 1 28 1 ATOM N NE2 . GLN A A 4 5 ? 111.008 18.674 -34.737 1.000 122.090 1 29 1 ATOM N N . TYR A A 5 6 ? 115.713 21.980 -33.598 1.000 109.460 1 30 1 ATOM C CA . TYR A A 5 6 ? 116.743 22.770 -34.259 1.000 103.700 1 31 1 ATOM C C . TYR A A 5 6 ? 116.348 23.366 -35.602 1.000 100.320 1 32 1 ATOM O O . TYR A A 5 6 ? 115.530 22.799 -36.311 1.000 98.760 1 33 1 ATOM C CB . TYR A A 5 6 ? 117.973 21.876 -34.402 1.000 104.580 1 34 1 ATOM C CG . TYR A A 5 6 ? 119.003 22.282 -35.425 1.000 105.030 1 35 1 ATOM C CD1 . TYR A A 5 6 ? 119.591 23.546 -35.395 1.000 106.020 1 36 1 ATOM C CD2 . TYR A A 5 6 ? 119.450 21.366 -36.380 1.000 105.180 1 37 1 ATOM C CE1 . TYR A A 5 6 ? 120.606 23.890 -36.289 1.000 106.990 1 38 1 ATOM C CE2 . TYR A A 5 6 ? 120.461 21.694 -37.276 1.000 106.420 1 39 1 ATOM C CZ . TYR A A 5 6 ? 121.039 22.958 -37.226 1.000 107.110 1 40 1 ATOM O OH . TYR A A 5 6 ? 
122.057 23.290 -38.095 1.000 107.500 1 41 1 ATOM N N . SER A A 6 7 ? 116.921 24.519 -35.944 1.000 96.290 1 42 1 ATOM C CA . SER A A 6 7 ? 116.626 25.161 -37.229 1.000 93.490 1 43 1 ATOM C C . SER A A 6 7 ? 117.900 25.595 -37.944 1.000 91.900 1 44 1 ATOM O O . SER A A 6 7 ? 118.767 26.246 -37.352 1.000 91.810 1 45 1 ATOM C CB . SER A A 6 7 ? 115.732 26.388 -37.048 1.000 93.090 1 46 1 ATOM O OG . SER A A 6 7 ? 116.503 27.521 -36.705 1.000 92.330 1 47 1 ATOM N N . SER A A 7 8 ? 117.999 25.245 -39.224 1.000 89.750 1 48 1 ATOM C CA . SER A A 7 8 ? 119.165 25.590 -40.036 1.000 87.320 1 49 1 ATOM C C . SER A A 7 8 ? 119.224 27.089 -40.277 1.000 84.820 1 50 1 ATOM O O . SER A A 7 8 ? 120.074 27.594 -41.008 1.000 84.020 1 51 1 ATOM C CB . SER A A 7 8 ? 119.112 24.859 -41.383 1.000 88.180 1 52 1 ATOM O OG . SER A A 7 8 ? 117.956 25.221 -42.117 1.000 88.850 1 53 1 ATOM N N . ASP B B 1 3 ? 71.339 57.678 52.031 1.000 152.010 2 54 1 ATOM C CA . ASP B B 1 3 ? 70.427 58.819 51.717 1.000 152.390 2 55 1 ATOM C C . ASP B B 1 3 ? 70.144 58.821 50.222 1.000 151.960 2 56 1 ATOM O O . ASP B B 1 3 ? 70.984 59.245 49.435 1.000 151.590 2 57 1 ATOM C CB . ASP B B 1 3 ? 71.083 60.142 52.119 1.000 153.250 2 58 1 ATOM C CG . ASP B B 1 3 ? 71.660 60.105 53.526 1.000 154.120 2 59 1 ATOM O OD1 . ASP B B 1 3 ? 72.652 59.371 53.741 1.000 154.200 2 60 1 ATOM O OD2 . ASP B B 1 3 ? 71.119 60.804 54.415 1.000 154.250 2 61 1 ATOM N N . GLU B B 2 4 ? 68.956 58.362 49.837 1.000 151.910 2 62 1 ATOM C CA . GLU B B 2 4 ? 68.584 58.274 48.425 1.000 152.090 2 63 1 ATOM C C . GLU B B 2 4 ? 68.584 59.573 47.616 1.000 151.320 2 64 1 ATOM O O . GLU B B 2 4 ? 67.786 59.730 46.686 1.000 150.840 2 65 1 ATOM C CB . GLU B B 2 4 ? 67.218 57.585 48.274 1.000 153.600 2 66 1 ATOM C CG . GLU B B 2 4 ? 66.035 58.328 48.890 1.000 155.740 2 67 1 ATOM C CD . GLU B B 2 4 ? 64.690 57.699 48.526 1.000 156.760 2 68 1 ATOM O OE1 . GLU B B 2 4 ? 64.487 56.498 48.819 1.000 156.940 2 69 1 ATOM O OE2 . GLU B B 2 4 ? 
63.835 58.409 47.947 1.000 157.060 2 70 1 python-modelcif-1.5/test/input/no_title.cif000066400000000000000000000001201506655355100211020ustar00rootroot00000000000000data_PDBDEV_00000025 _entry.id PDBDEV_00000025 _struct.entry_id PDBDEV_00000025 python-modelcif-1.5/test/input/not_modeled.cif000066400000000000000000000072221506655355100215700ustar00rootroot00000000000000data_model # _exptl.method 'model, MODELLER Version 9.24 2020/08/21 11:54:31' # _modeller.version 9.24 # loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 ? B 2 ? # loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id 1 1 VAL 1 2 GLY 1 3 GLN 1 4 GLN 1 5 TYR 1 6 SER 1 7 SER 2 1 ASP 2 2 GLU # loop_ _atom_site.group_PDB _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_asym_id _atom_site.auth_asym_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.B_iso_or_equiv _atom_site.label_entity_id _atom_site.id _atom_site.pdbx_PDB_model_num ATOM N N . VAL A A 1 2 ? 115.846 27.965 -26.370 1.000 141.830 1 1 1 ATOM C CA . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 2 1 ATOM C C . VAL A A 1 2 ? 113.517 27.504 -27.287 1.000 143.910 1 3 1 ATOM O O . VAL A A 1 2 ? 113.885 27.746 -28.441 1.000 146.600 1 4 1 ATOM C CB . VAL A A 1 2 ? 113.901 29.406 -25.683 1.000 143.750 1 5 1 ATOM C CG1 . VAL A A 1 2 ? 115.030 30.438 -25.931 1.000 144.590 1 6 1 ATOM C CG2 . VAL A A 1 2 ? 112.669 29.783 -26.486 1.000 144.500 1 7 1 ATOM N N . GLY A A 2 3 ? 112.371 26.869 -27.012 1.000 142.200 1 8 1 ATOM C CA . GLY A A 2 3 ? 111.506 26.368 -28.075 1.000 137.530 1 9 1 ATOM C C . GLY A A 2 3 ? 111.719 24.869 -28.275 1.000 135.820 1 10 1 ATOM O O . GLY A A 2 3 ? 110.768 24.093 -28.268 1.000 134.380 1 11 1 ATOM N N . GLN A A 3 4 ? 112.989 24.479 -28.392 1.000 134.310 1 12 1 ATOM C CA . GLN A A 3 4 ? 
113.468 23.113 -28.639 1.000 128.420 1 13 1 ATOM C C . GLN A A 3 4 ? 113.556 22.956 -30.163 1.000 121.240 1 14 1 ATOM O O . GLN A A 3 4 ? 113.552 23.977 -30.840 1.000 127.090 1 15 1 ATOM C CB . GLN A A 3 4 ? 112.614 22.038 -27.919 1.000 132.340 1 16 1 ATOM C CG . GLN A A 3 4 ? 113.028 21.943 -26.407 1.000 135.370 1 17 1 ATOM C CD . GLN A A 3 4 ? 112.604 20.667 -25.677 1.000 138.260 1 18 1 ATOM O OE1 . GLN A A 3 4 ? 112.836 19.543 -26.150 1.000 141.450 1 19 1 ATOM N NE2 . GLN A A 3 4 ? 112.006 20.839 -24.497 1.000 139.310 1 20 1 ATOM N N . GLN A A 4 5 ? 113.648 21.739 -30.710 1.000 124.970 1 21 1 ATOM C CA . GLN A A 4 5 ? 113.808 21.534 -32.168 1.000 117.620 1 22 1 ATOM C C . GLN A A 4 5 ? 114.778 22.519 -32.833 1.000 112.980 1 23 1 ATOM O O . GLN A A 4 5 ? 114.677 23.727 -32.677 1.000 116.850 1 24 1 ATOM C CB . GLN A A 4 5 ? 112.456 21.545 -32.905 1.000 121.870 1 25 1 ATOM C CG . GLN A A 4 5 ? 111.763 20.153 -32.917 1.000 123.750 1 26 1 ATOM C CD . GLN A A 4 5 ? 110.863 19.874 -34.145 1.000 123.650 1 27 1 ATOM O OE1 . GLN A A 4 5 ? 110.040 20.712 -34.537 1.000 122.500 1 28 1 ATOM N NE2 . GLN A A 4 5 ? 111.008 18.674 -34.737 1.000 122.090 1 29 1 ATOM N N . SER A A 7 8 ? 117.999 25.245 -39.224 1.000 89.750 1 48 1 ATOM C CA . SER A A 7 8 ? 119.165 25.590 -40.036 1.000 87.320 1 49 1 ATOM C C . SER A A 7 8 ? 119.224 27.089 -40.277 1.000 84.820 1 50 1 ATOM O O . SER A A 7 8 ? 120.074 27.594 -41.008 1.000 84.020 1 51 1 ATOM C CB . SER A A 7 8 ? 119.112 24.859 -41.383 1.000 88.180 1 52 1 ATOM O OG . SER A A 7 8 ? 117.956 25.221 -42.117 1.000 88.850 1 53 1 ATOM N N . ASP B B 1 3 ? 71.339 57.678 52.031 1.000 152.010 2 54 1 ATOM C CA . ASP B B 1 3 ? 70.427 58.819 51.717 1.000 152.390 2 55 1 ATOM C C . ASP B B 1 3 ? 70.144 58.821 50.222 1.000 151.960 2 56 1 ATOM O O . ASP B B 1 3 ? 70.984 59.245 49.435 1.000 151.590 2 57 1 ATOM C CB . ASP B B 1 3 ? 71.083 60.142 52.119 1.000 153.250 2 58 1 ATOM C CG . ASP B B 1 3 ? 71.660 60.105 53.526 1.000 154.120 2 59 1 ATOM O OD1 . 
ASP B B 1 3 ? 72.652 59.371 53.741 1.000 154.200 2 60 1 ATOM O OD2 . ASP B B 1 3 ? 71.119 60.804 54.415 1.000 154.250 2 61 1 python-modelcif-1.5/test/input/struct_only.cif000066400000000000000000000002701506655355100216600ustar00rootroot00000000000000data_PDBDEV_00000025 _entry.id PDBDEV_00000025 _struct.entry_id PDBDEV_00000025 _struct.title 'Architecture of Pol II(G) and molecular mechanism of transcription regulation by Gdown1' python-modelcif-1.5/test/test_alignment.py000066400000000000000000000023301506655355100210370ustar00rootroot00000000000000import utils import os import unittest TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.alignment class Tests(unittest.TestCase): def test_identity(self): """Test sequence identity classes""" ident = modelcif.alignment.ShorterSequenceIdentity(42.0) self.assertEqual(ident.denominator, "Length of the shorter sequence") self.assertIsNone(ident.other_details) self.assertAlmostEqual(ident.value, 42.0, delta=1e-4) ident = modelcif.alignment.AlignedPositionsIdentity(42.0) ident = modelcif.alignment.AlignedResiduePairsIdentity(42.0) ident = modelcif.alignment.MeanSequenceIdentity(42.0) # generic "other" identity ident = modelcif.alignment.Identity(42.0) self.assertEqual(ident.denominator, "Other") self.assertIsNone(ident.other_details) # custom "other" identity class CustomIdentity(modelcif.alignment.Identity): """foo bar""" ident = CustomIdentity(42.0) self.assertEqual(ident.denominator, "Other") self.assertEqual(ident.other_details, "foo") if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_associated.py000066400000000000000000000007651506655355100212120ustar00rootroot00000000000000import utils import os import unittest TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.associated class Tests(unittest.TestCase): def test_local_pairwise_qa_scores_file(self): """Test 
LocalPairwiseQAScoresFile class""" self.assertWarns(UserWarning, modelcif.associated.LocalPairwiseQAScoresFile, path='foo') if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_descriptor.py000066400000000000000000000006741506655355100212500ustar00rootroot00000000000000import utils import os import unittest TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.descriptor class Tests(unittest.TestCase): def test_descriptor(self): """Test Descriptor classes""" base = modelcif.descriptor.Descriptor("1abc") self.assertEqual(base.value, "1abc") _ = repr(base) if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_dumper.py000066400000000000000000001524201506655355100203630ustar00rootroot00000000000000from datetime import date import warnings import utils import os import unittest from io import StringIO try: import msgpack except ImportError: msgpack = None TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.dumper import modelcif.protocol import modelcif.model import modelcif.reference import modelcif.alignment import modelcif.associated import modelcif.descriptor import ihm.format import ihm.dumper def _get_dumper_output(dumper, system, check=True): dumper._check = check fh = StringIO() writer = ihm.format.CifWriter(fh) dumper.dump(system, writer) return fh.getvalue() class Tests(unittest.TestCase): def test_write(self): """Test write() function""" sys1 = modelcif.System(id='system1') sys2 = modelcif.System(id='system 2+3') fh = StringIO() modelcif.dumper.write(fh, [sys1, sys2]) lines = fh.getvalue().split('\n') self.assertEqual(lines[:2], ["data_system1", "_entry.id system1"]) if lines[9] == 'data_system23': self.assertEqual(lines[9:11], ["data_system23", "_entry.id 'system 2+3'"]) else: self.assertEqual(lines[11:13], ["data_system23", "_entry.id 'system 2+3'"]) def test_audit_conform_dumper(self): 
"""Test AuditConformDumper""" system = modelcif.System() dumper = modelcif.dumper._AuditConformDumper() out = _get_dumper_output(dumper, system) lines = sorted(out.split('\n')) self.assertEqual(lines[1].split()[0], "_audit_conform.dict_location") self.assertEqual(lines[2].rstrip('\r\n'), "_audit_conform.dict_name mmcif_ma.dic") self.assertEqual(lines[3].split()[0], "_audit_conform.dict_version") def test_database_dumper(self): """Test DatabaseDumper""" system = modelcif.System() dumper = modelcif.dumper._DatabaseDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, '') system = modelcif.System( database=modelcif.Database(id='foo', code='bar')) dumper = modelcif.dumper._DatabaseDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, "_database_2.database_code bar\n" "_database_2.database_id foo\n") def test_software_group_dumper(self): """Test SoftwareGroupDumper""" class MockObject: pass p1 = modelcif.SoftwareParameter(name='foo', value=42) p2 = modelcif.SoftwareParameter(name='bar', value=True) p3 = modelcif.SoftwareParameter(name='baz', value='ok') intlist = modelcif.SoftwareParameter(name='intlist', value=[1, 2, 3]) floatlist = modelcif.SoftwareParameter( name='floatlist', value=(1., 2., 3.)) mixlist = modelcif.SoftwareParameter(name='mixlist', value=[1, 2., 3]) s1 = modelcif.Software( name='s1', classification='test code', description='Some test program', version=1, location='http://test.org') s1._id = 1 s2 = modelcif.Software( name='s2', classification='test code', description='Some test program', version=1, location='http://test.org') s2._id = 2 s3 = modelcif.Software( name='s3', classification='test code', description='Some test program', version=1, location='http://test.org') s3._id = 3 system = modelcif.System() aln1 = MockObject() aln1.pairs = [] aln1.software = modelcif.SoftwareGroup((s1, s2)) # SoftwareGroup.parameters should be ignored aln1.software.parameters.append('garbage') aln2 = MockObject() aln2.pairs = [] 
aln2.software = s3 aln3 = MockObject() aln3.pairs = [] s3param = modelcif.SoftwareWithParameters( software=s3, parameters=[p1, p2, p3, intlist, floatlist, mixlist]) aln3.software = modelcif.SoftwareGroup((s2, s3param)) # Duplicate parameters, should get the same ID as for aln3 aln4 = MockObject() aln4.pairs = [] aln4.software = modelcif.SoftwareGroup((s3param,)) system.alignments.extend((aln1, aln2, aln3, aln4)) system._before_write() # populate system.software_groups dumper = modelcif.dumper._SoftwareGroupDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) # Should have two groups (s1, s2) and (s2, s3) and another # singleton group (s3) self.assertEqual(out, """# loop_ _ma_software_parameter.parameter_id _ma_software_parameter.group_id _ma_software_parameter.data_type _ma_software_parameter.name _ma_software_parameter.value _ma_software_parameter.description 1 1 integer foo 42 . 2 1 boolean bar YES . 3 1 string baz ok . 4 1 integer-csv intlist 1,2,3 . 5 1 float-csv floatlist 1.0,2.0,3.0 . 6 1 float-csv mixlist 1,2.0,3 . # # loop_ _ma_software_group.ordinal_id _ma_software_group.group_id _ma_software_group.software_id _ma_software_group.parameter_group_id 1 1 1 . 2 1 2 . 3 2 3 . 4 3 2 . 
5 3 3 1 6 4 3 1 # """) def test_bad_software_parameter(self): """Test invalid SoftwareParameter""" p1 = modelcif.SoftwareParameter(name='foo', value=['string', 'list']) s1 = modelcif.Software( name='s1', classification='test code', description='Some test program', version=1, location='http://test.org') system = modelcif.System() system.software.append(s1) s1param = modelcif.SoftwareWithParameters(s1, parameters=[p1]) sg1 = modelcif.SoftwareGroup([s1param]) system.software_groups.append(sg1) dumper = modelcif.dumper._SoftwareGroupDumper() dumper.finalize(system) # Only lists of ints or floats are supported, not strings self.assertRaises(TypeError, _get_dumper_output, dumper, system) def test_data_dumper(self): """Test DataDumper""" system = modelcif.System() entity = modelcif.Entity("DMA", description='test entity') system.entities.append(entity) # Template and target use same entity here (but different data IDs) template = modelcif.Template( entity, asym_id="A", model_num=1, name="test template", transformation=modelcif.Transformation.identity()) system.templates.append(template) system.data.append(modelcif.data.Data(name="test other", details="test details")) system._before_write() # populate system.data dumper = modelcif.dumper._DataDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_data.id _ma_data.name _ma_data.content_type _ma_data.content_type_other_details 1 'test other' other 'test details' 2 'test template' 'template structure' . 3 'test entity' target . 
# """) def test_data_group_dumper(self): """Test DataGroupDumper""" system = modelcif.System() tgt_e1 = modelcif.Entity("D") tgt_e2 = modelcif.Entity("M") tgt_e3 = modelcif.Entity("A") tgt_e1._data_id = 1 tgt_e2._data_id = 2 tgt_e3._data_id = 3 system.entities.extend((tgt_e1, tgt_e2, tgt_e3)) dg12 = modelcif.data.DataGroup((tgt_e1, tgt_e2)) p = modelcif.protocol.Protocol() p.steps.append(modelcif.protocol.ModelingStep( input_data=dg12, output_data=tgt_e3)) system.protocols.append(p) system._before_write() # populate system.data_groups dumper = modelcif.dumper._DataGroupDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) # First group (tgt_e1,tgt_e2); second group contains just tgt_e3 self.assertEqual(out, """# loop_ _ma_data_group.ordinal_id _ma_data_group.group_id _ma_data_group.data_id 1 1 1 2 1 2 3 2 3 # """) def test_data_ref_db_dumper(self): """Test DataRefDBDumper""" system = modelcif.System() system.data.append(modelcif.ReferenceDatabase( name='testdb', url='testurl', version='1.0', release_date=date(1979, 11, 22))) system.data.append(modelcif.data.Data(name="test other", details="test details")) dumper = modelcif.dumper._DataDumper() dumper.finalize(system) # Assign Data IDs dumper = modelcif.dumper._DataRefDBDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_data_ref_db.data_id _ma_data_ref_db.name _ma_data_ref_db.location_url _ma_data_ref_db.version _ma_data_ref_db.release_date 1 testdb testurl 1.0 1979-11-22 # """) def test_qa_metric_dumper(self): """Test QAMetricDumper""" system = modelcif.System() s1 = modelcif.Software( name='s1', classification='test code', description='Some test program', version=1, location='http://test.org') s1._group_id = 1 class MockObject: pass class CustomMetricType(modelcif.qa_metric.MetricType): """my custom type""" class DistanceScore(modelcif.qa_metric.Global, modelcif.qa_metric.Distance): """test description""" name = "test score" software = s1 class 
CustomScore(modelcif.qa_metric.Global, CustomMetricType): """Description does not match docstring""" description = "custom description" software = None class LocalScore(modelcif.qa_metric.Local, modelcif.qa_metric.ZScore): """custom local description Second line of docstring (ignored)""" name = "custom local score" software = None class PairScore(modelcif.qa_metric.LocalPairwise, modelcif.qa_metric.Energy): """custom pair description""" name = "custom pair score" software = None class FeatureScore(modelcif.qa_metric.Feature, modelcif.qa_metric.ZScore): """feature score""" name = "feature score" software = None class FeaturePairwiseScore(modelcif.qa_metric.FeaturePairwise, modelcif.qa_metric.ZScore): """feature pairwise score""" name = "feature pairwise score" software = None m1 = DistanceScore(42.) m2 = CustomScore(99.) m3 = DistanceScore(60.) e1 = modelcif.Entity('ACGT') asym = modelcif.AsymUnit(e1, 'foo') asym._id = 'Z' m4 = LocalScore(asym.residue(2), 20.) m5 = PairScore(asym.residue(1), asym.residue(3), 30.) resf = modelcif.PolyResidueFeature((asym.residue(1), asym.residue(2))) instf = modelcif.EntityInstanceFeature((asym,)) m6 = FeatureScore(resf, 40.) m7 = FeaturePairwiseScore(resf, instf, 50.) model = MockObject() model._id = 18 model.qa_metrics = [m1, m2, m3, m4, m5, m6, m7] mg = modelcif.model.ModelGroup((model,)) system.model_groups.append(mg) # Assign feature IDs dumper = modelcif.dumper._FeatureDumper() dumper.finalize(system) dumper = modelcif.dumper._QAMetricDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_qa_metric.id _ma_qa_metric.name _ma_qa_metric.description _ma_qa_metric.type _ma_qa_metric.mode _ma_qa_metric.type_other_details _ma_qa_metric.software_group_id 1 'test score' 'test description' distance global . 1 2 CustomScore 'custom description' other global 'my custom type' . 3 'custom local score' 'custom local description' zscore local . . 
4 'custom pair score' 'custom pair description' energy local-pairwise . . 5 'feature score' 'feature score' zscore per-feature . . 6 'feature pairwise score' 'feature pairwise score' zscore per-feature-pair . . # # loop_ _ma_qa_metric_global.ordinal_id _ma_qa_metric_global.model_id _ma_qa_metric_global.metric_id _ma_qa_metric_global.metric_value 1 18 1 42.000 2 18 2 99.000 3 18 1 60.000 # # loop_ _ma_qa_metric_local.ordinal_id _ma_qa_metric_local.model_id _ma_qa_metric_local.label_asym_id _ma_qa_metric_local.label_seq_id _ma_qa_metric_local.label_comp_id _ma_qa_metric_local.metric_id _ma_qa_metric_local.metric_value 1 18 Z 2 CYS 3 20.000 # # loop_ _ma_qa_metric_local_pairwise.ordinal_id _ma_qa_metric_local_pairwise.model_id _ma_qa_metric_local_pairwise.label_asym_id_1 _ma_qa_metric_local_pairwise.label_seq_id_1 _ma_qa_metric_local_pairwise.label_comp_id_1 _ma_qa_metric_local_pairwise.label_asym_id_2 _ma_qa_metric_local_pairwise.label_seq_id_2 _ma_qa_metric_local_pairwise.label_comp_id_2 _ma_qa_metric_local_pairwise.metric_id _ma_qa_metric_local_pairwise.metric_value 1 18 Z 1 ALA Z 3 GLY 4 30.000 # # loop_ _ma_qa_metric_feature.ordinal_id _ma_qa_metric_feature.model_id _ma_qa_metric_feature.feature_id _ma_qa_metric_feature.metric_id _ma_qa_metric_feature.metric_value 1 18 1 5 40.000 # # loop_ _ma_qa_metric_feature_pairwise.ordinal_id _ma_qa_metric_feature_pairwise.model_id _ma_qa_metric_feature_pairwise.feature_id_1 _ma_qa_metric_feature_pairwise.feature_id_2 _ma_qa_metric_feature_pairwise.metric_id _ma_qa_metric_feature_pairwise.metric_value 1 18 1 2 6 50.000 # """) def test_feature_dumper(self): """Test FeatureDumper""" system = modelcif.System() class MockObject: pass class TestScore(modelcif.qa_metric.Feature, modelcif.qa_metric.ZScore): """test score""" name = "test score" software = None e1 = modelcif.Entity('ACGT') asym = modelcif.AsymUnit(e1, 'foo') e2 = ihm.Entity([ihm.NonPolymerChemComp('HEM')]) heme = modelcif.AsymUnit(e2, 'heme') asym._id = 'Y' heme._id 
= 'Z' atomf = modelcif.AtomFeature((1, 2, 3), details='atom f') resf = modelcif.PolyResidueFeature((asym.residue(1), asym.residue(2)), details='prf') instf = modelcif.EntityInstanceFeature((asym,)) inst2f = modelcif.EntityInstanceFeature((heme,)) atoms = TestScore(atomf, 20.) ress = TestScore(resf, 30.) insts = TestScore(instf, 40.) inst2s = TestScore(inst2f, 40.) model = MockObject() model._id = 18 model.qa_metrics = [atoms, ress, insts, inst2s] mg = modelcif.model.ModelGroup((model,)) system.model_groups.append(mg) dumper = modelcif.dumper._FeatureDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_feature_list.feature_id _ma_feature_list.feature_type _ma_feature_list.entity_type _ma_feature_list.details 1 atom other 'atom f' 2 residue polymer prf 3 'entity instance' polymer . 4 'entity instance' non-polymer . # # loop_ _ma_atom_feature.ordinal_id _ma_atom_feature.feature_id _ma_atom_feature.atom_id 1 1 1 2 1 2 3 1 3 # # loop_ _ma_poly_residue_feature.ordinal_id _ma_poly_residue_feature.feature_id _ma_poly_residue_feature.label_asym_id _ma_poly_residue_feature.label_seq_id _ma_poly_residue_feature.label_comp_id 1 2 Y 1 ALA 2 2 Y 2 CYS # # loop_ _ma_entity_instance_feature.ordinal_id _ma_entity_instance_feature.feature_id _ma_entity_instance_feature.label_asym_id 1 3 Y 2 4 Z # """) # Test empty feature emptyf = modelcif.EntityInstanceFeature(()) emptys = TestScore(emptyf, 20.) model.qa_metrics = [emptys] dumper = modelcif.dumper._FeatureDumper() dumper.finalize(system) self.assertRaises(ValueError, _get_dumper_output, dumper, system) # Should work with checks disabled _ = _get_dumper_output(dumper, system, check=False) # Test feature that selects multiple entity types multf = modelcif.EntityInstanceFeature((asym, heme)) mults = TestScore(multf, 20.) 
model.qa_metrics = [mults] dumper = modelcif.dumper._FeatureDumper() dumper.finalize(system) self.assertRaises(ValueError, _get_dumper_output, dumper, system) # Should work with checks disabled _ = _get_dumper_output(dumper, system, check=False) def test_protocol_dumper(self): """Test ProtocolDumper""" class MockObject: pass indat = MockObject() indat._data_group_id = 1 outdat = MockObject() outdat._data_group_id = 2 system = modelcif.System() s1 = modelcif.Software( name='s1', classification='test code', description='Some test program', version=1, location='http://test.org') s1._group_id = 42 p = modelcif.protocol.Protocol() p.steps.append(modelcif.protocol.TemplateSearchStep( name='tsstep', details="some details", software=s1, input_data=indat, output_data=outdat)) p.steps.append(modelcif.protocol.ModelingStep( name='modstep', input_data=indat, output_data=outdat)) p.steps.append(modelcif.protocol.ModelingStep( name='nullstep', input_data=None, output_data=None)) system.protocols.append(p) dumper = modelcif.dumper._ProtocolDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_protocol_step.ordinal_id _ma_protocol_step.protocol_id _ma_protocol_step.step_id _ma_protocol_step.method_type _ma_protocol_step.step_name _ma_protocol_step.details _ma_protocol_step.software_group_id _ma_protocol_step.input_data_group_id _ma_protocol_step.output_data_group_id 1 1 1 'template search' tsstep 'some details' 42 1 2 2 1 2 modeling modstep . . 1 2 3 1 3 modeling nullstep . . . . 
# """) def test_model_dumper(self): """Test ModelDumper""" class CustomModel(modelcif.model.Model): """custom model""" system = modelcif.System() e1 = modelcif.Entity('ACGT') e1._id = 9 system.entities.append(e1) asym = modelcif.AsymUnit(e1, 'foo') asym._id = 'A' system.asym_units.append(asym) asmb = modelcif.Assembly((asym,)) asmb._id = 2 model1 = modelcif.model.HomologyModel(assembly=asmb, name='test model') model1._data_id = 42 model1._atoms = [modelcif.model.Atom(asym_unit=asym, seq_id=1, atom_id='C', type_symbol='C', x=1.0, y=2.0, z=3.0)] model2 = modelcif.model.AbInitioModel(assembly=asmb, name='model2') model2._data_id = 43 model3 = CustomModel(assembly=asmb, name='model3') model3._data_id = 44 mg = modelcif.model.ModelGroup((model1, model2, model3), name='test group') system.model_groups.append(mg) # model1 is in both groups mg = modelcif.model.ModelGroup((model1,), name='second group') # ModelGroup constructor only supports details with python-ihm > 1.8 mg.details = 'second group details' system.model_groups.append(mg) dumper = modelcif.dumper._ModelDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_model_list.ordinal_id _ma_model_list.model_name _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 'test model' 42 'Homology model' . 2 model2 43 'Ab initio model' . 3 model3 44 Other 'custom model' # # loop_ _ma_model_group.id _ma_model_group.name _ma_model_group.details 1 'test group' . 
2 'second group' 'second group details' # # loop_ _ma_model_group_link.group_id _ma_model_group_link.model_id 1 1 1 2 1 3 2 1 # # loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.label_asym_id _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.label_entity_id _atom_site.auth_asym_id _atom_site.auth_comp_id _atom_site.B_iso_or_equiv _atom_site.pdbx_PDB_model_num ATOM 1 C C . ALA 1 1 ? A 1.000 2.000 3.000 . 9 A ALA . 1 # # loop_ _atom_type.symbol C # """) def test_poly_seq_scheme_dumper(self): """Test PolySeqSchemeDumper with ModelCIF models""" system = modelcif.System() e1 = modelcif.Entity('ACGT') e1._id = 9 system.entities.append(e1) asym = modelcif.AsymUnit(e1, 'foo') asym._id = 'A' system.asym_units.append(asym) asmb = modelcif.Assembly((asym,)) asmb._id = 2 model1 = modelcif.model.HomologyModel(assembly=asmb, name='test model') model1._data_id = 42 model1._atoms = [modelcif.model.Atom(asym_unit=asym, seq_id=1, atom_id='C', type_symbol='C', x=1.0, y=2.0, z=3.0)] mg = modelcif.model.ModelGroup((model1,), name='test group') # Add at least one model, since the PolySeqSchemeDumper checks all # models' not_modeled_residue_ranges member when writing the table system.model_groups.append(mg) dumper = ihm.dumper._PolySeqSchemeDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.mon_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.auth_seq_num _pdbx_poly_seq_scheme.pdb_mon_id _pdbx_poly_seq_scheme.auth_mon_id _pdbx_poly_seq_scheme.pdb_strand_id _pdbx_poly_seq_scheme.pdb_ins_code A 9 1 ALA 1 1 ALA ALA A . A 9 2 CYS 2 2 CYS CYS A . A 9 3 GLY 3 3 GLY GLY A . A 9 4 THR 4 4 THR THR A . 
# """) def test_target_ref_db_dumper(self): """Test TargetRefDBDumper""" class CustomRef(modelcif.reference.TargetReference): """my custom ref""" system = modelcif.System() # Default alignment but with explicit align begin, end with warnings.catch_warnings(): warnings.simplefilter("ignore") ref1 = modelcif.reference.UniProt( code='testcode', accession='testacc', align_begin=4, align_end=8, isoform='testiso', ncbi_taxonomy_id='1234', organism_scientific='testorg', sequence_version_date=date(1979, 11, 22), sequence_crc64="A123B456C789D1E2", sequence='ACGT', is_primary=True) # Default alignment (entire sequence) ref2 = modelcif.reference.UniProt(code='c2', accession='a2', sequence='ACGT') ref3 = CustomRef(code='c3', accession='a3', isoform=ihm.unknown, sequence='ACGT') # Explicit alignment that extends to the end of the db sequence ref4 = modelcif.reference.UniProt(code='c4', accession='a4', sequence='CCACGT') ref4.alignments.append(modelcif.reference.Alignment(db_begin=3)) # Explicit alignment with explicit db_end ref5 = modelcif.reference.UniProt(code='c5', accession='a5', sequence='XXXACXXGTXXX', is_primary=False) ref5.alignments.append(modelcif.reference.Alignment( db_begin=4, db_end=5)) ref5.alignments.append(modelcif.reference.Alignment( db_begin=8, db_end=9)) e1 = modelcif.Entity('ACGT', references=[ref1, ref2, ref3, ref4, ref5]) e1._id = 1 system.entities.append(e1) dumper = modelcif.dumper._TargetRefDBDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_target_ref_db_details.target_entity_id _ma_target_ref_db_details.db_name _ma_target_ref_db_details.db_name_other_details _ma_target_ref_db_details.db_code _ma_target_ref_db_details.db_accession _ma_target_ref_db_details.seq_db_isoform _ma_target_ref_db_details.seq_db_align_begin _ma_target_ref_db_details.seq_db_align_end _ma_target_ref_db_details.ncbi_taxonomy_id _ma_target_ref_db_details.organism_scientific 
_ma_target_ref_db_details.seq_db_sequence_version_date _ma_target_ref_db_details.seq_db_sequence_checksum _ma_target_ref_db_details.is_primary 1 UNP . testcode testacc testiso 4 8 1234 testorg 1979-11-22 A123B456C789D1E2 YES 1 UNP . c2 a2 . 1 4 . . . . . 1 Other 'my custom ref' c3 a3 ? 1 4 . . . . . 1 UNP . c4 a4 . 3 6 . . . . . 1 UNP . c5 a5 . 4 9 . . . . NO # """) def test_alignment_dumper(self): """Test AlignmentDumper""" class CustomRef(modelcif.reference.TemplateReference): """my custom ref""" class Alignment(modelcif.alignment.Global, modelcif.alignment.Pairwise): pass class LocalAlignment(modelcif.alignment.Local, modelcif.alignment.Multiple): pass system = modelcif.System() tmp_e = modelcif.Entity('ACG') tgt_e = modelcif.Entity('ACE') tgt_e._id = 1 system.entities.extend((tmp_e, tgt_e)) asym = modelcif.AsymUnit(tgt_e, id='A') asym._id = 'A' system.asym_units.append(asym) ref1 = modelcif.reference.PDB('1abc', db_version_date=date(1979, 11, 22)) ref2 = CustomRef('2xyz') ref3 = modelcif.reference.PubChem("1234") ref4 = modelcif.reference.AlphaFoldDB("P12345", db_version_date=date(2022, 6, 1)) tr = modelcif.Transformation.identity() tr._id = 42 t = modelcif.Template(tmp_e, asym_id='H', model_num=1, name='testtmp', transformation=tr, references=[ref1, ref2, ref3, ref4], strand_id='Z') t._data_id = 99 p = modelcif.alignment.Pair( template=t.segment('AC-G', 1, 3), target=asym.segment('ACE-', 1, 3), score=modelcif.alignment.BLASTEValue("1e-15"), identity=modelcif.alignment.ShorterSequenceIdentity(42.)) aln = Alignment(name='testaln', pairs=[p]) aln._data_id = 100 system.alignments.append(aln) # The same alignment using HHblits e-value p1 = modelcif.alignment.Pair( template=p.template, target=p.target, score=modelcif.alignment.HHblitsEValue("1e-14"), identity=p.identity) # The same alignment with missing score and identity p2 = modelcif.alignment.Pair( template=p.template, target=p.target) aln = Alignment(name='testaln', pairs=[p1, p2]) aln._data_id = 101 
system.alignments.append(aln) # Local alignment with no pairs aln2 = LocalAlignment(name='testaln2', pairs=[]) aln2._data_id = 102 system.alignments.append(aln2) system._before_write() # populate system.templates dumper = modelcif.dumper._AlignmentDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' polymer 42 99 A H . 1 Z 2 1 'reference database' polymer 42 99 A H . 1 Z 3 1 'reference database' polymer 42 99 A H . 1 Z # # loop_ _ma_template_poly.template_id _ma_template_poly.seq_one_letter_code _ma_template_poly.seq_one_letter_code_can 1 ACG ACG # # loop_ _ma_template_poly_segment.id _ma_template_poly_segment.template_id _ma_template_poly_segment.residue_number_begin _ma_template_poly_segment.residue_number_end 1 1 1 3 # # loop_ _ma_template_ref_db_details.template_id _ma_template_ref_db_details.db_name _ma_template_ref_db_details.db_name_other_details _ma_template_ref_db_details.db_accession_code _ma_template_ref_db_details.db_version_date 1 PDB . 1abc 1979-11-22 1 Other 'my custom ref' 2xyz . 1 PubChem . 1234 . 1 AlphaFoldDB . 
P12345 2022-06-01 # # loop_ _ma_target_template_poly_mapping.id _ma_target_template_poly_mapping.template_segment_id _ma_target_template_poly_mapping.target_asym_id _ma_target_template_poly_mapping.target_seq_id_begin _ma_target_template_poly_mapping.target_seq_id_end 1 1 A 1 3 2 1 A 1 3 3 1 A 1 3 # # loop_ _ma_alignment_info.alignment_id _ma_alignment_info.data_id _ma_alignment_info.software_group_id _ma_alignment_info.alignment_length _ma_alignment_info.alignment_type _ma_alignment_info.alignment_mode 1 100 . 4 'target-template pairwise alignment' global 2 101 . 4 'target-template pairwise alignment' global 3 102 . . 'target-template MSA' local # # loop_ _ma_alignment_details.ordinal_id _ma_alignment_details.alignment_id _ma_alignment_details.template_segment_id _ma_alignment_details.target_asym_id _ma_alignment_details.score_type _ma_alignment_details.score_type_other_details _ma_alignment_details.score_value _ma_alignment_details.percent_sequence_identity _ma_alignment_details.sequence_identity_denominator _ma_alignment_details.sequence_identity_denominator_other_details 1 1 1 A 'BLAST e-value' . 1e-15 42.000 'Length of the shorter sequence' . 2 2 1 A 'HHblits e-value' . 1e-14 42.000 'Length of the shorter sequence' . 3 2 1 A . . . . . . 
# # loop_ _ma_alignment.ordinal_id _ma_alignment.alignment_id _ma_alignment.target_template_flag _ma_alignment.sequence 1 1 1 ACE- 2 1 2 AC-G 3 2 1 ACE- 4 2 2 AC-G 5 2 1 ACE- 6 2 2 AC-G # """) def test_non_poly_template_unused(self): """Test AlignmentDumper with unused nonpolymeric template""" system = modelcif.System() # Polymeric entity e1 = ihm.Entity('ACGT') t1 = modelcif.Template( e1, asym_id="A", model_num=1, name="test template", transformation=modelcif.Transformation.identity(), entity_id=9) t1._id = 1 t1._data_id = 99 # Non-polymeric entity e2 = ihm.Entity([ihm.NonPolymerChemComp('HEM')], description='heme') t2 = modelcif.Template( e2, asym_id="B", model_num=1, name="test template", transformation=modelcif.Transformation.identity(), entity_id=10) t2._id = 2 t2._data_id = 100 system.templates.extend((t1, t2)) dumper = modelcif.dumper._AlignmentDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' polymer 42 99 . A 9 1 A 2 2 'reference database' non-polymer 42 100 . 
B 10 1 B # # loop_ _ma_template_poly.template_id _ma_template_poly.seq_one_letter_code _ma_template_poly.seq_one_letter_code_can 1 ACGT ACGT # # loop_ _ma_template_non_poly.template_id _ma_template_non_poly.comp_id _ma_template_non_poly.details 2 HEM heme # """) def test_non_poly_template_used(self): """Test AlignmentDumper with used nonpolymeric template""" system = modelcif.System() # Polymeric entity e1 = ihm.Entity('ACGT') t1 = modelcif.Template( e1, asym_id="A", model_num=1, name="test template", transformation=modelcif.Transformation.identity()) t1._id = 1 t1._data_id = 98 # Non-polymeric entity e2 = ihm.Entity([ihm.NonPolymerChemComp('HEM')], description='heme') # Template should use entity_id, not e2._id e2._id = "THIS SHOULD BE IGNORED" t2 = modelcif.Template( e2, asym_id="B", model_num=1, name="test template", transformation=modelcif.Transformation.identity(), entity_id=9) t2._id = 2 t2._data_id = 99 system.templates.extend((t1, t2)) a2 = modelcif.NonPolymerFromTemplate(template=t2, explicit=True) a2._id = 'X' system.asym_units.append(a2) dumper = modelcif.dumper._AlignmentDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 2 'reference database' non-polymer 42 99 X B 9 1 B 2 1 'reference database' polymer 42 98 . A . 
1 A # # loop_ _ma_template_poly.template_id _ma_template_poly.seq_one_letter_code _ma_template_poly.seq_one_letter_code_can 1 ACGT ACGT # # loop_ _ma_template_non_poly.template_id _ma_template_non_poly.comp_id _ma_template_non_poly.details 2 HEM heme # """) def test_custom_template_unused(self): """Test AlignmentDumper with custom template""" system = modelcif.System() e1 = ihm.Entity('ACGT') t1 = modelcif.CustomTemplate( e1, asym_id="A", model_num=1, name="test template", transformation=modelcif.Transformation.identity(), entity_id=9, details='my custom template') t1.atoms.append(modelcif.TemplateAtom( seq_id=1, atom_id='CA', type_symbol='C', x=0.0, y=1.0, z=2.0, occupancy=0.5, biso=2.0, charge=1.0, auth_seq_id=42, auth_comp_id='XXX', auth_atom_id='X')) t1.atoms.append(modelcif.TemplateAtom( seq_id=2, atom_id='OXT', type_symbol='O', x=1.0, y=2.0, z=3.0)) t1._id = 1 t1._data_id = 99 system.templates.append(t1) dumper = modelcif.dumper._AlignmentDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 customized polymer 42 99 . 
A 9 1 A # # loop_ _ma_template_poly.template_id _ma_template_poly.seq_one_letter_code _ma_template_poly.seq_one_letter_code_can 1 ACGT ACGT # # loop_ _ma_template_customized.template_id _ma_template_customized.details 1 'my custom template' # # loop_ _ma_template_coord.template_id _ma_template_coord.group_PDB _ma_template_coord.ordinal_id _ma_template_coord.type_symbol _ma_template_coord.label_atom_id _ma_template_coord.label_comp_id _ma_template_coord.label_seq_id _ma_template_coord.label_asym_id _ma_template_coord.auth_seq_id _ma_template_coord.auth_asym_id _ma_template_coord.auth_atom_id _ma_template_coord.auth_comp_id _ma_template_coord.Cartn_x _ma_template_coord.Cartn_y _ma_template_coord.Cartn_z _ma_template_coord.occupancy _ma_template_coord.label_entity_id _ma_template_coord.B_iso_or_equiv _ma_template_coord.formal_charge 1 ATOM 1 C CA ALA 1 A 42 A X XXX 0 1.000 2.000 0.500 9 2.000 1.000 1 ATOM 2 O OXT CYS 2 A . A . . 1.000 2.000 3.000 . 9 . . # """) def test_template_transform_dumper(self): """Test TemplateTransformDumper""" system = modelcif.System() tr1 = modelcif.Transformation( rot_matrix=[[-0.64, 0.09, 0.77], [0.76, -0.12, 0.64], [0.15, 0.99, 0.01]], tr_vector=[1., 2., 3.]) system.template_transformations.append(tr1) dumper = modelcif.dumper._TemplateTransformDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_template_trans_matrix.id _ma_template_trans_matrix.rot_matrix[1][1] _ma_template_trans_matrix.rot_matrix[2][1] _ma_template_trans_matrix.rot_matrix[3][1] _ma_template_trans_matrix.rot_matrix[1][2] _ma_template_trans_matrix.rot_matrix[2][2] _ma_template_trans_matrix.rot_matrix[3][2] _ma_template_trans_matrix.rot_matrix[1][3] _ma_template_trans_matrix.rot_matrix[2][3] _ma_template_trans_matrix.rot_matrix[3][3] _ma_template_trans_matrix.tr_vector[1] _ma_template_trans_matrix.tr_vector[2] _ma_template_trans_matrix.tr_vector[3] 1 -0.640000 0.760000 0.150000 0.090000 -0.120000 0.990000 
0.770000 0.640000 0.010000 1.000 2.000 3.000 # """) def test_target_entity_dumper(self): """Test TargetEntityDumper""" system = modelcif.System() e1 = modelcif.Entity("D") e1._id = 42 e1._data_id = 99 system.entities.append(e1) a1 = modelcif.AsymUnit(e1, 'foo') a1._id = 'X' system.asym_units.append(a1) dumper = modelcif.dumper._TargetEntityDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_target_entity.entity_id _ma_target_entity.data_id _ma_target_entity.origin 42 99 designed # # loop_ _ma_target_entity_instance.asym_id _ma_target_entity_instance.entity_id _ma_target_entity_instance.details X 42 foo # """) def test_associated_dumper(self): """Test AssociatedDumper""" system = modelcif.System() e = modelcif.Entity('M') # File in a repository f1 = modelcif.associated.File(path='foo.txt', details='test file') # File in an archive f2 = modelcif.associated.File(path='bar.txt', details='test file2') zf = modelcif.associated.ZipFile(path='t.zip', files=[f2]) # Local file with data f3 = modelcif.associated.File(path='baz.txt', details='test file3', data=e) f4 = modelcif.associated.QAMetricsFile(path='baz.txt', details='test file4') r = modelcif.associated.Repository(url_root='https://example.com', files=[f1, zf]) r2 = modelcif.associated.Repository(url_root=None, files=[f3, f4]) system.repositories.extend((r, r2)) system._before_write() # populate data dumper = modelcif.dumper._DataDumper() dumper.finalize(system) # Assign Data IDs dumper = modelcif.dumper._AssociatedDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_entry_associated_files.id _ma_entry_associated_files.entry_id _ma_entry_associated_files.file_url _ma_entry_associated_files.file_type _ma_entry_associated_files.file_format _ma_entry_associated_files.file_content _ma_entry_associated_files.details _ma_entry_associated_files.data_id 1 model https://example.com/foo.txt file other other 'test 
file' . 2 model https://example.com/t.zip archive zip 'archive with multiple files' . . 3 model baz.txt file other other 'test file3' 1 4 model baz.txt file cif 'QA metrics' 'test file4' . # # loop_ _ma_associated_archive_file_details.id _ma_associated_archive_file_details.archive_file_id _ma_associated_archive_file_details.file_path _ma_associated_archive_file_details.file_format _ma_associated_archive_file_details.file_content _ma_associated_archive_file_details.description _ma_associated_archive_file_details.data_id 1 2 bar.txt other other 'test file2' . # """) # Should be an error to put a zip file inside another zip zf2 = modelcif.associated.ZipFile(path='test2.zip', files=[]) zf.files.append(zf2) self.assertRaises(ValueError, dumper.finalize, system) def test_write_associated(self): """Test write() function with associated files""" s = modelcif.System(id='system1') f = modelcif.associated.CIFFile( path='test_write_associated.cif', categories=['struct', '_AUDIT_CONFORM'], entry_details='test details', entry_id='testcif') f2 = modelcif.associated.File(path='foo.txt', details='test file') r = modelcif.associated.Repository(url_root='https://example.com', files=[f, f2]) s.repositories.append(r) fh = StringIO() modelcif.dumper.write(fh, [s]) main_file = fh.getvalue() with open('test_write_associated.cif') as fh: assoc_file = fh.read() os.unlink('test_write_associated.cif') # struct and audit_conform categories should be in associated file, # not the main file self.assertIn('_struct.title', assoc_file) self.assertNotIn('_struct.title', main_file) self.assertIn('_audit_conform.dict_name', assoc_file) self.assertNotIn('_audit_conform.dict_name', main_file) def test_write_associated_in_zip(self): """Test write() function with associated files in a ZipFile""" s = modelcif.System(id='system1') f = modelcif.associated.CIFFile( path='test_write_associated_in_zip.cif', categories=['struct', '_AUDIT_CONFORM'], entry_details='test details', entry_id='testcif') zf = 
modelcif.associated.ZipFile(path='t.zip', files=[f]) r = modelcif.associated.Repository(url_root='https://example.com', files=[zf]) s.repositories.append(r) fh = StringIO() modelcif.dumper.write(fh, [s]) main_file = fh.getvalue() with open('test_write_associated_in_zip.cif') as fh: assoc_file = fh.read() os.unlink('test_write_associated_in_zip.cif') # struct and audit_conform categories should be in associated file, # not the main file self.assertIn('_struct.title', assoc_file) self.assertNotIn('_struct.title', main_file) self.assertIn('_audit_conform.dict_name', assoc_file) self.assertNotIn('_audit_conform.dict_name', main_file) def test_write_associated_copy(self): """Test write() function with associated files, copy_categories""" s = modelcif.System(id='system1') e1 = modelcif.Entity('ACGT') e1._id = 42 s.entities.append(e1) f = modelcif.associated.CIFFile( path='/not/exist/foo.cif', local_path='test_write_associated_copy.cif', categories=['struct'], copy_categories=['entity', 'audit_conform'], entry_details='test details', entry_id='testcif') r = modelcif.associated.Repository(url_root='https://example.com', files=[f]) s.repositories.append(r) fh = StringIO() modelcif.dumper.write(fh, [s]) main_file = fh.getvalue() with open('test_write_associated_copy.cif') as fh: assoc_file = fh.read() os.unlink('test_write_associated_copy.cif') # struct category should be in associated file, not the main file self.assertIn('_struct.title', assoc_file) self.assertNotIn('_struct.title', main_file) # entity and audit conform categories should be in *both* files self.assertIn('_entity.type', assoc_file) self.assertIn('_entity.type', main_file) self.assertIn('_audit_conform.dict_name', assoc_file) self.assertIn('_audit_conform.dict_name', main_file) def test_write_associated_none(self): """Test write() function with associated files, no categories""" s = modelcif.System(id='system1') f = modelcif.associated.CIFFile( path='test_write_associated_none.cif') r = 
modelcif.associated.Repository(url_root='https://example.com', files=[f]) s.repositories.append(r) fh = StringIO() modelcif.dumper.write(fh, [s]) main_file = fh.getvalue() self.assertIn('_struct.title', main_file) self.assertIn('_audit_conform.dict_name', main_file) @unittest.skipIf(msgpack is None, "needs Python 3 and msgpack") def test_write_associated_binary(self): """Test write() function with associated binary files""" s = modelcif.System(id='system1') f = modelcif.associated.CIFFile( path='test_write_associated_binary.bcif', categories=['struct', '_AUDIT_CONFORM'], entry_details='test details', entry_id='testcif', binary=True) r = modelcif.associated.Repository(url_root='https://example.com', files=[f]) s.repositories.append(r) fh = StringIO() modelcif.dumper.write(fh, [s]) main_file = fh.getvalue() with open('test_write_associated_binary.bcif', 'rb') as fh: assoc_file = msgpack.unpack(fh, raw=False) os.unlink('test_write_associated_binary.bcif') assoc_cats = frozenset( x['name'] for x in assoc_file['dataBlocks'][0]['categories']) self.assertIn('_struct', assoc_cats) self.assertNotIn('_struct.title', main_file) self.assertIn('_audit_conform', assoc_cats) self.assertNotIn('_audit_conform.dict_name', main_file) def test_system_writer(self): """Test _SystemWriter utility class""" class BaseWriter: def flush(self): return 'flush called' def write_comment(self, comment): return 'write comment ' + comment s = modelcif.dumper._SystemWriter(BaseWriter(), {}, {}) # These methods are not usually called in ordinary operation, but # we should provide them for Writer compatibility self.assertEqual(s.flush(), 'flush called') self.assertEqual(s.write_comment('foo'), 'write comment foo') def test_entity_non_poly_dumper(self): """Test EntityNonPolyDumper""" system = modelcif.System() # Polymeric entity (ignored) e1 = modelcif.Entity('ACGT') e1._id = 1 e2 = ihm.Entity([ihm.NonPolymerChemComp('HEM')], description='heme') e2._id = 2 e3 = 
ihm.Entity([ihm.NonPolymerChemComp('ZN')], description='zinc') e3._id = 3 system.entities.extend((e1, e2, e3)) t2 = modelcif.Template(e2, 'A', model_num=1, transformation=None) a1 = modelcif.AsymUnit(e1, 'foo') a2 = modelcif.NonPolymerFromTemplate(template=t2, explicit=True) system.asym_units.extend((a1, a2)) dumper = modelcif.dumper._EntityNonPolyDumper() dumper.finalize(system) out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _pdbx_entity_nonpoly.entity_id _pdbx_entity_nonpoly.name _pdbx_entity_nonpoly.comp_id _pdbx_entity_nonpoly.ma_model_mode 2 heme HEM explicit 3 zinc ZN . # """) def test_chem_comp_dumper(self): """Test ChemCompDumper""" system = modelcif.System() # ChemComp without ccd c1 = ihm.NonPolymerChemComp('C1', name='C1') # ChemComp using core CCD c2 = ihm.NonPolymerChemComp('C2', name='C2') c2.ccd = 'core' # ChemComp using MA CCD c3 = ihm.NonPolymerChemComp('C3', name='C3') c3.ccd = 'ma' # ChemComp with descriptors (local) c4 = ihm.NonPolymerChemComp('C4', name='C4') c4.ccd = None c4.descriptors = [modelcif.descriptor.IUPACName("foo")] e1 = modelcif.Entity(['A', 'C', c1, c2, c3, c4]) system.entities.append(e1) e2 = modelcif.Entity('GT') t2 = modelcif.Template(e2, 'A', model_num=1, transformation=None) system.templates.append(t2) dumper = modelcif.dumper._ChemCompDumper() out = _get_dumper_output(dumper, system) # chem_comp should include both system.entities and system.templates self.assertEqual(out, """# loop_ _chem_comp.id _chem_comp.type _chem_comp.name _chem_comp.formula _chem_comp.formula_weight _chem_comp.ma_provenance ALA 'L-peptide linking' ALANINE 'C3 H7 N O2' 89.094 'CCD Core' C1 non-polymer C1 . . 'CCD Core' C2 non-polymer C2 . . 'CCD Core' C3 non-polymer C3 . . 'CCD MA' C4 non-polymer C4 . . 
'CCD local' CYS 'L-peptide linking' CYSTEINE 'C3 H7 N O2 S' 121.154 'CCD Core' GLY 'peptide linking' GLYCINE 'C2 H5 N O2' 75.067 'CCD Core' THR 'L-peptide linking' THREONINE 'C4 H9 N O3' 119.120 'CCD Core' # """) def test_chem_comp_dumper_bad_ccd(self): """Test ChemCompDumper with invalid value for ccd""" system = modelcif.System() c1 = ihm.NonPolymerChemComp('C1', name='C1') c1.ccd = 'garbage' e1 = modelcif.Entity([c1]) system.entities.append(e1) dumper = modelcif.dumper._ChemCompDumper() self.assertRaises(KeyError, _get_dumper_output, dumper, system) # Should work with checks disabled _ = _get_dumper_output(dumper, system, check=False) def test_chem_comp_descriptor_dumper(self): """Test ChemCompDescriptorDumper""" class MockObject: pass system = modelcif.System() # Old-style ChemComp without descriptors c1 = ihm.NonPolymerChemComp('C1', name='C1name') if hasattr(c1, 'descriptor'): del c1.descriptors c2 = ihm.NonPolymerChemComp('C2', name='C2name') c2.ccd = None soft = MockObject() soft._id = 42 c2.descriptors = [modelcif.descriptor.IUPACName("foo"), modelcif.descriptor.PubChemCID( "bar", details="test details", software=soft)] e1 = modelcif.Entity(['A', 'C', c1, c2]) system.entities.append(e1) dumper = modelcif.dumper._ChemCompDescriptorDumper() out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _ma_chem_comp_descriptor.ordinal_id _ma_chem_comp_descriptor.chem_comp_id _ma_chem_comp_descriptor.chem_comp_name _ma_chem_comp_descriptor.type _ma_chem_comp_descriptor.value _ma_chem_comp_descriptor.details _ma_chem_comp_descriptor.software_id 1 C2 C2name 'IUPAC Name' foo . . 
2 C2 C2name 'PubChem CID' bar 'test details' 42 # """) def test_struct_ref(self): """Test StructRefDumper""" system = ihm.System() lpep = ihm.LPeptideAlphabet() sd = modelcif.reference.SeqDif( seq_id=2, db_monomer=lpep['W'], monomer=lpep['S'], details='Test mutation') # Test non-mandatory db_monomer sd2 = modelcif.reference.SeqDif( seq_id=3, db_monomer=None, monomer=lpep['P'], details='Test mutation') r1 = modelcif.reference.UniProt( code='NUP84_YEAST', accession='P52891', sequence='MELWPTYQT', details='test sequence') r1.alignments.append(modelcif.reference.Alignment( db_begin=3, seq_dif=[sd, sd2])) r2 = modelcif.reference.UniProt( code='testcode', accession='testacc', sequence='MELSPTYQT', details='test2') r2.alignments.append(modelcif.reference.Alignment( db_begin=4, db_end=5, entity_begin=2, entity_end=3)) r2.alignments.append(modelcif.reference.Alignment( db_begin=9, db_end=9, entity_begin=4, entity_end=4)) with warnings.catch_warnings(): warnings.simplefilter("ignore") r3 = modelcif.reference.UniProt( code='testcode2', accession='testacc2', sequence=None) r3.alignments.append(modelcif.reference.Alignment( db_begin=4, db_end=5, entity_begin=2, entity_end=3)) r4 = modelcif.reference.UniProt( code='testcode3', accession='testacc3', sequence=ihm.unknown) r4.alignments.append(modelcif.reference.Alignment( db_begin=4, db_end=5, entity_begin=2, entity_end=3)) system.entities.append(modelcif.Entity( 'LSPT', references=[r1, r2, r3, r4])) dumper = ihm.dumper._EntityDumper() dumper.finalize(system) # Assign entity IDs dumper = ihm.dumper._StructRefDumper() dumper.finalize(system) # Assign IDs out = _get_dumper_output(dumper, system) self.assertEqual(out, """# loop_ _struct_ref.id _struct_ref.entity_id _struct_ref.db_name _struct_ref.db_code _struct_ref.pdbx_db_accession _struct_ref.pdbx_align_begin _struct_ref.pdbx_seq_one_letter_code _struct_ref.details 1 1 UNP NUP84_YEAST P52891 3 LWPTYQT 'test sequence' 2 1 UNP testcode testacc 4 SPTYQT test2 3 1 UNP testcode2 
testacc2 4 . . 4 1 UNP testcode3 testacc3 4 ? . # # loop_ _struct_ref_seq.align_id _struct_ref_seq.ref_id _struct_ref_seq.seq_align_beg _struct_ref_seq.seq_align_end _struct_ref_seq.db_align_beg _struct_ref_seq.db_align_end 1 1 1 4 3 6 2 2 2 3 4 5 3 2 4 4 9 9 4 3 2 3 4 5 5 4 2 3 4 5 # # loop_ _struct_ref_seq_dif.pdbx_ordinal _struct_ref_seq_dif.align_id _struct_ref_seq_dif.seq_num _struct_ref_seq_dif.db_mon_id _struct_ref_seq_dif.mon_id _struct_ref_seq_dif.details 1 1 2 TRP SER 'Test mutation' 2 1 3 ? PRO 'Test mutation' # """) if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_edit.py000066400000000000000000000064101506655355100200110ustar00rootroot00000000000000import utils import os import unittest import io TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.reader import modelcif.dumper class Tests(unittest.TestCase): def test_model_with_assembly(self): """Test read of Model with Assembly followed by write""" sin = io.StringIO(""" loop_ _entity.id _entity.type _entity.pdbx_description _entity.pdbx_number_of_molecules _entity.formula_weight _entity.details 1 polymer Nup84 2 100.0 . # loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id _entity_poly_seq.hetero 1 1 ALA . 1 2 CYS . # loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 'Model subunit A' # loop_ _ma_struct_assembly.ordinal_id _ma_struct_assembly.assembly_id _ma_struct_assembly.entity_id _ma_struct_assembly.asym_id _ma_struct_assembly.seq_id_begin _ma_struct_assembly.seq_id_end 1 1 1 A 1 2 # loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 1 4 'Homology model' . 
# loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.label_asym_id _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.label_entity_id _atom_site.auth_asym_id _atom_site.B_iso_or_equiv _atom_site.pdbx_PDB_model_num ATOM 1 C CA . ALA 1 1 ? A 1.000 2.000 3.000 . 1 A . 1 """) s, = modelcif.reader.read(sin) sout = io.StringIO() modelcif.dumper.write(sout, [s]) def test_model_without_assembly(self): """Test read of Model without Assembly followed by write""" sin = io.StringIO(""" loop_ _entity.id _entity.type _entity.pdbx_description _entity.pdbx_number_of_molecules _entity.formula_weight _entity.details 1 polymer Nup84 2 100.0 . # loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id _entity_poly_seq.hetero 1 1 ALA . 1 2 CYS . # loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 'Model subunit A' # loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' . 4 'Homology model' . # loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.label_asym_id _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.label_entity_id _atom_site.auth_asym_id _atom_site.B_iso_or_equiv _atom_site.pdbx_PDB_model_num ATOM 1 C CA . ALA 1 1 ? A 1.000 2.000 3.000 . 1 A . 
1 """) s, = modelcif.reader.read(sin) sout = io.StringIO() modelcif.dumper.write(sout, [s]) if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_examples.py000066400000000000000000000075031506655355100207060ustar00rootroot00000000000000import utils import os import unittest import sys import shutil import subprocess try: import msgpack except ImportError: msgpack = None TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.reader def get_example_dir(): return os.path.join(TOPDIR, "examples") def get_example_path(fname): return os.path.join(get_example_dir(), fname) class Tests(unittest.TestCase): @unittest.skipIf('APPVEYOR' in os.environ, "AppVeyor environments have old SSL certs") @unittest.skipIf('GITHUB_ACTIONS' in os.environ, "Example is slow and fails when ModBase is down") def test_validate_modbase_example(self): """Test validate_modbase example""" subprocess.check_call([sys.executable, get_example_path("validate_modbase.py")]) @unittest.skipIf('APPVEYOR' in os.environ, "AppVeyor environments have old SSL certs") @unittest.skipIf('GITHUB_ACTIONS' in os.environ, "Example is slow and fails when ModBase is down") def test_validate_mmcif_example(self): """Test validate_mmcif example""" with utils.temporary_directory() as tmpdir: subprocess.check_call([sys.executable, get_example_path("validate_mmcif.py"), get_example_path("input/ligands.cif")], cwd=tmpdir) def test_associated_example(self): """Test associated example""" subprocess.check_call([sys.executable, get_example_path("associated.py")]) def test_mkmodbase_example(self): """Test mkmodbase example""" with utils.temporary_directory() as tmpdir: subprocess.check_call([sys.executable, get_example_path("mkmodbase.py")], cwd=tmpdir) # Make sure that a complete output file was produced and that we # can read it with open(os.path.join(tmpdir, 'output.cif')) as fh: contents = fh.readlines() self.assertEqual(len(contents), 451) with 
open(os.path.join(tmpdir, 'output.cif')) as fh: s, = modelcif.reader.read(fh) def test_ligands_example(self): """Test ligands example""" with utils.temporary_directory() as tmpdir: subprocess.check_call([sys.executable, get_example_path("ligands.py")], cwd=tmpdir) # Make sure that a complete output file was produced and that we # can read it with open(os.path.join(tmpdir, 'output.cif')) as fh: contents = fh.readlines() self.assertEqual(len(contents), 334) with open(os.path.join(tmpdir, 'output.cif')) as fh: s, = modelcif.reader.read(fh) @unittest.skipIf(msgpack is None, "BinaryCIF needs msgpack") def test_convert_bcif_example(self): """Test convert_bcif example""" with utils.temporary_directory() as tmpdir: from_input = get_example_path("input") to_input = os.path.join(tmpdir, 'input') os.mkdir(to_input) shutil.copy(os.path.join(from_input, "ligands.cif"), to_input) subprocess.check_call([sys.executable, get_example_path("convert_bcif.py")], cwd=tmpdir) # Make sure that a complete output file was produced and that we # can read it with open(os.path.join(tmpdir, 'ligands.bcif'), 'rb') as fh: s, = modelcif.reader.read(fh, format='BCIF') if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_main.py000066400000000000000000000200031506655355100200020ustar00rootroot00000000000000import os import unittest import utils TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif import modelcif.protocol import modelcif.descriptor import modelcif.associated import ihm class Tests(unittest.TestCase): def test_all_data(self): """Test _all_data() method""" s = modelcif.System() e1 = modelcif.Entity("D") e2 = modelcif.Entity("M") s.entities.extend((e1, e2)) e3 = modelcif.Entity("A") s.data.extend((e1, e3)) s.data_groups.append('something not a group') e4 = modelcif.Entity("M") s.data_groups.append(modelcif.data.DataGroup([e1, e4])) e5 = modelcif.Entity("M") f = modelcif.associated.File(path='foo', 
details='bar', data=e5) s.repositories.append(modelcif.associated.Repository('/', [f])) d = s._all_data() # List may contain duplicates self.assertEqual(list(d), [e1, e3, e1, e2, e1, e4, e5]) def test_all_asym_units(self): """Test _all_asym_units() method""" s = modelcif.System() e1 = modelcif.Entity("DDDD") e2 = modelcif.Entity("MMMM") a1 = modelcif.AsymUnit(e1) a2 = modelcif.AsymUnit(e2) s.asym_units.append(a1) asmb = modelcif.Assembly((a1, a2(1, 2))) s.assemblies.append(asmb) asyms = s._all_asym_units() # List may contain duplicates and should be all AsymUnit, # not AsymUnitRange self.assertEqual(list(asyms), [a1, a1, a2]) def test_all_entities(self): """Test _all_entities() method""" s = modelcif.System() e1 = modelcif.Entity("DDDD") e2 = modelcif.Entity("MMMM") s.entities.append(e1) a1 = modelcif.AsymUnit(e1) s.asym_units.append(a1) t2 = modelcif.Template(e2, asym_id='A', model_num=1, transformation=None) s.templates.append(t2) es = s._all_entities() # List may contain duplicates, but does not contain template entity e2 self.assertEqual(list(es), [e1, e1]) def test_all_data_groups(self): """Test _all_data_groups() method""" s = modelcif.System() e1 = modelcif.Entity("A") s.data_groups.append(e1) e2 = modelcif.Entity("C") p = modelcif.protocol.Protocol() p.steps.append(modelcif.protocol.ModelingStep( input_data=e1, output_data=e2)) p.steps.append(modelcif.protocol.ModelingStep( input_data=None, output_data=None)) s.protocols.append(p) d = s._all_data_groups() self.assertEqual(list(d), [e1, e1, e2]) def test_all_template_transformations(self): """Test _all_template_transformations() method""" s = modelcif.System() tr1 = 'tr1' tr2 = 'tr2' s.template_transformations.extend((tr1, tr2)) template = modelcif.Template('mockentity', asym_id="A", model_num=1, name="test template", transformation=tr1) s.templates.append(template) tt = s._all_template_transformations() # List may contain duplicates self.assertEqual(list(tt), [tr1, tr2, tr1]) def test_transformation(self): 
"""Test Transformation class""" _ = modelcif.Transformation([[1, 0, 0], [0, 1, 0], [0, 0, 1]], [1, 2, 3]) def test_identity_transformation(self): """Test identity transformation""" t = modelcif.Transformation.identity() for i in range(3): self.assertAlmostEqual(t.tr_vector[i], 0., delta=0.1) for j in range(3): self.assertAlmostEqual(t.rot_matrix[i][j], 1. if i == j else 0., delta=0.1) # Should always get the same object t2 = modelcif.Transformation.identity() self.assertIs(t, t2) def test_all_software_groups(self): """Test _all_software_groups() method""" s = modelcif.System() sg1 = 'sg1' sg2 = 'sg2' s.software_groups.extend((sg1, sg2)) p = modelcif.protocol.Protocol() p.steps.append(modelcif.protocol.ModelingStep( input_data=None, output_data=None, software=sg1)) s.protocols.append(p) allsg = s._all_software_groups() # List may contain duplicates self.assertEqual(list(allsg), [sg1, sg2, sg1]) def test_all_ref_software(self): """Test _all_ref_software() method""" s1 = modelcif.Software( name='foo', version='1.0', classification='1', description='2', location='3') s2 = modelcif.Software( name='foo', version='2.0', classification='4', description='5', location='6') p = modelcif.SoftwareParameter(name='foo', value='bar') s2param = modelcif.SoftwareWithParameters(s2, [p]) s = modelcif.System() s.software_groups.append(modelcif.SoftwareGroup((s1, s2param))) s.software_groups.append(s1) e1 = modelcif.Entity("DDDD") t1 = modelcif.Template(e1, asym_id='A', model_num=1, transformation=None) s.templates.append(t1) # Old-style ChemComp without descriptors c1 = ihm.NonPolymerChemComp('C1', name='C1') if hasattr(c1, 'descriptors'): del c1.descriptors # ChemComp with descriptor without software c2 = ihm.NonPolymerChemComp('C2', name='C2') c2.descriptors = [modelcif.descriptor.IUPACName('foo')] # ChemComp with descriptor with software c3 = ihm.NonPolymerChemComp('C3', name='C3') s3 = modelcif.Software( name='foo', version='2.0', classification='4', description='5', location='6') 
c3.descriptors = [modelcif.descriptor.IUPACName('foo', software=s3)] e2 = modelcif.Entity([c1, c2, c3]) s.entities.append(e2) alls = s._all_ref_software() # List may contain duplicates self.assertEqual(list(alls), [s1, s2, s1, s3]) def test_software_parameter(self): """Test SoftwareParameter class""" p = modelcif.SoftwareParameter(name='foo', value=42) self.assertEqual(p.name, 'foo') self.assertEqual(p.value, 42) self.assertIsNone(p.description) _ = repr(p) def test_software_with_parameters(self): """Test SoftwareWithParameters class""" s = modelcif.Software( name='foo', version='1.0', classification='1', description='2', location='3') p = modelcif.SoftwareParameter(name='foo', value=42) swp = modelcif.SoftwareWithParameters(software=s, parameters=[p]) self.assertEqual(swp.software.name, 'foo') self.assertEqual(swp.parameters, [p]) self.assertEqual(swp.name, 'foo') self.assertEqual(swp.classification, '1') self.assertEqual(swp.description, '2') self.assertEqual(swp.location, '3') self.assertEqual(swp.type, 'program') self.assertEqual(swp.version, '1.0') self.assertIsNone(swp.citation) def test_template(self): """Test Template class""" e1 = modelcif.Entity("DDDD") t1 = modelcif.Template(e1, asym_id='A', model_num=1, transformation=None) self.assertEqual(t1.seq_id_range, (1, 4)) self.assertEqual(t1.template, t1) def test_software_group_parameters(self): """Test old-style SoftwareGroup construction with parameters""" s = modelcif.Software( name='foo', version='1.0', classification='1', description='2', location='3') p = modelcif.SoftwareParameter(name='foo', value=42) self.assertWarns(UserWarning, modelcif.SoftwareGroup, [s], parameters=[p]) def test_feature(self): """Test Feature base class""" f = modelcif.Feature() self.assertIs(f._get_entity_type(), ihm.unknown) if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_make_mmcif.py000066400000000000000000000070251506655355100211570ustar00rootroot00000000000000import utils import os import sys import 
unittest import subprocess TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.reader import modelcif.util.make_mmcif # Script should also be importable MAKE_MMCIF = os.path.join(TOPDIR, 'modelcif', 'util', 'make_mmcif.py') class Tests(unittest.TestCase): def test_simple(self): """Simple test of make_mmcif utility script""" incif = utils.get_input_file_name(TOPDIR, 'struct_only.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif]) with open('output.cif') as fh: s, = modelcif.reader.read(fh) self.assertEqual(s.title, 'Architecture of Pol II(G) and molecular mechanism ' 'of transcription regulation by Gdown1') os.unlink('output.cif') def test_non_default_output(self): """Simple test of make_mmcif with non-default output name""" incif = utils.get_input_file_name(TOPDIR, 'struct_only.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif, 'non-default-output.cif']) with open('non-default-output.cif') as fh: s, = modelcif.reader.read(fh) self.assertEqual(s.title, 'Architecture of Pol II(G) and molecular mechanism ' 'of transcription regulation by Gdown1') os.unlink('non-default-output.cif') def test_no_title(self): """Check that make_mmcif adds missing title""" incif = utils.get_input_file_name(TOPDIR, 'no_title.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif]) with open('output.cif') as fh: s, = modelcif.reader.read(fh) self.assertEqual(s.title, 'Auto-generated system') os.unlink('output.cif') def test_bad_usage(self): """Bad usage of make_mmcif utility script""" ret = subprocess.call([sys.executable, MAKE_MMCIF]) self.assertEqual(ret, 2) def test_same_file(self): """Check that make_mmcif fails if input and output are the same""" incif = utils.get_input_file_name(TOPDIR, 'struct_only.cif') ret = subprocess.call([sys.executable, MAKE_MMCIF, incif, incif]) self.assertEqual(ret, 1) def test_not_modeled(self): """Check addition of not-modeled residue information""" incif = 
utils.get_input_file_name(TOPDIR, 'not_modeled.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif]) with open('output.cif') as fh: contents = fh.readlines() loop = contents.index("_pdbx_poly_seq_scheme.pdb_ins_code\n") scheme = "".join(contents[loop - 11:loop + 11]) # Residues 5 and 6 in chain A, and 2 in chain B, are missing from # atom_site, so should now be missing from the scheme table. self.assertEqual(scheme, """# loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.mon_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.auth_seq_num _pdbx_poly_seq_scheme.pdb_mon_id _pdbx_poly_seq_scheme.auth_mon_id _pdbx_poly_seq_scheme.pdb_strand_id _pdbx_poly_seq_scheme.pdb_ins_code A 1 1 VAL 2 2 VAL VAL A ? A 1 2 GLY 3 3 GLY GLY A ? A 1 3 GLN 4 4 GLN GLN A ? A 1 4 GLN 5 5 GLN GLN A ? A 1 5 TYR 5 ? ? ? A . A 1 6 SER 6 ? ? ? A . A 1 7 SER 8 8 SER SER A ? B 2 1 ASP 3 3 ASP ASP B ? B 2 2 GLU 2 ? ? ? B . # """) os.unlink('output.cif') if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_model.py000066400000000000000000000015311506655355100201630ustar00rootroot00000000000000import utils import os import unittest TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.model class Tests(unittest.TestCase): def test_model(self): """Test Model classes""" m = modelcif.model.HomologyModel([]) self.assertEqual(m.model_type, "Homology model") self.assertIsNone(m.other_details) # generic "other" model m = modelcif.model.Model([]) self.assertEqual(m.model_type, "Other") self.assertIsNone(m.other_details) # custom "other" model class CustomRef(modelcif.model.Model): """foo bar""" m = CustomRef([]) self.assertEqual(m.model_type, "Other") self.assertEqual(m.other_details, "foo") if __name__ == '__main__': unittest.main() 
python-modelcif-1.5/test/test_qa_metric.py000066400000000000000000000062121506655355100210300ustar00rootroot00000000000000import os import unittest import utils TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.qa_metric class Tests(unittest.TestCase): def test_metric_types(self): """Test MetricType subclasses""" self.assertEqual(modelcif.qa_metric.Energy.type, "energy") self.assertIsNone(modelcif.qa_metric.Energy.other_details) self.assertEqual(modelcif.qa_metric.PAE.type, "PAE") self.assertEqual(modelcif.qa_metric.ContactProbability.type, "contact probability") # MetricType itself should have no other_details class Custom1(modelcif.qa_metric.Global, modelcif.qa_metric.MetricType): """Custom 1""" x = Custom1(42) self.assertEqual(x.type, "other") self.assertIsNone(x.other_details) class CustomMetricType(modelcif.qa_metric.MetricType): """foo bar""" class Custom2(modelcif.qa_metric.Global, CustomMetricType): """Custom 2""" x = Custom2(42) self.assertEqual(x.type, "other") self.assertEqual(x.other_details, "foo") # MetricType in the enumeration should have no "other_details" class EnumMetricType(modelcif.qa_metric.MetricType): """foo""" type = "enum" class Custom3(modelcif.qa_metric.Global, EnumMetricType): """Custom 3""" x = Custom3(42) self.assertEqual(x.type, "enum") self.assertIsNone(x.other_details) def test_global_metric(self): """Test Global MetricMode""" class MyScore(modelcif.qa_metric.Global, modelcif.qa_metric.Energy): pass q = MyScore(42) _ = repr(q) def test_local_metric(self): """Test Local MetricMode""" class MyScore(modelcif.qa_metric.Local, modelcif.qa_metric.Energy): pass e1 = modelcif.Entity('ACGT') asym = modelcif.AsymUnit(e1, 'foo') q = MyScore(asym.residue(2), 42) _ = repr(q) def test_local_pairwise_metric(self): """Test LocalPairwise MetricMode""" class MyScore(modelcif.qa_metric.LocalPairwise, modelcif.qa_metric.Energy): pass e1 = modelcif.Entity('ACGT') asym = 
modelcif.AsymUnit(e1, 'foo') q = MyScore(asym.residue(2), asym.residue(3), 42) _ = repr(q) def test_feature_metric(self): """Test Feature MetricMode""" class MyScore(modelcif.qa_metric.Feature, modelcif.qa_metric.Energy): pass e1 = modelcif.Entity('ACGT') asym = modelcif.AsymUnit(e1, 'foo') q = MyScore(asym.residue(2), 42) _ = repr(q) def test_feature_pairwise_metric(self): """Test FeaturePairwise MetricMode""" class MyScore(modelcif.qa_metric.FeaturePairwise, modelcif.qa_metric.Energy): pass e1 = modelcif.Entity('ACGT') asym = modelcif.AsymUnit(e1, 'foo') q = MyScore(asym.residue(2), asym.residue(3), 42) _ = repr(q) if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_reader.py000066400000000000000000002030631506655355100203310ustar00rootroot00000000000000from datetime import date import unittest import utils import os import datetime from io import StringIO TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.reader import modelcif.reference import ihm import ihm.reader ASYM_ENTITY = """ loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id _entity_poly_seq.hetero 1 1 MET . 1 2 CYS . 1 3 MET . 1 4 SER . 
# loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 foo """ class Tests(unittest.TestCase): def test_old_file_read_default(self): """Test default handling of old files""" cif = """ loop_ _audit_conform.dict_name _audit_conform.dict_version mmcif_pdbx.dic 5.311 mmcif_ma.dic 0.14 """ s, = modelcif.reader.read(StringIO(cif)) def test_old_file_read_fail(self): """Test failure reading old files""" cif = """ loop_ _audit_conform.dict_name _audit_conform.dict_version mmcif_pdbx.dic 5.311 mmcif_ma.dic 0.1.3 """ self.assertRaises(modelcif.reader.OldFileError, modelcif.reader.read, StringIO(cif), reject_old_file=True) def test_new_file_read_ok(self): """Test success reading not-old files""" # File read is OK if version is new enough, or version cannot be parsed # because it is non-int or has too many elements for ver in ('1.3.2', '1.3', '0.0.4.3', '0.0a'): cif = """ loop_ _audit_conform.dict_name _audit_conform.dict_version mmcif_pdbx.dic 5.311 mmcif_ma.dic %s """ % ver s, = modelcif.reader.read(StringIO(cif), reject_old_file=True) def test_software_group_handler(self): """Test SoftwareGroupHandler and SoftwareParameterHandler""" cif = """ loop_ _ma_software_parameter.parameter_id _ma_software_parameter.group_id _ma_software_parameter.data_type _ma_software_parameter.name _ma_software_parameter.value _ma_software_parameter.description 1 1 integer foo 42 foodesc 2 1 boolean bar YES . 3 1 string baz ok . 4 1 integer-csv intlist 1,2,3,4 . 5 1 float-csv floatlist 1.5,3.8 . # loop_ _ma_software_group.ordinal_id _ma_software_group.group_id _ma_software_group.software_id _ma_software_group.parameter_group_id 1 1 1 . 2 1 2 . 3 2 3 . 
4 2 4 1 """ s, = modelcif.reader.read(StringIO(cif)) s1, s2, s3, s4 = s.software g1, g2 = s.software_groups self.assertEqual(len(g1), 2) self.assertEqual(len(g2), 2) self.assertIsInstance(g1[0], modelcif.Software) self.assertIsInstance(g1[1], modelcif.Software) self.assertEqual(g1[0], s1) self.assertEqual(g1[1], s2) self.assertIsInstance(g2[0], modelcif.Software) self.assertIsInstance(g2[1], modelcif.SoftwareWithParameters) self.assertEqual(g2[0], s3) self.assertEqual(g2[1].software, s4) p1, p2, p3, intlist, floatlist = g2[1].parameters self.assertEqual(p1.name, 'foo') self.assertEqual(p1.value, 42) self.assertEqual(p1.description, 'foodesc') self.assertEqual(p2.name, 'bar') self.assertTrue(p2.value) self.assertIsNone(p2.description) self.assertEqual(p3.name, 'baz') self.assertEqual(p3.value, 'ok') self.assertIsNone(p3.description) self.assertEqual(intlist.value, [1, 2, 3, 4]) f1, f2 = floatlist.value self.assertAlmostEqual(f1, 1.5, delta=1e-1) self.assertAlmostEqual(f2, 3.8, delta=1e-1) def test_enumeration_mapper(self): """Test EnumerationMapper class""" m = modelcif.reader._EnumerationMapper( modelcif.reference, modelcif.reference.TargetReference) # Check get of a handled enumeration value unp = m.get('UNP', None) self.assertIs(unp, modelcif.reference.UniProt) self.assertEqual(unp.name, 'UNP') self.assertIsNone(unp.other_details) # We should get the same class each time (case insensitive) unp2 = m.get('unp', None) self.assertIs(unp, unp2) # Check get of an unhandled value miss = m.get('MIS', None) self.assertEqual(miss.name, 'MIS') self.assertIsNone(unp.other_details) # We should get the same class each time (case insensitive) miss2 = m.get('mis', None) self.assertIs(miss, miss2) # Check get of a custom "other" value custom = m.get('other', "custom type 1") self.assertEqual(custom.name, 'Other') self.assertEqual(custom.other_details, "custom type 1") # We should get the same class each time (case insensitive) custom2 = m.get('Other', "CUSTOM TYPE 1") 
self.assertIs(custom, custom2) # Check get of a different custom "other" value custom = m.get('other', "custom type 2") self.assertEqual(custom.name, 'Other') self.assertEqual(custom.other_details, "custom type 2") def test_database_handler(self): """Test DatabaseHandler""" cif = """ _database_2.database_id 'PDB' _database_2.database_code '5HVP' """ s, = modelcif.reader.read(StringIO(cif)) self.assertEqual(s.database.id, 'PDB') self.assertEqual(s.database.code, '5HVP') def test_target_ref_db_handler(self): """Test TargetRefDBHander""" cif = """ loop_ _ma_target_ref_db_details.target_entity_id _ma_target_ref_db_details.db_name _ma_target_ref_db_details.db_name_other_details _ma_target_ref_db_details.db_code _ma_target_ref_db_details.db_accession _ma_target_ref_db_details.seq_db_isoform _ma_target_ref_db_details.seq_db_align_begin _ma_target_ref_db_details.seq_db_align_end _ma_target_ref_db_details.ncbi_taxonomy_id _ma_target_ref_db_details.organism_scientific _ma_target_ref_db_details.seq_db_sequence_version_date _ma_target_ref_db_details.seq_db_sequence_checksum _ma_target_ref_db_details.is_primary 1 UNP . MED1_YEAST Q12321 test_iso 1 10 test_tax test_org 1996-11-01 637FEA3E78D915BC YES 1 Other foo . . ? 1 10 . . . . NO 1 other bar . . ? 1 10 . . . . . 1 MIS baz . . ? 1 10 . . . . . 
""" s, = modelcif.reader.read(StringIO(cif)) e, = s.entities r1, r2, r3, r4 = e.references self.assertIsInstance(r1, modelcif.reference.UniProt) self.assertEqual(r1.code, 'MED1_YEAST') self.assertEqual(r1.accession, 'Q12321') self.assertEqual(r1.isoform, 'test_iso') self.assertEqual(r1.align_begin, 1) self.assertEqual(r1.align_end, 10) self.assertEqual(r1.ncbi_taxonomy_id, 'test_tax') self.assertEqual(r1.organism_scientific, 'test_org') self.assertEqual(r1.sequence_version_date, date(1996, 11, 1)) self.assertIsNone(r1.sequence) self.assertIsNone(r1.details) self.assertTrue(r1.is_primary) self.assertEqual(r1.alignments, []) self.assertEqual(r2.name, 'Other') self.assertFalse(r2.is_primary) self.assertEqual(r2.other_details, 'foo') self.assertEqual(r3.name, 'Other') self.assertEqual(r3.other_details, 'bar') self.assertIsNone(r3.is_primary) self.assertEqual(r4.name, 'MIS') self.assertIsNone(r4.other_details) # should be ignored def test_target_ref_db_handler_with_struct_ref(self): """Test TargetRefDBHander combined with struct_ref info""" cif = """ loop_ _struct_ref.id _struct_ref.entity_id _struct_ref.db_name _struct_ref.db_code _struct_ref.pdbx_db_accession _struct_ref.pdbx_align_begin _struct_ref.pdbx_seq_one_letter_code _struct_ref.details 1 1 UNP MED1_YEAST Q12321 1 DSYVETLDCC "test details" 2 1 UNP sr_only_code sr_only_acc 1 DSYVETLDPP . 
# # loop_ _struct_ref_seq.align_id _struct_ref_seq.ref_id _struct_ref_seq.seq_align_beg _struct_ref_seq.seq_align_end _struct_ref_seq.db_align_beg _struct_ref_seq.db_align_end 1 1 1 10 1 10 2 2 1 10 1 10 # loop_ _ma_target_ref_db_details.target_entity_id _ma_target_ref_db_details.db_name _ma_target_ref_db_details.db_name_other_details _ma_target_ref_db_details.db_code _ma_target_ref_db_details.db_accession _ma_target_ref_db_details.seq_db_isoform _ma_target_ref_db_details.seq_db_align_begin _ma_target_ref_db_details.seq_db_align_end _ma_target_ref_db_details.ncbi_taxonomy_id _ma_target_ref_db_details.organism_scientific _ma_target_ref_db_details.seq_db_sequence_version_date _ma_target_ref_db_details.seq_db_sequence_checksum _ma_target_ref_db_details.is_primary 1 UNP . MED1_YEAST Q12321 test_iso 1 10 test_tax test_org 1996-11-01 637FEA3E78D915BC YES 1 UNP . rd_only_code rd_only_acc rd_only_iso . . . . . . NO """ s, = modelcif.reader.read(StringIO(cif)) e, = s.entities r1, r2, r3 = e.references # r1 should contain both target_ref_db and struct_ref info self.assertIsInstance(r1, modelcif.reference.UniProt) self.assertEqual(r1.code, 'MED1_YEAST') self.assertEqual(r1.accession, 'Q12321') self.assertEqual(r1.isoform, 'test_iso') self.assertEqual(r1.align_begin, 1) self.assertEqual(r1.align_end, 10) self.assertEqual(r1.ncbi_taxonomy_id, 'test_tax') self.assertEqual(r1.organism_scientific, 'test_org') self.assertEqual(r1.sequence_version_date, date(1996, 11, 1)) self.assertEqual(r1.sequence, 'DSYVETLDCC') self.assertEqual(r1.details, "test details") self.assertTrue(r1.is_primary) a, = r1.alignments self.assertEqual(a.db_begin, 1) self.assertEqual(a.db_end, 10) self.assertEqual(a.entity_begin, 1) self.assertEqual(a.entity_end, 10) # r2 should contain only target_ref_db info self.assertIsInstance(r2, modelcif.reference.UniProt) self.assertEqual(r2.code, 'rd_only_code') self.assertEqual(r2.accession, 'rd_only_acc') self.assertEqual(r2.isoform, 'rd_only_iso') 
self.assertIsNone(r2.sequence) self.assertFalse(r2.is_primary) # r3 should contain only struct_ref info self.assertIsInstance(r3, modelcif.reference.UniProt) self.assertEqual(r3.code, 'sr_only_code') self.assertEqual(r3.accession, 'sr_only_acc') self.assertIsNone(r3.isoform) self.assertIsNone(r3.ncbi_taxonomy_id) self.assertEqual(r3.sequence, 'DSYVETLDPP') self.assertIsNone(r3.is_primary) a, = r3.alignments self.assertEqual(a.db_begin, 1) self.assertEqual(a.db_end, 10) self.assertEqual(a.entity_begin, 1) self.assertEqual(a.entity_end, 10) def test_transformation_handler(self): """Test _TransformationHandler""" cif = """ loop_ _ma_template_trans_matrix.id _ma_template_trans_matrix.rot_matrix[1][1] _ma_template_trans_matrix.rot_matrix[2][1] _ma_template_trans_matrix.rot_matrix[3][1] _ma_template_trans_matrix.rot_matrix[1][2] _ma_template_trans_matrix.rot_matrix[2][2] _ma_template_trans_matrix.rot_matrix[3][2] _ma_template_trans_matrix.rot_matrix[1][3] _ma_template_trans_matrix.rot_matrix[2][3] _ma_template_trans_matrix.rot_matrix[3][3] _ma_template_trans_matrix.tr_vector[1] _ma_template_trans_matrix.tr_vector[2] _ma_template_trans_matrix.tr_vector[3] 1 1.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 1.000000 0.000 0.000 0.000 """ s, = modelcif.reader.read(StringIO(cif)) t, = s.template_transformations self.assertAlmostEqual(t.rot_matrix[0][0], 1.0, delta=1e-6) self.assertAlmostEqual(t.tr_vector[0], 0.0, delta=1e-6) def test_template_details_handler(self): """Test _TemplateDetailsHandler""" cif = """ loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 
'reference database' polymer 1 2 A B 3 4 Z 2 2 'reference database' polymer 2 3 . B 3 4 Z 3 3 customized polymer 2 3 . B 3 4 Z """ s, = modelcif.reader.read(StringIO(cif)) t, t2, t3 = s.templates self.assertIsInstance(t, modelcif.Template) self.assertIsInstance(t2, modelcif.Template) self.assertIsInstance(t3, modelcif.CustomTemplate) self.assertEqual(t.entity_id, '3') self.assertEqual(t.model_num, 4) self.assertEqual(t.asym_id, 'B') self.assertEqual(t.strand_id, 'Z') self.assertEqual(len(s.alignments), 0) def test_template_customized_handler(self): """Test _TemplateCustomizedHandler""" cif = """ loop_ _ma_template_details.ordinal_id _ma_template_details.template_id 1 1 # loop_ _ma_template_customized.template_id _ma_template_customized.details 1 'details x' 2 'details y' """ s, = modelcif.reader.read(StringIO(cif)) t1, t2 = s.templates # template_details does not specify template_origin, so template #1 # will be initially instantiated as a Template, and should be corrected # to CustomTemplate on reading template_customized: self.assertIsInstance(t1, modelcif.CustomTemplate) self.assertEqual(t1.details, 'details x') self.assertEqual(len(t1.atoms), 0) self.assertIsInstance(t2, modelcif.CustomTemplate) self.assertEqual(t2.details, 'details y') self.assertEqual(len(t2.atoms), 0) def test_template_details_handler_nonpoly(self): """Test _TemplateDetailsHandler with nonpolymeric template""" cif = """ loop_ _pdbx_entity_nonpoly.entity_id _pdbx_entity_nonpoly.name _pdbx_entity_nonpoly.comp_id _pdbx_entity_nonpoly.ma_model_mode 3 Heme HEM explicit # loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num 
_ma_template_details.template_auth_asym_id 1 1 'reference database' non-polymer 1 2 A B 3 4 Z # loop_ _ma_template_non_poly.template_id _ma_template_non_poly.comp_id _ma_template_non_poly.details 1 HEM "Template Heme" """ s, = modelcif.reader.read(StringIO(cif)) t, = s.templates self.assertEqual(t.entity_id, '3') self.assertEqual(t.model_num, 4) self.assertEqual(t.asym_id, 'B') self.assertEqual(t.strand_id, 'Z') self.assertEqual(len(s.alignments), 0) self.assertEqual(t.entity.description, 'Template Heme') a, = s.asym_units self.assertIsInstance(a, modelcif.NonPolymerFromTemplate) self.assertIs(a.template, t) self.assertTrue(a.explicit) def test_custom_template_coord_handler(self): """Test reading of coordinates for CustomTemplate""" cif = """ loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 customized polymer 2 3 . 
B 3 4 Z # loop_ _ma_template_customized.template_id _ma_template_customized.details 1 'Provided by user' # loop_ _ma_template_coord.template_id _ma_template_coord.group_PDB _ma_template_coord.ordinal_id _ma_template_coord.type_symbol _ma_template_coord.label_atom_id _ma_template_coord.label_comp_id _ma_template_coord.label_seq_id _ma_template_coord.label_asym_id _ma_template_coord.auth_seq_id _ma_template_coord.auth_asym_id _ma_template_coord.auth_atom_id _ma_template_coord.auth_comp_id _ma_template_coord.Cartn_x _ma_template_coord.Cartn_y _ma_template_coord.Cartn_z _ma_template_coord.occupancy _ma_template_coord.label_entity_id _ma_template_coord.B_iso_or_equiv _ma_template_coord.formal_charge 1 ATOM 1 C CA ALA 1 A 42 A X XXX 0 1.000 2.000 0.500 9 2.000 1.000 1 ATOM 2 O OXT CYS 2 A . A . . 1.000 2.000 3.000 . 9 . . # """ s, = modelcif.reader.read(StringIO(cif)) t, = s.templates self.assertIsInstance(t, modelcif.CustomTemplate) self.assertEqual(t.details, 'Provided by user') self.assertEqual(len(t.atoms), 2) a1 = t.atoms[0] self.assertEqual(a1.seq_id, 1) self.assertEqual(a1.atom_id, 'CA') self.assertEqual(a1.type_symbol, 'C') self.assertAlmostEqual(a1.x, 0.0, delta=1e-2) self.assertAlmostEqual(a1.y, 1.0, delta=1e-2) self.assertAlmostEqual(a1.z, 2.0, delta=1e-2) self.assertAlmostEqual(a1.occupancy, 0.5, delta=1e-2) self.assertAlmostEqual(a1.biso, 2.0, delta=1e-2) self.assertAlmostEqual(a1.charge, 1.0, delta=1e-2) self.assertEqual(a1.auth_seq_id, 42) self.assertEqual(a1.auth_comp_id, 'XXX') self.assertEqual(a1.auth_atom_id, 'X') a2 = t.atoms[1] self.assertEqual(a2.seq_id, 2) self.assertEqual(a2.atom_id, 'OXT') self.assertEqual(a2.type_symbol, 'O') def test_entity_nonpoly_bad_model_mode(self): """Test pdbx_entity_nonpoly with missing ma_model_mode""" cif = """ loop_ _struct_asym.id _struct_asym.entity_id A 1 B 2 C 3 # loop_ _pdbx_entity_nonpoly.entity_id _pdbx_entity_nonpoly.name _pdbx_entity_nonpoly.comp_id _pdbx_entity_nonpoly.ma_model_mode 1 test1 TE1 explicit 2 
test2 TE2 . 3 test3 TE3 ? # loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' non-polymer 1 2 A A 1 4 Z 2 2 'reference database' non-polymer 1 3 B B 2 4 Z 3 3 'reference database' non-polymer 1 4 C C 3 4 Z # loop_ _ma_template_non_poly.template_id _ma_template_non_poly.comp_id _ma_template_non_poly.details 1 TE1 test1 2 TE2 test2 3 TE3 test3 """ s, = modelcif.reader.read(StringIO(cif)) a1, a2, a3 = s.asym_units self.assertTrue(a1.explicit) self.assertIsNone(a2.explicit) self.assertIs(a3.explicit, ihm.unknown) def test_template_ref_db_handler(self): """Test _TemplateRefDBHandler""" cif = """ loop_ _ma_template_ref_db_details.template_id _ma_template_ref_db_details.db_name _ma_template_ref_db_details.db_name_other_details _ma_template_ref_db_details.db_accession_code _ma_template_ref_db_details.db_version_date 1 PDB . 3nc1 2021-10-06 1 MIS . testacc . 1 Other foo acc2 . 1 PubChem . 1046 . 1 AlphaFoldDB . 
I6XD65 2022-06-01 """ s, = modelcif.reader.read(StringIO(cif)) t, = s.templates r1, r2, r3, r4, r5 = t.references self.assertIsInstance(r1, modelcif.reference.PDB) self.assertEqual(r1.accession, '3nc1') self.assertEqual(r1.db_version_date, date(2021, 10, 6)) self.assertEqual(r2.name, 'MIS') self.assertIsNone(r2.other_details) self.assertIsNone(r2.db_version_date) self.assertEqual(r3.name, 'Other') self.assertEqual(r3.other_details, 'foo') self.assertIsInstance(r4, modelcif.reference.PubChem) self.assertEqual(r4.accession, '1046') self.assertIsInstance(r5, modelcif.reference.AlphaFoldDB) self.assertEqual(r5.accession, 'I6XD65') self.assertEqual(r5.db_version_date, date(2022, 6, 1)) def _get_models_cif(self, old=False): if old: cif = """ loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 99 4 'Homology model' . 2 2 1 '2nd best scoring model' 'All models' 99 5 'Ab initio model' . 3 3 2 'Best scoring model' 'group2' 99 6 'Other' 'Custom other model' # """ else: cif = """ loop_ _ma_model_list.ordinal_id _ma_model_list.model_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 'Best scoring model' 99 4 'Homology model' . 2 '2nd best scoring model' 99 5 'Ab initio model' . 3 'Best scoring model' 99 6 'Other' 'Custom other model' # loop_ _ma_model_group.id _ma_model_group.name _ma_model_group.details 1 'All models' . 
2 'group2' 'second group details' # # loop_ _ma_model_group_link.group_id _ma_model_group_link.model_id 1 1 1 2 2 3 # """ cif += """ loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.label_asym_id _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.label_entity_id _atom_site.auth_asym_id _atom_site.B_iso_or_equiv _atom_site.pdbx_PDB_model_num ATOM 1 C CA . ASP 1 1 ? A 1.000 2.000 3.000 . 1 A . 6 ATOM 2 C CA . ASP 1 1 ? A 1.000 2.000 3.000 . 1 A . 8 """ return cif def test_model_list_handler_default_old(self): """Test _ModelListHandler with default model class, old dictionary""" self._test_model_list_handler_default(old=True) def test_model_list_handler_default(self): """Test _ModelListHandler with default model class""" self._test_model_list_handler_default(old=False) def _test_model_list_handler_default(self, old): cif = self._get_models_cif(old=old) s, = modelcif.reader.read(StringIO(cif)) mg1, mg2, mg3 = s.model_groups self.assertEqual(mg1.name, 'All models') m1, m2 = list(mg1) self.assertIsInstance(m1, modelcif.model.HomologyModel) self.assertEqual(m1.model_type, 'Homology model') self.assertIsNone(m1.other_details) self.assertIsInstance(m2, modelcif.model.AbInitioModel) self.assertEqual(m2.model_type, 'Ab initio model') self.assertIsNone(m2.other_details) self.assertEqual(m1.name, 'Best scoring model') self.assertEqual(m2.name, '2nd best scoring model') self.assertEqual(mg2.name, 'group2') m1, = list(mg2) self.assertEqual(m1.model_type, 'Other') self.assertEqual(m1.other_details, 'Custom other model') self.assertEqual(m1.name, 'Best scoring model') self.assertEqual(m1.assembly._id, '99') # Last group is auto-created to contain the non-grouped models # referenced by atom_site self.assertIsNone(mg3.name) m1, m2 = list(mg3) self.assertEqual(m1.model_type, 
'Other') self.assertEqual(m1._id, '6') self.assertEqual(m2.model_type, 'Other') self.assertEqual(m2._id, '8') def test_model_list_handler_group_new_old(self): """Test _ModelListHandler handling mix of new and old style groups""" cif = """ loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 . . 4 'Homology model' . 2 2 . . . 4 'Homology model' . 3 3 1 . . 4 'Homology model' . # # loop_ _ma_model_group.id _ma_model_group.name _ma_model_group.details 1 'group1' . 2 'group2' . # # loop_ _ma_model_group_link.model_id _ma_model_group_link.group_id 2 1 3 2 """ s, = modelcif.reader.read(StringIO(cif)) # model1 is in group1, using old-style tables; # model2 is in group1, using new-style tables; # model3 is in group2 according to new-style tables but group1 # according to old style (new-style should take precedence) mg1, mg2 = s.model_groups self.assertEqual(mg1._id, '1') self.assertEqual(mg2._id, '2') self.assertEqual([m._id for m in mg1], ['2', '1']) self.assertEqual([m._id for m in mg2], ['3']) def test_model_list_handler_custom(self): """Test _ModelListHandler with custom model class""" class MyModel(modelcif.model.Model): """Custom model type""" pass cif = self._get_models_cif() s, = modelcif.reader.read(StringIO(cif), model_class=MyModel) mg1, mg2, mg3 = s.model_groups m1, m2 = list(mg1) m3, = list(mg2) m4, m5 = list(mg3) # Custom model type should always be returned, regardless of what # the mmCIF file says it is, but model_type should be set self.assertIsInstance(m1, MyModel) self.assertIsInstance(m2, MyModel) self.assertIsInstance(m3, MyModel) self.assertIsInstance(m4, MyModel) self.assertIsInstance(m5, MyModel) self.assertEqual(m1.model_type, 'Homology model') self.assertEqual(m2.model_type, 'Ab initio model') self.assertEqual(m3.model_type, 'Other') self.assertEqual(m4.model_type, 
'Other') self.assertEqual(m5.model_type, 'Other') def test_assembly_handler(self): """Test _AssemblyHandler and _AssemblyDetailsHandler""" cif = """ loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id 1 1 ALA 1 2 ALA # loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 Nup84 # loop_ _ma_struct_assembly.ordinal_id _ma_struct_assembly.assembly_id _ma_struct_assembly.entity_id _ma_struct_assembly.asym_id _ma_struct_assembly.seq_id_begin _ma_struct_assembly.seq_id_end 1 1 1 A 1 2 2 1 1 A 1 1 3 1 1 A . . # loop_ _ma_struct_assembly_details.assembly_id _ma_struct_assembly_details.assembly_name _ma_struct_assembly_details.assembly_description 1 foo bar """ s, = modelcif.reader.read(StringIO(cif)) a, = s.assemblies self.assertEqual(a.name, 'foo') self.assertEqual(a.description, 'bar') self.assertEqual(len(a), 3) # Complete asym self.assertIsInstance(a[0], modelcif.AsymUnit) # asym range self.assertIsInstance(a[1], modelcif.AsymUnitRange) self.assertEqual(a[1].seq_id_range, (1, 1)) # No specified range -> complete asym self.assertIsInstance(a[2], modelcif.AsymUnit) def test_template_poly_segment_handler(self): """Test _TemplatePolySegmentHandler""" cif = """ loop_ _ma_template_poly_segment.id _ma_template_poly_segment.template_id _ma_template_poly_segment.residue_number_begin _ma_template_poly_segment.residue_number_end 1 42 2 9 """ s, = modelcif.reader.read(StringIO(cif)) seg, = s.template_segments self.assertEqual(seg.template._id, '42') self.assertEqual(seg.seq_id_range, (2, 9)) def test_data__handler(self): """Test _DataHandler""" cif = """ loop_ _ma_data.id _ma_data.name _ma_data.content_type _ma_data.content_type_other_details 1 'Template Structure' 'template structure' . 2 'Model subunit' target . 3 'Default model name' 'model coordinates' . 
# loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' polymer 1 1 A A 1 1 Z # loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Model name' 'All models' 1 3 'Homology model' . """ s, = modelcif.reader.read(StringIO(cif)) d1, d2, d3 = s.data self.assertIsInstance(d1, modelcif.Template) # d2 is not referenced by any other table, so gets Data base class self.assertIsInstance(d2, modelcif.data.Data) self.assertIsInstance(d3, modelcif.model.Model) # Name not given in template_details so taken from ma_data self.assertEqual(d1.name, 'Template Structure') self.assertEqual(d2.name, 'Model subunit') # Name in model_list used rather than that from ma_data self.assertEqual(d3.name, 'Model name') def test_data_group_handler(self): """Test _DataGroupHandler""" cif = """ loop_ _ma_data.id _ma_data.name _ma_data.content_type _ma_data.content_type_other_details 1 'Template Structure' 'template structure' . 2 'Model subunit' target . 
# loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' polymer 1 1 A A 1 1 Z # loop_ _ma_data_group.ordinal_id _ma_data_group.group_id _ma_data_group.data_id 1 1 1 2 1 2 3 2 3 """ s, = modelcif.reader.read(StringIO(cif)) g1, g2, = s.data_groups self.assertEqual(len(g1), 2) self.assertIsInstance(g1[0], modelcif.Template) self.assertEqual(g1[0]._data_id, '1') self.assertIsInstance(g1[1], modelcif.data.Data) self.assertEqual(g1[1]._data_id, '2') self.assertEqual(len(g2), 1) self.assertIsNone(g2[0]) def test_data_ref_db_handler(self): """Test _DataRefDBHandler""" cif = """ loop_ _ma_data.id _ma_data.name _ma_data.content_type _ma_data.content_type_other_details 1 defaultname1 'reference database' . 2 defaultname2 'reference database' . # loop_ _ma_data_ref_db.data_id _ma_data_ref_db.name _ma_data_ref_db.location_url _ma_data_ref_db.version _ma_data_ref_db.release_date 1 name1 url1 1.0 1979-11-22 2 . url2 . . 
""" s, = modelcif.reader.read(StringIO(cif)) d1, d2 = s.data self.assertIsInstance(d1, modelcif.ReferenceDatabase) self.assertIsInstance(d2, modelcif.ReferenceDatabase) # Name in ma_data_ref_db used rather than that from ma_data self.assertEqual(d1.name, 'name1') self.assertEqual(d1.url, 'url1') self.assertEqual(d1.version, '1.0') self.assertIsInstance(d1.release_date, date) self.assertEqual(d1.release_date, date(1979, 11, 22)) # Name not given in ma_data_ref_db so taken from ma_data self.assertEqual(d2.name, 'defaultname2') self.assertIsNone(d2.version) self.assertIsNone(d2.release_date) def test_protocol_handler(self): """Test _ProtocolHandler""" cif = """ loop_ _ma_protocol_step.ordinal_id _ma_protocol_step.protocol_id _ma_protocol_step.step_id _ma_protocol_step.method_type _ma_protocol_step.step_name _ma_protocol_step.details _ma_protocol_step.software_group_id _ma_protocol_step.input_data_group_id _ma_protocol_step.output_data_group_id 1 1 1 'template search' 'ModPipe Seq-Prf (0001)' . 1 1 2 2 1 2 'template selection' . . . . . 3 1 3 'target-template alignment' . . . . . 4 1 4 modeling . . 2 2 1 5 1 5 'model selection' . . 1 1 1 6 1 6 'model refinement' . . . . . 
7 1 7 other testname testdetails 42 99 66 """ s, = modelcif.reader.read(StringIO(cif)) p, = s.protocols self.assertEqual(len(p.steps), 7) s1, s2, s3, s4, s5, s6, s7 = p.steps self.assertIsInstance(s1, modelcif.protocol.TemplateSearchStep) self.assertIsInstance(s2, modelcif.protocol.TemplateSelectionStep) self.assertIsInstance(s3, modelcif.protocol.TargetTemplateAlignmentStep) self.assertIsInstance(s4, modelcif.protocol.ModelingStep) self.assertIsInstance(s5, modelcif.protocol.ModelSelectionStep) self.assertIsInstance(s6, modelcif.protocol.ModelRefinementStep) self.assertIsInstance(s7, modelcif.protocol.Step) self.assertEqual(s7.method_type, "other") self.assertEqual(s7.name, "testname") self.assertEqual(s7.details, "testdetails") self.assertEqual(s7.input_data._id, '99') self.assertEqual(s7.output_data._id, '66') self.assertEqual(s7.software._id, '42') def test_target_entity_handler(self): """Test _TargetEntityHandler""" cif = """ loop_ _ma_target_entity.entity_id _ma_target_entity.data_id _ma_target_entity.origin 1 2 'reference database' """ s, = modelcif.reader.read(StringIO(cif)) e, = s.entities self.assertEqual(e._data_id, '2') def test_qa_metric_global_handler(self): """Test _QAMetricGlobalHandler""" cif = """ loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 1 4 'Homology model' . # loop_ _ma_qa_metric.id _ma_qa_metric.name _ma_qa_metric.description _ma_qa_metric.type _ma_qa_metric.mode _ma_qa_metric.type_other_details _ma_qa_metric.software_group_id 1 MPQS 'ModPipe Quality Score' other global 'composite score, values >1.1 are considered reliable' 1 2 zDOPE 'Normalized DOPE' zscore global . 2 3 'TSVMod RMSD' 'TSVMod predicted RMSD (MSALL)' distance global . . 4 'TSVMod NO35' . 'normalized score' global . . 
# loop_ _ma_qa_metric_global.ordinal_id _ma_qa_metric_global.model_id _ma_qa_metric_global.metric_id _ma_qa_metric_global.metric_value 1 1 1 1.0 2 1 2 2.0 3 1 3 3.0 4 1 4 4.0 """ s, = modelcif.reader.read(StringIO(cif)) mg, = s.model_groups m, = mg q1, q2, q3, q4 = m.qa_metrics self.assertIsInstance(q1, modelcif.qa_metric.Global) self.assertEqual(q1.type, "other") self.assertEqual(q1.name, "MPQS") self.assertEqual(type(q1).__name__, "MPQS") self.assertEqual(q1.description, "ModPipe Quality Score") self.assertEqual(q1.__doc__, "ModPipe Quality Score") self.assertEqual(q1.software._id, '1') self.assertAlmostEqual(q1.value, 1.0, delta=1e-6) self.assertIsInstance(q2, modelcif.qa_metric.Global) self.assertIsInstance(q2, modelcif.qa_metric.ZScore) self.assertAlmostEqual(q2.value, 2.0, delta=1e-6) self.assertIsInstance(q3, modelcif.qa_metric.Global) self.assertIsInstance(q3, modelcif.qa_metric.Distance) self.assertAlmostEqual(q3.value, 3.0, delta=1e-6) self.assertIsInstance(q4, modelcif.qa_metric.Global) self.assertIsInstance(q4, modelcif.qa_metric.NormalizedScore) self.assertIsNone(q4.description) self.assertIsNone(q4.__doc__) def test_qa_metric_local_handler(self): """Test _QAMetricLocalHandler""" cif = """ loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 1 4 'Homology model' . # loop_ _ma_qa_metric.id _ma_qa_metric.name _ma_qa_metric.description _ma_qa_metric.type _ma_qa_metric.mode _ma_qa_metric.type_other_details _ma_qa_metric.software_group_id 1 'test local' 'some local score' 'normalized score' local . . 
# loop_ _ma_qa_metric_local.ordinal_id _ma_qa_metric_local.model_id _ma_qa_metric_local.label_asym_id _ma_qa_metric_local.label_seq_id _ma_qa_metric_local.label_comp_id _ma_qa_metric_local.metric_id _ma_qa_metric_local.metric_value 1 1 A 2 CYS 1 1.0 """ s, = modelcif.reader.read(StringIO(cif)) mg, = s.model_groups m, = mg q1, = m.qa_metrics self.assertIsInstance(q1, modelcif.qa_metric.Local) self.assertIsInstance(q1, modelcif.qa_metric.NormalizedScore) self.assertEqual(q1.type, "normalized score") self.assertEqual(q1.name, "test local") self.assertEqual(q1.description, "some local score") self.assertIsNone(q1.software) self.assertEqual(q1.residue.asym._id, 'A') self.assertEqual(q1.residue.seq_id, 2) self.assertAlmostEqual(q1.value, 1.0, delta=1e-6) def test_qa_metric_pairwise_handler(self): """Test _QAMetricPairwiseHandler""" cif = """ loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 1 4 'Homology model' . # loop_ _ma_qa_metric.id _ma_qa_metric.name _ma_qa_metric.description _ma_qa_metric.type _ma_qa_metric.mode _ma_qa_metric.type_other_details _ma_qa_metric.software_group_id 1 'test pair' 'some pair score' 'normalized score' local-pairwise . . 
# loop_ _ma_qa_metric_local_pairwise.ordinal_id _ma_qa_metric_local_pairwise.model_id _ma_qa_metric_local_pairwise.label_asym_id_1 _ma_qa_metric_local_pairwise.label_seq_id_1 _ma_qa_metric_local_pairwise.label_comp_id_1 _ma_qa_metric_local_pairwise.label_asym_id_2 _ma_qa_metric_local_pairwise.label_seq_id_2 _ma_qa_metric_local_pairwise.label_comp_id_2 _ma_qa_metric_local_pairwise.metric_id _ma_qa_metric_local_pairwise.metric_value 1 1 A 2 CYS B 4 GLY 1 1.0 """ s, = modelcif.reader.read(StringIO(cif)) mg, = s.model_groups m, = mg q1, = m.qa_metrics self.assertIsInstance(q1, modelcif.qa_metric.LocalPairwise) self.assertIsInstance(q1, modelcif.qa_metric.NormalizedScore) self.assertEqual(q1.type, "normalized score") self.assertEqual(q1.name, "test pair") self.assertEqual(q1.description, "some pair score") self.assertIsNone(q1.software) self.assertEqual(q1.residue1.asym._id, 'A') self.assertEqual(q1.residue1.seq_id, 2) self.assertEqual(q1.residue2.asym._id, 'B') self.assertEqual(q1.residue2.seq_id, 4) self.assertAlmostEqual(q1.value, 1.0, delta=1e-6) def test_qa_metric_feature_handler(self): """Test _QAMetricFeatureHandler""" feat = """ loop_ _ma_atom_feature.ordinal_id _ma_atom_feature.feature_id _ma_atom_feature.atom_id 1 1 1 # loop_ _ma_poly_residue_feature.ordinal_id _ma_poly_residue_feature.feature_id _ma_poly_residue_feature.label_asym_id _ma_poly_residue_feature.label_seq_id _ma_poly_residue_feature.label_comp_id 1 2 Y 1 ALA # loop_ _ma_entity_instance_feature.ordinal_id _ma_entity_instance_feature.feature_id _ma_entity_instance_feature.label_asym_id 1 3 Y """ qa = """ loop_ _ma_feature_list.feature_id _ma_feature_list.feature_type _ma_feature_list.entity_type _ma_feature_list.details 1 atom other 'atom f' 2 residue polymer prf 3 'entity instance' polymer . 
# loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 1 4 'Homology model' . # loop_ _ma_qa_metric.id _ma_qa_metric.name _ma_qa_metric.description _ma_qa_metric.type _ma_qa_metric.mode _ma_qa_metric.type_other_details _ma_qa_metric.software_group_id 1 'test local' 'some local score' 'normalized score' per-feature . . # loop_ _ma_qa_metric_feature.ordinal_id _ma_qa_metric_feature.model_id _ma_qa_metric_feature.feature_id _ma_qa_metric_feature.metric_id _ma_qa_metric_feature.metric_value 1 1 1 1 1.0 2 1 2 1 2.0 3 1 3 1 3.0 """ # Test both ways to make sure features still work if they are # referenced by ID before their type is known for cif in (feat + qa, qa + feat): s, = modelcif.reader.read(StringIO(cif)) mg, = s.model_groups m, = mg q1, q2, q3 = m.qa_metrics self.assertIsInstance(q1, modelcif.qa_metric.Feature) self.assertIsInstance(q1, modelcif.qa_metric.NormalizedScore) self.assertIsInstance(q1.feature, modelcif.AtomFeature) self.assertEqual(q1.feature.details, 'atom f') self.assertAlmostEqual(q1.value, 1.0, delta=1e-6) self.assertIsInstance(q2.feature, modelcif.PolyResidueFeature) self.assertEqual(len(q2.feature.residues), 1) self.assertEqual(q2.feature.residues[0].seq_id, 1) self.assertAlmostEqual(q2.value, 2.0, delta=1e-6) self.assertIsInstance(q3.feature, modelcif.EntityInstanceFeature) self.assertEqual(len(q3.feature.asym_units), 1) self.assertAlmostEqual(q3.value, 3.0, delta=1e-6) def test_qa_metric_feature_pairwise_handler(self): """Test _QAMetricFeaturePairwiseHandler""" feat = """ loop_ _ma_poly_residue_feature.ordinal_id _ma_poly_residue_feature.feature_id _ma_poly_residue_feature.label_asym_id _ma_poly_residue_feature.label_seq_id _ma_poly_residue_feature.label_comp_id 1 1 Y 1 ALA 2 2 Y 2 CYS """ qa = """ 
loop_ _ma_feature_list.feature_id _ma_feature_list.feature_type _ma_feature_list.entity_type _ma_feature_list.details 1 residue polymer . 2 residue polymer . # loop_ _ma_model_list.ordinal_id _ma_model_list.model_id _ma_model_list.model_group_id _ma_model_list.model_name _ma_model_list.model_group_name _ma_model_list.assembly_id _ma_model_list.data_id _ma_model_list.model_type _ma_model_list.model_type_other_details 1 1 1 'Best scoring model' 'All models' 1 4 'Homology model' . # loop_ _ma_qa_metric.id _ma_qa_metric.name _ma_qa_metric.description _ma_qa_metric.type _ma_qa_metric.mode _ma_qa_metric.type_other_details _ma_qa_metric.software_group_id 1 'test local' 'some local score' 'normalized score' per-feature-pair . . # loop_ _ma_qa_metric_feature_pairwise.ordinal_id _ma_qa_metric_feature_pairwise.model_id _ma_qa_metric_feature_pairwise.feature_id_1 _ma_qa_metric_feature_pairwise.feature_id_2 _ma_qa_metric_feature_pairwise.metric_id _ma_qa_metric_feature_pairwise.metric_value 1 1 1 2 1 50.000 """ # Test both ways to make sure features still work if they are # referenced by ID before their type is known for cif in (feat + qa, qa + feat): s, = modelcif.reader.read(StringIO(cif)) mg, = s.model_groups m, = mg q1, = m.qa_metrics self.assertIsInstance(q1, modelcif.qa_metric.FeaturePairwise) self.assertIsInstance(q1, modelcif.qa_metric.NormalizedScore) self.assertIsInstance(q1.feature1, modelcif.PolyResidueFeature) self.assertIsInstance(q1.feature2, modelcif.PolyResidueFeature) self.assertAlmostEqual(q1.value, 50.0, delta=1e-6) def test_alignment_info_details_handler(self): """Test _AlignmentInfoHandler and _AlignmentDetailsHandler""" cif = """ loop_ _ma_alignment_info.alignment_id _ma_alignment_info.data_id _ma_alignment_info.software_group_id _ma_alignment_info.alignment_length _ma_alignment_info.alignment_type _ma_alignment_info.alignment_mode 1 3 1 . 'target-template pairwise alignment' global 2 4 1 . 'target-template pairwise alignment' global 3 5 1 . 
'target-template MSA' local # # loop_ _ma_alignment_details.ordinal_id _ma_alignment_details.alignment_id _ma_alignment_details.template_segment_id _ma_alignment_details.target_asym_id _ma_alignment_details.score_type _ma_alignment_details.score_type_other_details _ma_alignment_details.score_value _ma_alignment_details.percent_sequence_identity _ma_alignment_details.sequence_identity_denominator _ma_alignment_details.sequence_identity_denominator_other_details 1 1 1 A 'BLAST e-value' . 1.0 45.000 'Length of the shorter sequence' . 2 2 1 A . . . . . . 3 3 1 A 'HHblits e-value' . 2.0 45.000 'Arithmetic mean sequence length' . # loop_ _ma_alignment.ordinal_id _ma_alignment.alignment_id _ma_alignment.target_template_flag _ma_alignment.sequence 1 1 1 DSYV-ETLD 2 1 2 DMACDTFIK 3 1 1 DSYV-ETLD 4 1 2 DMACDTFIK # loop_ _ma_target_template_poly_mapping.id _ma_target_template_poly_mapping.template_segment_id _ma_target_template_poly_mapping.target_asym_id _ma_target_template_poly_mapping.target_seq_id_begin _ma_target_template_poly_mapping.target_seq_id_end 1 1 A 1 8 2 1 A 1 8 """ s, = modelcif.reader.read(StringIO(cif)) a1, a2, a3, = s.alignments self.assertIs(a1.__class__, a2.__class__) self.assertIsInstance(a1, modelcif.alignment.Global) self.assertIsInstance(a1, modelcif.alignment.Pairwise) p, = a1.pairs self.assertIsInstance(p.score, modelcif.alignment.BLASTEValue) self.assertAlmostEqual(p.score.value, 1.0, delta=1e-6) self.assertIsInstance(p.identity, modelcif.alignment.ShorterSequenceIdentity) self.assertAlmostEqual(p.identity.value, 45.0, delta=1e-6) self.assertIsInstance(p.template, modelcif.TemplateSegment) self.assertEqual(p.template._id, '1') self.assertEqual(p.template.gapped_sequence, 'DMACDTFIK') self.assertIsInstance(p.target, ihm.AsymUnitSegment) self.assertEqual(p.target.asym._id, 'A') self.assertEqual(p.target.gapped_sequence, 'DSYV-ETLD') self.assertEqual(p.target.seq_id_range, (1, 8)) self.assertIsInstance(a3, modelcif.alignment.Local) 
self.assertIsInstance(a3, modelcif.alignment.Multiple) p, = a2.pairs self.assertIsNone(p.score) self.assertIsNone(p.identity) p, = a3.pairs self.assertIsInstance(p.score, modelcif.alignment.HHblitsEValue) self.assertAlmostEqual(p.score.value, 2.0, delta=1e-6) self.assertIsInstance(p.identity, modelcif.alignment.MeanSequenceIdentity) def test_associated_files(self): """Test _AssociatedHandler and _AssociatedArchiveHandler""" cif = """ loop_ _ma_data.id _ma_data.name _ma_data.content_type _ma_data.content_type_other_details 42 'Model subunit' target . loop_ _ma_target_entity.entity_id _ma_target_entity.data_id _ma_target_entity.origin 1 99 'reference database' loop_ _ma_entry_associated_files.id _ma_entry_associated_files.entry_id _ma_entry_associated_files.file_url _ma_entry_associated_files.file_type _ma_entry_associated_files.file_format _ma_entry_associated_files.file_content _ma_entry_associated_files.details _ma_entry_associated_files.data_id 1 model https://example.com/foo.txt file other other 'test file' . 2 model https://example.com/t.zip archive zip 'archive with multiple files' . . 3 model baz.txt file other other 'test file3' . 4 model baz.cif file cif other 'test mmCIF' . 5 model baz.bcif file bcif other 'test BinaryCIF' 42 # # loop_ _ma_associated_archive_file_details.id _ma_associated_archive_file_details.archive_file_id _ma_associated_archive_file_details.file_path _ma_associated_archive_file_details.file_format _ma_associated_archive_file_details.file_content _ma_associated_archive_file_details.description _ma_associated_archive_file_details.data_id 1 2 bar.txt other other 'test file2' . 2 99 99.txt other other 'test file99' . 3 2 bar.cif cif other 'test mmCIF in zip' . 
4 2 bar.bcif bcif 'local pairwise QA scores' 'test BinaryCIF in zip' 99 5 2 bar2.bcif bcif 'QA metrics' 'test BinaryCIF in zip' 99 """ s, = modelcif.reader.read(StringIO(cif)) r1, r2 = s.repositories self.assertEqual(r1.url_root, 'https://example.com') f1, zf = r1.files self.assertIsInstance(f1, modelcif.associated.File) self.assertEqual(f1.path, 'foo.txt') self.assertEqual(f1.details, 'test file') self.assertIsInstance(zf, modelcif.associated.ZipFile) self.assertEqual(zf.path, 't.zip') self.assertIsNone(zf.details) f2, f3, f4, f5 = zf.files self.assertEqual(f2.path, 'bar.txt') self.assertEqual(f2.details, 'test file2') self.assertIsNone(f2.data) self.assertIsInstance(f3, modelcif.associated.CIFFile) self.assertFalse(f3.binary) # QA metrics file using old "local pairwise QA scores" name self.assertIsInstance( f4, modelcif.associated.QAMetricsFile) self.assertEqual(f4.file_content, 'QA metrics') self.assertTrue(f4.binary) self.assertIsInstance(f4.data, modelcif.Entity) self.assertIsInstance( f5, modelcif.associated.QAMetricsFile) self.assertEqual(f5.file_content, 'QA metrics') self.assertTrue(f5.binary) self.assertIsInstance(f5.data, modelcif.Entity) self.assertIsNone(r2.url_root) f3, f4, f5 = r2.files self.assertEqual(f3.path, 'baz.txt') self.assertEqual(f3.details, 'test file3') self.assertIsInstance(f4, modelcif.associated.CIFFile) self.assertFalse(f4.binary) self.assertIsNone(f4.data) self.assertIsInstance(f5, modelcif.associated.CIFFile) self.assertTrue(f5.binary) self.assertEqual(f5.data.__class__, modelcif.data.Data) def test_template_poly_handler(self): """Test _TemplatePolyHandler""" cif = """ loop_ _chem_comp.id _chem_comp.type _chem_comp.name _chem_comp.formula MYTYPE 'D-PEPTIDE LINKING' 'MY CUSTOM COMPONENT' 'C6 H12' MYTYP2 'D-PEPTIDE LINKING' 'MY CUSTOM COMPONENT2' 'C6 H12' # loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type 
_ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' polymer 1 1 A A . 1 A 2 2 'reference database' polymer 1 1 A A . 1 A 3 3 'reference database' polymer 1 1 A A . 1 A # loop_ _ma_template_poly.template_id _ma_template_poly.seq_one_letter_code _ma_template_poly.seq_one_letter_code_can 1 A(MYTYPE)V AVV 2 A(MYTYP2)V . 3 . . 4 CCC CCC """ s, = modelcif.reader.read(StringIO(cif)) # template_id=4 in template_poly should be ignored t1, t2, t3 = s.templates s1, s2, s3 = t1.entity.sequence self.assertEqual(s1.id, 'ALA') self.assertEqual(s1.code, 'A') # Both one-letter and one-letter-canonical were provided self.assertEqual(s2.id, 'MYTYPE') self.assertEqual(s2.code, 'MYTYPE') self.assertEqual(s2.code_canonical, 'V') # Only one-letter was provided s1, s2, s3 = t2.entity.sequence self.assertEqual(s2.id, 'MYTYP2') self.assertEqual(s2.code, 'MYTYP2') self.assertIsNone(s2.code_canonical) # No sequence provided self.assertEqual(len(t3.entity.sequence), 0) def test_template_non_poly_handler(self): """Test _TemplateNonPolyHandler""" cif = """ loop_ _chem_comp.id _chem_comp.type HEM non-polymer # loop_ _ma_template_details.ordinal_id _ma_template_details.template_id _ma_template_details.template_origin _ma_template_details.template_entity_type _ma_template_details.template_trans_matrix_id _ma_template_details.template_data_id _ma_template_details.target_asym_id _ma_template_details.template_label_asym_id _ma_template_details.template_label_entity_id _ma_template_details.template_model_num _ma_template_details.template_auth_asym_id 1 1 'reference database' polymer 1 1 A A . 
1 A # loop_ _ma_template_non_poly.template_id _ma_template_non_poly.comp_id _ma_template_non_poly.details 1 HEM Heme """ s, = modelcif.reader.read(StringIO(cif)) t, = s.templates s1, = t.entity.sequence self.assertEqual(s1.id, 'HEM') self.assertEqual(s1.type, 'non-polymer') self.assertIsInstance(s1, ihm.NonPolymerChemComp) def test_chem_comp_handler(self): """Test ChemCompHandler and ChemCompDescriptorHandler""" cif = """ loop_ _chem_comp.id _chem_comp.type _chem_comp.name _chem_comp.formula _chem_comp.ma_provenance MET 'L-peptide linking' . . . CYS 'D-peptide linking' CYSTEINE . ? ALA 'L-peptide linking' ALANINE . 'CCD Core' MATYPE 'L-PEPTIDE LINKING' 'MODELARCHIVE COMPONENT' . 'CCD MA' MYTYPE 'L-PEPTIDE LINKING' 'MY CUSTOM COMPONENT' . 'CCD local' # loop_ _ma_chem_comp_descriptor.ordinal_id _ma_chem_comp_descriptor.chem_comp_id _ma_chem_comp_descriptor.chem_comp_name _ma_chem_comp_descriptor.type _ma_chem_comp_descriptor.value _ma_chem_comp_descriptor.details _ma_chem_comp_descriptor.software_id 1 MYTYPE 'ignored' 'InChI Key' XDAOLTSRNUSPPH-XMMPIXPASA-N foo 1 2 MYTYPE ? 'IUPAC Name' foobar . . # loop_ _entity_poly_seq.entity_id _entity_poly_seq.num _entity_poly_seq.mon_id _entity_poly_seq.hetero 1 1 MET . 1 2 CYS . 1 3 ALA . 1 4 MATYPE . 1 5 MYTYPE . 
""" s, = modelcif.reader.read(StringIO(cif)) e1, = s.entities s = e1.sequence self.assertEqual(len(s), 5) self.assertEqual(s[2].ccd, 'core') self.assertEqual(s[3].ccd, 'ma') self.assertEqual(s[4].ccd, 'local') d1, d2 = s[4].descriptors self.assertIsInstance(d1, modelcif.descriptor.InChIKey) self.assertEqual(d1.value, 'XDAOLTSRNUSPPH-XMMPIXPASA-N') self.assertEqual(d1.details, 'foo') self.assertEqual(d1.software._id, '1') self.assertIsInstance(d2, modelcif.descriptor.IUPACName) self.assertEqual(d2.value, 'foobar') self.assertIsNone(d2.details) self.assertIsNone(d2.software) def test_add_to_system(self): """Test adding new mmCIF input to existing System""" s = modelcif.System() e = modelcif.Entity('AHC') e._id = '42' s.entities.append(e) fh = StringIO(""" loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 42 foo B 99 bar """) s2, = modelcif.reader.read(fh, add_to_system=s) self.assertIs(s2, s) self.assertEqual(len(s.asym_units), 2) # asym A should point to existing entity self.assertEqual(s.asym_units[0].id, 'A') self.assertIs(s.asym_units[0].entity, e) def test_audit_revision_handler(self): """Test AuditRevisionHistoryHandler""" # We leverage the support in python-ihm, so only a basic test here cif = """ loop_ _pdbx_audit_revision_history.ordinal _pdbx_audit_revision_history.data_content_type _pdbx_audit_revision_history.major_revision _pdbx_audit_revision_history.minor_revision _pdbx_audit_revision_history.revision_date 40 'Structure model' 1 0 ? 41 'Structure model' 1 0 . 
42 'Structure model' 2 0 1979-05-03 """ s, = modelcif.reader.read(StringIO(cif)) r1, r2, r3 = s.revisions self.assertEqual(r3.major, 2) self.assertEqual(r3.minor, 0) self.assertEqual(r3.date, datetime.date(1979, 5, 3)) def test_data_usage_handler(self): """Test DataUsageHandler""" # We leverage the support in python-ihm, so only a basic test here cif = """ loop_ _pdbx_data_usage.id _pdbx_data_usage.type _pdbx_data_usage.details _pdbx_data_usage.url _pdbx_data_usage.name 1 license 'some license' someurl somename """ s, = modelcif.reader.read(StringIO(cif)) d1, = s.data_usage self.assertEqual(d1.details, "some license") def test_atom_site_handler_water(self): """Test AtomSiteHandler reading water molecules""" fh = StringIO(""" loop_ _entity.id _entity.type 1 water loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 Water B 1 Water # loop_ _pdbx_nonpoly_scheme.asym_id _pdbx_nonpoly_scheme.entity_id _pdbx_nonpoly_scheme.mon_id _pdbx_nonpoly_scheme.ndb_seq_num _pdbx_nonpoly_scheme.pdb_seq_num _pdbx_nonpoly_scheme.auth_seq_num _pdbx_nonpoly_scheme.auth_mon_id _pdbx_nonpoly_scheme.pdb_strand_id _pdbx_nonpoly_scheme.pdb_ins_code A 1 HOH 1 50 500 HOH A . # loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_seq_id _atom_site.auth_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.label_asym_id _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.label_entity_id _atom_site.auth_asym_id _atom_site.B_iso_or_equiv _atom_site.pdbx_PDB_model_num _atom_site.ihm_model_id HETATM 1 O O . HOH . 40 ? A 10.000 10.000 10.000 . 1 A . 1 1 HETATM 2 O O . HOH . 50 ? A 10.000 10.000 10.000 . 1 A . 1 1 HETATM 3 O O . HOH . 60 . A 20.000 20.000 20.000 . 1 A . 1 1 HETATM 4 O O . HOH . 70 . B 20.000 20.000 20.000 . 1 B . 
1 1 """) s, = modelcif.reader.read(fh) m = s.model_groups[0][0] a1, a2, a3, b1 = m._atoms # Should include info from both atom_site and scheme table self.assertEqual(a1.asym_unit.auth_seq_id_map, {1: (40, None), 2: (50, None), 3: (60, None)}) self.assertEqual(a1.asym_unit.orig_auth_seq_id_map, {2: 500}) self.assertEqual(b1.asym_unit.auth_seq_id_map, {1: (70, None)}) self.assertIsNone(b1.asym_unit.orig_auth_seq_id_map) # Should get a WaterAsymUnit, not regular AsymUnit self.assertIsInstance(a1.asym_unit, modelcif.WaterAsymUnit) self.assertIsInstance(b1.asym_unit, modelcif.WaterAsymUnit) # seq_id should be assigned based on atom_site self.assertEqual(a1.seq_id, 1) self.assertEqual(a2.seq_id, 2) self.assertEqual(a3.seq_id, 3) self.assertEqual(b1.seq_id, 1) def test_nonpoly_scheme_handler(self): """Test NonPolySchemeHandler""" fh = StringIO(""" loop_ _chem_comp.id _chem_comp.type _chem_comp.name CA non-polymer 'CALCIUM ION' # loop_ _entity.id _entity.type _entity.pdbx_description 1 non-polymer 'CALCIUM ION entity' 2 non-polymer 'no-chem-comp entity' 3 water 'no-chem-comp water' # loop_ _pdbx_entity_nonpoly.entity_id _pdbx_entity_nonpoly.name _pdbx_entity_nonpoly.comp_id 1 'CALCIUM ION' CA # loop_ _struct_asym.id _struct_asym.entity_id _struct_asym.details A 1 foo B 2 bar C 3 baz # loop_ _pdbx_nonpoly_scheme.asym_id _pdbx_nonpoly_scheme.entity_id _pdbx_nonpoly_scheme.mon_id _pdbx_nonpoly_scheme.ndb_seq_num _pdbx_nonpoly_scheme.pdb_seq_num _pdbx_nonpoly_scheme.auth_seq_num _pdbx_nonpoly_scheme.pdb_strand_id _pdbx_nonpoly_scheme.pdb_ins_code A 1 BAR 1 101 202 . . B 2 BAR 1 1 1 Q X C 3 HOH . 1 1 . . C 3 HOH 2 2 2 . . C 3 HOH 3 5 10 . . C 3 HOH 4 1 20 . . C 3 HOH 5 7 7 . . 
""") s, = modelcif.reader.read(fh) e1, e2, e3 = s.entities # e1 should have sequence filled in by pdbx_entity_nonpoly self.assertEqual([cc.name for cc in e1.sequence], ['CALCIUM ION']) # e2,e3 should have sequence filled in by pdbx_nonpoly_scheme self.assertEqual([(cc.id, cc.name) for cc in e2.sequence], [('BAR', 'no-chem-comp entity')]) self.assertEqual([(cc.id, cc.name) for cc in e3.sequence], [('HOH', 'WATER')]) asym, a2, a3 = s.asym_units # non-polymers have no seq_id_range self.assertEqual(asym.seq_id_range, (None, None)) self.assertEqual(asym.auth_seq_id_map, {1: (101, None)}) self.assertEqual(asym.residue(1).auth_seq_id, 101) self.assertIsNone(asym.residue(1).ins_code) self.assertEqual(asym.strand_id, asym._id) self.assertIsNone(asym._strand_id) self.assertEqual(asym.orig_auth_seq_id_map, {1: 202}) self.assertEqual(a2.auth_seq_id_map, {1: (1, 'X')}) self.assertEqual(a2.residue(1).auth_seq_id, 1) self.assertEqual(a2.residue(1).ins_code, 'X') self.assertEqual(a2.strand_id, 'Q') self.assertEqual(a2._strand_id, 'Q') self.assertIsNone(a2.orig_auth_seq_id_map) self.assertEqual(a3.auth_seq_id_map, {1: (1, None), 2: (2, None), 3: (5, None), 4: (1, None), 5: (7, None)}) self.assertEqual(a3.orig_auth_seq_id_map, {3: 10, 4: 20}) def test_poly_seq_scheme_handler_offset(self): """Test PolySeqSchemeHandler with constant offset""" fh = StringIO(ASYM_ENTITY + """ loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.pdb_strand_id A 1 1 6 A A 1 2 7 A A 1 3 8 A A 1 4 9 A """) s, = modelcif.reader.read(fh) asym, = s.asym_units self.assertEqual(asym.auth_seq_id_map, 5) self.assertIsNone(asym._strand_id) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [6, 7, 8, 9]) self.assertIsNone(asym.orig_auth_seq_id_map) def test_poly_seq_scheme_handler_offset_ins_code(self): """Test PolySeqSchemeHandler with constant offset but inscodes""" fh = StringIO(ASYM_ENTITY + """ 
loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.pdb_strand_id _pdbx_poly_seq_scheme.pdb_ins_code A 1 1 6 A . A 1 2 7 A . A 1 3 8 A . A 1 4 9 A A """) s, = modelcif.reader.read(fh) asym, = s.asym_units self.assertEqual(asym.auth_seq_id_map, {1: (6, None), 2: (7, None), 3: (8, None), 4: (9, 'A')}) self.assertIsNone(asym._strand_id) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [6, 7, 8, 9]) self.assertIsNone(asym.residue(1).ins_code) self.assertEqual(asym.residue(4).ins_code, 'A') self.assertIsNone(asym.orig_auth_seq_id_map) def test_poly_seq_scheme_handler_empty(self): """Test PolySeqSchemeHandler with no poly_seq_scheme""" fh = StringIO(ASYM_ENTITY) s, = modelcif.reader.read(fh) asym, = s.asym_units self.assertEqual(asym.auth_seq_id_map, 0) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [1, 2, 3, 4]) self.assertIsNone(asym.orig_auth_seq_id_map) def test_poly_seq_scheme_handler_nop(self): """Test PolySeqSchemeHandler with a do-nothing poly_seq_scheme""" fh = StringIO(ASYM_ENTITY + """ loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.pdb_seq_num A 1 1 1 A 1 2 2 A 1 3 3 """) s, = modelcif.reader.read(fh) asym, = s.asym_units self.assertEqual(asym.auth_seq_id_map, 0) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [1, 2, 3, 4]) self.assertIsNone(asym.orig_auth_seq_id_map) def test_poly_seq_scheme_handler_partial(self): """Test PolySeqSchemeHandler with partial information""" fh = StringIO(ASYM_ENTITY + """ loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.auth_seq_num A 1 1 6 . A 1 2 7 9 A 1 3 8 . 
""") s, = modelcif.reader.read(fh) asym, = s.asym_units # No mapping for residue 4 (and no insertion codes at all) self.assertEqual(asym.auth_seq_id_map, {1: (6, None), 2: (7, None), 3: (8, None)}) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [6, 7, 8, 4]) self.assertIsNone(asym.residue(1).ins_code) self.assertEqual(asym.orig_auth_seq_id_map, {2: 9}) def test_poly_seq_scheme_handler_incon_off(self): """Test PolySeqSchemeHandler with inconsistent offset""" fh = StringIO(ASYM_ENTITY + """ loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.pdb_strand_id A 1 1 6 X A 1 2 7 X A 1 3 8 X A 1 4 10 X """) s, = modelcif.reader.read(fh) asym, = s.asym_units self.assertEqual(asym._strand_id, 'X') self.assertEqual(asym.auth_seq_id_map, {1: (6, None), 2: (7, None), 3: (8, None), 4: (10, None)}) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [6, 7, 8, 10]) self.assertIsNone(asym.residue(1).ins_code) self.assertIsNone(asym.orig_auth_seq_id_map) def test_poly_seq_scheme_handler_unknown_auth_seq(self): """Test PolySeqSchemeHandler with explicit unknown auth_seq_num""" fh = StringIO(ASYM_ENTITY + """ loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.auth_seq_num _pdbx_poly_seq_scheme.pdb_strand_id A 1 1 1 1 A A 1 2 2 2 A A 1 3 3 ? 
A A 1 4 4 4 A """) s, = modelcif.reader.read(fh) asym, = s.asym_units self.assertEqual(asym.auth_seq_id_map, 0) self.assertEqual(asym.orig_auth_seq_id_map, {3: ihm.unknown}) def test_poly_seq_scheme_handler_str_seq_id(self): """Test PolySeqSchemeHandler with a non-integer pdb_seq_num""" fh = StringIO(ASYM_ENTITY + """ loop_ _pdbx_poly_seq_scheme.asym_id _pdbx_poly_seq_scheme.entity_id _pdbx_poly_seq_scheme.seq_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_poly_seq_scheme.auth_seq_num _pdbx_poly_seq_scheme.pdb_strand_id _pdbx_poly_seq_scheme.pdb_ins_code A 1 1 6 6 ? . A 1 2 7 12 ? . A 1 3 8 24 ? . A 1 4 9A 48A ? . """) s, = modelcif.reader.read(fh) asym, = s.asym_units self.assertIsNone(asym._strand_id) self.assertEqual(asym.auth_seq_id_map, {1: (6, None), 2: (7, None), 3: (8, None), 4: ('9A', None)}) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [6, 7, 8, '9A']) self.assertIsNone(asym.residue(1).ins_code) self.assertIsNone(asym.residue(3).ins_code) self.assertEqual(asym.orig_auth_seq_id_map, {2: 12, 3: 24, 4: '48A'}) if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/test_reference.py000066400000000000000000000037651506655355100210340ustar00rootroot00000000000000import utils import os import unittest TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import modelcif.reference class Tests(unittest.TestCase): def test_template_reference(self): """Test TemplateReference classes""" ref = modelcif.reference.PDB("1abc") self.assertEqual(ref.name, "PDB") self.assertIsNone(ref.other_details) # generic "other" reference ref = modelcif.reference.TemplateReference("1abc") self.assertEqual(ref.name, "Other") self.assertIsNone(ref.other_details) # custom "other" reference class CustomRef(modelcif.reference.TemplateReference): """foo bar""" ref = CustomRef("1abc") self.assertEqual(ref.name, "Other") self.assertEqual(ref.other_details, "foo") def test_target_reference(self): """Test 
TargetReference classes""" ref = modelcif.reference.UniProt("code", "acc", sequence='CC') self.assertEqual(ref.name, "UNP") self.assertIsNone(ref.other_details) # Reference with (deprecated) align begin, end self.assertWarns(UserWarning, modelcif.reference.UniProt, "code", "acc", align_begin=1, align_end=10, sequence='CC') # Reference without explicit sequence self.assertWarns(UserWarning, modelcif.reference.UniProt, "code", "acc") # generic "other" reference ref = modelcif.reference.TargetReference("code", "acc", sequence='CC') self.assertEqual(ref.name, "Other") self.assertIsNone(ref.other_details) # custom "other" reference class CustomRef(modelcif.reference.TargetReference): """foo bar""" ref = CustomRef("code", "acc", sequence='CC') self.assertEqual(ref.name, "Other") self.assertEqual(ref.other_details, "foo") if __name__ == '__main__': unittest.main() python-modelcif-1.5/test/utils.py000066400000000000000000000025151506655355100171670ustar00rootroot00000000000000import os import sys import tempfile import contextlib import shutil def set_search_paths(topdir): """Set search paths so that we can import Python modules""" os.environ['PYTHONPATH'] = topdir + os.pathsep \ + os.environ.get('PYTHONPATH', '') sys.path.insert(0, topdir) def get_input_file_name(topdir, fname): """Return full path to a test input file""" return os.path.join(topdir, 'test', 'input', fname) @contextlib.contextmanager def temporary_directory(dir=None): _tmpdir = tempfile.mkdtemp(dir=dir) yield _tmpdir shutil.rmtree(_tmpdir, ignore_errors=True) if 'coverage' in sys.modules: import atexit # Collect coverage information from subprocesses __site_tmpdir = tempfile.mkdtemp() with open(os.path.join(__site_tmpdir, 'sitecustomize.py'), 'w') as fh: fh.write(""" import coverage import atexit import os _cov = coverage.coverage(branch=True, data_suffix=True, auto_data=True, data_file=os.path.join('%s', '.coverage')) _cov.start() def _coverage_cleanup(c): c.stop() atexit.register(_coverage_cleanup, _cov) 
""" % os.getcwd()) os.environ['PYTHONPATH'] = __site_tmpdir + os.pathsep \ + os.environ.get('PYTHONPATH', '') def __cleanup(d): shutil.rmtree(d, ignore_errors=True) atexit.register(__cleanup, __site_tmpdir) python-modelcif-1.5/util/000077500000000000000000000000001506655355100154505ustar00rootroot00000000000000python-modelcif-1.5/util/check-db-entries.py000066400000000000000000000024761506655355100211420ustar00rootroot00000000000000import unittest import modelcif.reader import modelcif.dumper import urllib.request import os class Tests(unittest.TestCase): def _read_cif(self, url): with urllib.request.urlopen(url) as fh: s, = modelcif.reader.read(fh) return s def _write_cif(self, s, check=True): with open('test.cif', 'w') as fh: modelcif.dumper.write(fh, [s], check=check) os.unlink('test.cif') def test_modbase(self): """Test ModBase structure without errors""" model_id = '3c79945a94ec00cac8a03104e853ca50' modbase_top = 'https://salilab.org/modbase/retrieve/modbase' url = '%s/?modelID=%s&format=mmcif' % (modbase_top, model_id) s = self._read_cif(url) self._write_cif(s) def test_swiss_model(self): """Test SWISS-MODEL structure without errors""" model_id = '680335e5cca47f7d2b00afc1' url = 'https://swissmodel.expasy.org/repository/%s.cif' % model_id s = self._read_cif(url) self._write_cif(s) def test_alpha_fold(self): """Test AlphaFold structure without errors""" model_id = 'AF-B4GKE9-F1-model_v4' url = 'https://alphafold.ebi.ac.uk/files/%s.cif' % model_id s = self._read_cif(url) self._write_cif(s) if __name__ == '__main__': unittest.main() python-modelcif-1.5/util/python-modelcif.spec000066400000000000000000000041101506655355100214210ustar00rootroot00000000000000Name: python3-modelcif License: MIT Group: Applications/Engineering Version: 1.5 Release: 1%{?dist} Summary: Package for handling ModelCIF mmCIF and BinaryCIF files Packager: Ben Webb URL: https://pypi.python.org/pypi/modelcif Source: modelcif-%{version}.tar.gz BuildRequires: python3-devel, python3-setuptools, 
python3-ihm >= 2.6 Requires: python3-ihm >= 2.6 BuildArch: noarch %if 0%{?fedora} >= 42 BuildRequires: python3-pytest %endif %description This is a Python package to assist in handling mmCIF and BinaryCIF files compliant with the ModelCIF extension. It works with Python 3.6 or later. %prep %setup -n modelcif-%{version} %build %{__python3} setup.py install --root=${RPM_BUILD_ROOT} --record=INSTALLED_FILES %check %if 0%{?fedora} >= 42 %pytest modelcif/test.py %else %{__python3} setup.py test %endif %files -f INSTALLED_FILES %defattr(-,root,root) %changelog * Wed Sep 17 2025 Ben Webb 1.5-1 - Update to latest upstream. * Wed Jun 11 2025 Ben Webb 1.4-1 - Update to latest upstream. * Tue Jan 14 2025 Ben Webb 1.3-1 - Update to latest upstream. * Wed Oct 23 2024 Ben Webb 1.2-1 - Update to latest upstream. * Fri Sep 27 2024 Ben Webb 1.1-1 - Update to latest upstream. * Thu Jun 20 2024 Ben Webb 1.0-1 - Update to latest upstream. * Mon Oct 02 2023 Ben Webb 0.9-1 - Update to latest upstream. * Fri Aug 04 2023 Ben Webb 0.8-1 - Update to latest upstream. * Mon Jul 31 2023 Ben Webb 0.7-1 - Update to latest upstream. * Tue May 10 2022 Ben Webb 0.5-1 - Update to latest upstream. * Thu Apr 14 2022 Ben Webb 0.4-1 - Update to latest upstream. * Mon Mar 21 2022 Ben Webb 0.3-1 - Update to latest upstream. * Thu Jan 27 2022 Ben Webb 0.2-1 - Update to latest upstream. * Thu Jan 27 2022 Ben Webb 0.1-1 - Initial package. python-modelcif-1.5/util/validate-outputs.py000077500000000000000000000013761506655355100213460ustar00rootroot00000000000000#!/usr/bin/python3 """Check the output of each example for validity against the PDBx and ModelCIF dictionaries. This should be periodically rechecked in case the PDBx and ModelCIF dictionaries are updated. 
""" import sys import os import subprocess import ihm.dictionary import urllib.request with urllib.request.urlopen( 'https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ma.dic') as fh: pdbx_mcif = ihm.dictionary.read(fh) for script in ('mkmodbase.py', 'ligands.py'): print(script) subprocess.check_call([sys.executable, '../examples/' + script]) with open('output.cif') as fh: try: pdbx_mcif.validate(fh) except ihm.dictionary.ValidatorError as exc: print(exc) os.unlink('output.cif')