python-ihm-2.7/.appveyor.yml
environment:
# For Python versions available on Appveyor, see
# https://www.appveyor.com/docs/windows-images-software/#python
matrix:
- {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017, PYTHON: "C:\\Python36-x64"}
- {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019, PYTHON: "C:\\Python38-x64"}
- {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019, PYTHON: "C:\\Python39-x64"}
- {APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022, PYTHON: "C:\\Python313-x64"}
install:
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
- "python.exe -m pip install codecov coverage pytest-cov setuptools"
- choco install -y swig
- python.exe setup.py build_ext --inplace
build: off
test_script:
- "py.test --cov=ihm --cov-branch -v ."
on_success:
- "codecov"
python-ihm-2.7/.codecov.yml
ignore:
- test
- util
- src/cmp.c
- src/cmp.h
python-ihm-2.7/.github/workflows/codeql-analysis.yml
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"
on:
push:
branches: [ main ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ main ]
schedule:
- cron: '27 17 * * 5'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ 'cpp', 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
# Learn more:
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
steps:
- name: Checkout repository
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
- run: |
python setup.py build_ext --inplace -t build
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
python-ihm-2.7/.github/workflows/testpy.yml
name: build
on: [push, pull_request]
jobs:
build:
strategy:
fail-fast: false
matrix:
os: [ubuntu-24.04]
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
include:
- os: macos-latest
python-version: '3.10'
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies (Mac)
if: matrix.os == 'macos-latest'
run: |
brew install swig
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install coverage pytest-cov flake8 setuptools pep8-naming
- name: Test
run: |
# Test with Python tokenizer
py.test --cov=ihm --cov-branch -v .
CFLAGS="-coverage" python setup.py build_ext --inplace -t build
# Test with C tokenizer
py.test --cov=ihm --cov-branch --cov-report=xml --cov-append -v .
flake8 --ignore E402,W503,W504,N816
- uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
python-ihm-2.7/.gitignore
.DS_Store
# vim swapfiles
.*.swp
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.pytest_cache
python-ihm-2.7/.pylintrc
[MASTER]
init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))"
python-ihm-2.7/.readthedocs.yaml
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"
# You can also specify other tool versions:
# nodejs: "20"
# rust: "1.70"
# golang: "1.20"
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/conf.py
# You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
# builder: "dirhtml"
# Fail on all warnings to avoid broken references
# fail_on_warning: true
# Optionally build your docs in additional formats such as PDF and ePub
# formats:
# - pdf
# - epub
# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
# python:
# install:
# - requirements: docs/requirements.txt
python-ihm-2.7/ChangeLog.rst
2.7 - 2025-07-07
================
- Representative models for :class:`ihm.model.ModelGroup` can now be
denoted using the :class:`ihm.model.ModelRepresentative` class (#173).
- Bugfix: fix reference counts of Python bool objects.
2.6 - 2025-06-09
================
- The new :class:`ihm.License` and :class:`ihm.Disclaimer` classes
allow describing how the data in the file can be used, and map to
the ``pdbx_data_usage`` mmCIF table (#171).
- Two :class:`ihm.Assembly` classes are now considered equal if they
contain the same set of constituents, even if those constituents are
in differing orders or are split differently into domains (#170).
- The new :class:`ihm.format.ChangeFuncValueFilter` class allows modifying
mmCIF values by passing them through an arbitrary Python function.
- Checks for non-canonical atom names no longer use Ligand Expo to obtain
CCD info, as it is being retired (#169).
- :class:`ihm.metadata.CIFParser` and :class:`ihm.metadata.BinaryCIFParser`
now return the same ``entity_source`` mapping that
:class:`ihm.metadata.PDBParser` does (#168).
2.5 - 2025-04-25
================
- Assemblies are now checked at file-output time to ensure that each
asym referenced in an assembly is represented by atoms and/or spheres
in at least one model (#165).
- ``make_mmcif`` has a new ``--check_atom_names`` option which, if set,
will check for any non-canonical atom names (#166).
- ``pip install`` should no longer fail to install if the C extension
module cannot be built; it will fall back to use the slower pure Python
implementation.
2.4 - 2025-03-25
================
- Add support for fits of model groups or ensembles to cross-links
using the new :class:`ihm.restraint.CrossLinkGroupFit` class.
2.3 - 2025-03-13
================
- The C-accelerated BinaryCIF parser now supports interval quantization
encoding and a wider range of data types.
- ``make_mmcif`` now has a new ``--histidines`` option to convert any
non-standard histidine names to HIS (#164).
- Bugfix: don't change name or description of the complete assembly
in ``make_mmcif``.
- Bugfix: if :class:`ihm.reference.SeqDif` is used to annotate an
insertion or deletion, don't erroneously claim the alignment is incorrect.
2.2 - 2025-02-13
================
- The C-accelerated BinaryCIF parser now supports a wider range of data types.
2.1 - 2025-02-12
================
- Minor fixes to the C-accelerated BinaryCIF parser.
2.0 - 2025-02-11
================
- python-ihm no longer supports Python 2; the oldest supported version
is now Python 3.6 (#161).
- BinaryCIF files are now read in using a C-accelerated parser, which is
faster and more memory efficient than the older Python parser (#160).
- The tokenizers for mmCIF and BinaryCIF now return data of the correct
type (int, float, bool, or string); previously, all values were returned
as strings. :class:`ihm.reader.Handler` subclasses now request data of
the correct type using Python type annotations. The API of the
C-accelerated parsers has changed accordingly (#162).
- The new :class:`ihm.metadata.BinaryCIFParser` class can extract metadata
such as database IDs and template information from BinaryCIF files, in
a similar fashion to the existing :class:`ihm.metadata.CIFParser`.
- Information about a deposited structure, such as the deposition date,
can now be read from :attr:`System.database_status` (#163).
- The new :class:`ihm.format.ReplaceCategoryFilter` class can be used to
completely replace or remove an mmCIF category when using
:class:`ihm.format.CifTokenReader`.
1.8 - 2024-11-26
================
- Support added for datasets containing EPR, X-ray diffraction, footprinting
or predicted contacts using the :class:`ihm.dataset.EPRDataset`,
:class:`ihm.dataset.XRayDiffractionDataset`,
:class:`ihm.dataset.HydroxylRadicalFootprintingDataset`,
:class:`ihm.dataset.DNAFootprintingDataset` and
:class:`ihm.dataset.PredictedContactsDataset` classes (#157).
- Revision information (in the ``pdbx_audit_revision_*`` mmCIF tables)
can now be read or written using the new :class:`ihm.Revision`
and :class:`ihm.RevisionDetails` classes (#156).
- The new :class:`ihm.location.BMRbigLocation` class allows for
referencing datasets stored in the BMRbig database.
- All references to the old PDB-Dev database are now updated to PDB-IHM.
1.7 - 2024-10-22
================
- Sanity checks when writing out a file can now be disabled if desired,
using the new ``check`` argument to :func:`ihm.dumper.write` (#153).
- Data that have been split over multiple mmCIF or BinaryCIF files can now
be combined into a single :class:`ihm.System` object using the new
``add_to_system`` argument to :func:`ihm.reader.read`.
- Input files that assign multiple :class:`ihm.location.Location` objects
to a single :class:`ihm.dataset.Dataset` can now be read (#151).
- Bugfix: multiple :class:`ihm.restraint.EM3DRestraint` and
:class:`ihm.restraint.SASRestraint` objects can now be created for a
single dataset, as long as they act on different assemblies, as allowed
by the dictionary.
- Bugfix: allow for non-standard residues in the ``struct_ref`` table (#154).
1.6 - 2024-09-27
================
- The new class :class:`ihm.model.NotModeledResidueRange` allows for
the annotation of residue ranges that were explicitly not modeled.
These are written to the ``_ihm_residues_not_modeled`` mmCIF table,
and any residue marked as not-modeled in all models will also be
excluded from the ``pdbx_poly_seq_scheme`` table.
- The ``make_mmcif`` utility script will now automatically add any
missing :class:`ihm.model.NotModeledResidueRange` objects for
not-modeled residue ranges (#150).
- Bugfix: the residue range checks introduced in version 1.5 broke the
API used by python-modelcif. They have been reimplemented using the
original API.
- Bugfix: an unknown (?) value for ``pdbx_poly_seq_scheme.auth_seq_num``
is now preserved, not silently removed, when reading an mmCIF file.
1.5 - 2024-09-06
================
- Trying to create a :class:`ihm.Residue`, :class:`ihm.EntityRange`, or
:class:`ihm.AsymUnitRange` that references out-of-range residues (i.e.
``seq_id`` less than 1 or beyond the length of the :class:`ihm.Entity`
sequence) will now raise an error.
- Bugfix: :class:`ihm.reference.Reference` objects are no longer given
erroneous duplicate IDs on output (#149).
1.4 - 2024-08-30
================
- :class:`ihm.metadata.CIFParser` now extracts metadata from mmCIF starting
models from Model Archive or compliant with the ModelCIF dictionary.
- :meth:`ihm.Citation.from_pubmed_id` now takes an ``is_primary`` argument,
to allow denoting the publication as the most pertinent for the modeling.
- Duplicate references, pseudo sites, and features are now pruned on
output (#148).
- :class:`ihm.restraint.ResidueFeature` now reports an error if it is
given zero residue ranges (#147).
- Bugfix: allow for :class:`ihm.startmodel.Template` ``seq_id_range``
or ``template_seq_id_range`` to be empty.
1.3 - 2024-07-16
================
- The new class :class:`ihm.location.ProteomeXchangeLocation` can be used
for datasets stored in the ProteomeXchange database.
- Support is added for changes in the IHMCIF dictionary, specifically
the renaming of "CX-MS data" to "Crosslinking-MS data" and the
``_ihm_ordered_ensemble`` category to ``_ihm_ordered_model``. python-ihm
will output the new names, but for backwards compatibility will read both
old and new names.
- :class:`ihm.protocol.Protocol` can now be given additional text to
describe the protocol.
- :class:`ihm.model.Atom` now takes an ``alt_id`` argument to support
alternate conformations (#146).
- Support added for NumPy 2.0.
1.2 - 2024-06-12
================
- :class:`ihm.format.CifTokenReader` allows for reading an mmCIF file
and breaking it into tokens. This can be used for various housekeeping
tasks directly on an mmCIF file, such as changing chain IDs or renaming
categories or data items, while preserving most other formatting such
as comments and whitespace (#141).
- :class:`ihm.restraint.HDXRestraint` adds support for restraints
derived from Hydrogen-Deuterium Exchange experiments (#143).
- The ``make_mmcif`` utility script now preserves more "orphan" data from
the input file that is not referenced by other tables (#144).
1.1 - 2024-05-09
================
- :class:`ihm.System` now allows for one or more official database IDs to
be associated with the system using the new :class:`ihm.Database` class.
This maps to the mmCIF ``_database_2`` category (#135).
- :class:`ihm.location.FileLocation` now allows for an optional file format
to be specified (#139).
- The ``util/make-mmcif.py`` script is now included in the installed package,
so can be run if desired with ``python3 -m ihm.util.make_mmcif`` (#134).
- Bugfix: allow for file sizes in input mmCIF files to be floating point
values (#138).
- Bugfix: the 'Other' content type is now handled correctly when reading
information about external files from an mmCIF file (#139).
1.0 - 2024-02-13
================
- Support for multi-state schemes (such as kinetic rates and relaxation
times for conversions between states) was added;
see :mod:`ihm.multi_state_scheme`.
- Residue numbering in non-polymer, water, and branched entities should
now be better handled, no longer requiring the various scheme tables
to precede ``atom_site``. If you subclass :class:`ihm.model.Model`, atoms
may need to be renumbered; see :meth:`ihm.model.Model.add_atom` (#130).
- Original author-provided numbering can now be provided for waters,
using the ``orig_auth_seq_id_map`` argument to :class:`ihm.WaterAsymUnit`.
- The make-mmcif.py utility script now has basic functionality for
combining multiple input files into one, relabeling chain IDs if necessary.
- An :class:`ihm.Entity` containing just a single sugar is now classified
as a nonpolymer, not branched.
0.43 - 2023-12-08
=================
- Branched and polymeric empty entities are now distinguished
based on entity.type in the input file (previously, any empty
entity would always be reported as a polymer) (#129).
- Warn rather than reporting an error if the system contains
one or more empty entities (#128).
- If an input file contains multiple duplicated datasets, preserve
them as is rather than consolidating into a single dataset (#127).
- Allow for multiple branched entities to have the same composition
(they could have different connectivity) (#126).
0.42 - 2023-11-30
=================
- The :class:`ihm.metadata.CIFParser` class now parses Modeller-specific
CIF categories to add information about software and templates for
Modeller-generated mmCIF starting models.
- Basic support for original author-provided residue numbering is now
provided in the :class:`ihm.AsymUnit` class with a new
``orig_auth_seq_id_map`` argument. This information is read from and
written to the various mmCIF tables such as ``pdbx_poly_seq_scheme``
(#124).
0.41 - 2023-10-02
=================
- More complete support for oligosaccharides, in particular correct
numbering for atoms in `atom_site`, and the addition of some
data items to the output which are required for full
dictionary compliance.
0.40 - 2023-09-25
=================
- Basic support for oligosaccharides is now provided. New classes are
provided to describe saccharide chemical components
(:class:`ihm.SaccharideChemComp` and subclasses). Unlike polymers and
non-polymers, oligosaccharides can be branched, and a new
:class:`ihm.BranchLink` class allows the linkage between individual
components to be described.
- A summary report of the system can now be produced by calling
:meth:`ihm.System.report`. This can help to reveal errors or
inconsistencies, and will warn about missing data that may not be
technically required for a compliant mmCIF file, but is usually
expected to be present.
- :class:`ihm.metadata.MRCParser` now uses the new EMDB API to extract
version information and details for electron density map datasets.
- RPM packages are now available for recent versions of Fedora and
RedHat Enterprise Linux.
0.39 - 2023-08-04
=================
- :class:`ihm.location.DatabaseLocation` no longer accepts a ``db_name``
parameter. Derived classes (such as :class:`ihm.location.PDBLocation`)
should be used instead; the base class should only be used for "other"
databases that are not described in the IHM dictionary (#116).
- Bugfix: AlphaFold models in PDB format are no longer categorized by
:class:`ihm.metadata.PDBParser` as being deposited in the PDB database
with an empty accession code.
0.38 - 2023-05-26
=================
- Convenience classes are added to describe datasets stored in
the Model Archive, iProX, and AlphaFoldDB repositories
(:class:`ihm.location.ModelArchiveLocation`,
:class:`ihm.location.IProXLocation`, and
:class:`ihm.location.AlphaFoldDBLocation` respectively).
- The new class :class:`ihm.metadata.CIFParser` can be used to extract
metadata from starting models in mmCIF format. It is currently in
development and only supports model metadata from PDB or Model Archive
at this time.
- Line wrapping of output mmCIF files can now be turned off if desired using
:func:`ihm.dumper.set_line_wrap` (by default files are wrapped to 80
characters if possible).
- The make-mmcif.py utility script now allows for the name of the output
mmCIF file to be overridden (#115).
0.37 - 2023-02-03
=================
- Convenience classes are added to describe ensemble FRET datasets
(:class:`ihm.dataset.EnsembleFRETDataset`) and datasets stored in
the jPOSTrepo repository (:class:`ihm.location.JPOSTLocation`).
- Related depositions can now be grouped using the :class:`ihm.Collection`
class (#108).
- The :class:`ihm.model.Ensemble` class has a new ``superimposed`` attribute
to indicate whether the grouped models are structurally aligned.
0.36 - 2023-01-25
=================
- When reading a file that references external files, preserve any
information on the size of those files (#104).
- When reading a file containing models not in a model group, preserve
any information on the number of models deposited (#105).
- Bugfix: :func:`ihm.dictionary.read` now correctly handles dictionaries
that define a category after data items in that category (#107).
0.35 - 2022-09-16
=================
- Author names now use PDB style ("Lastname, A.B.") by default rather
than PubMed style ("Lastname AB") (#95).
- Asyms containing multiple water molecules should now be correctly
handled (previously every water molecule in the output ``atom_site``
table was given the same ``auth_seq_id``). Use the new
:class:`ihm.WaterAsymUnit` to create an asym containing waters (#98).
- Masses for all elements are now included, so that
``_entity.formula_weight`` can be correctly populated for ligands (#99).
- Bugfix: :class:`ihm.analysis.Analysis` objects are now read correctly
from input files when two objects share the same ID but are part
of different protocols (#101).
0.34 - 2022-08-03
=================
- Strings that start with STAR reserved words such as ``stop_`` are now
quoted to help some readers such as the GEMMI library
(ihmwg/python-modelcif#25).
- If an input file defines a chemical descriptor with an empty name
but also defines ``linker_type``, use that to fill in the name (#91).
- :class:`ihm.ChemComp` now allows for chemical components to be defined
in a chemical component dictionary (CCD) outside of the wwPDB CCD. This
is not used in python-ihm itself but can be used in python-modelcif.
- Bugfix: if a read mmCIF file defines a complete assembly, do not overwrite
its name and description on output (#92).
- Bugfix: only allow clustering methods/features that are supported by
the underlying IHM dictionary for :class:`ihm.model.Ensemble` (#94).
- Bugfix: categories such as ``_struct`` that are not typically looped
now support multi-line strings (ihmwg/python-modelcif#27).
0.33 - 2022-06-27
=================
- Improve reading of mmCIF files with incomplete data (#86, #87) or with
categories in an unexpected order (#85).
- Bugfix: fix sanity check for multiple atoms with the same atom_id and
seq_id to handle bulk water (where such duplicates are OK) (#88).
0.32 - 2022-05-31
=================
- :class:`ihm.protocol.Step` now takes an ``ensemble`` flag, to indicate
whether the modeling involved an ensemble, and which defaults to True if
the system contains at least one :class:`ihm.model.Ensemble` (#83).
- When reading an incomplete mmCIF file, such as that generated by some
versions of PyMOL, python-ihm will now fill in missing entity-related
information by guessing the sequence from the atom_site table (#67).
- Bugfix: :class:`ihm.flr.RefMeasurementGroup` objects are now read
from mmCIF files correctly.
0.31 - 2022-04-14
=================
- The :class:`ihm.dumper.IgnoreVariant` class can now be used to exclude
selected categories from the mmCIF/BinaryCIF output.
- The _pdbx_nonpoly_scheme CIF table should now fully comply with the
PDBx dictionary.
- Atoms are now checked at file-output time to ensure that a given model
chain does not contain multiple atoms with the same atom_id and
seq_id (#81).
0.30 - 2022-04-05
=================
- Add support for a long description of the system (like an abstract)
using struct.pdbx_model_details (#80).
- Bugfix: correctly read mmCIF files with missing entity.type.
0.29 - 2022-04-01
=================
- Output mmCIF files containing non-polymers should now validate against
the PDBx dictionary (#76).
- Bugfix: non-polymers that are erroneously marked as polymers in
the input mmCIF can now be read in without causing a Python
exception (#78).
- Bugfix: strings starting with an underscore (e.g. chain names) are now
quoted in mmCIF output to conform to the CIF syntax (#75).
0.28 - 2022-03-21
=================
- :class:`ihm.Citation` now takes a ``is_primary`` argument, which can
be used to denote the most pertinent publication for the modeling.
- Improved support for non-standard residues, and for standard amino acids
used as nonpolymers.
0.27 - 2022-01-27
=================
- Minor documentation improvements.
- Add support for the _struct.pdbx_structure_determination_methodology
mmCIF data item.
0.26 - 2022-01-12
=================
- :func:`ihm.dumper.write` and :func:`ihm.reader.read` both now take
a ``variant`` argument which can be used to control the set of tables
that are read/written. This can be used by other libraries (such as
python-ma) to support other mmCIF extensions.
0.25 - 2021-12-03
=================
- :func:`ihm.dictionary.Dictionary.validate` will now report errors for
any keywords or categories in the file that are not present in the
dictionary.
- :class:`ihm.LPeptideAlphabet` now supports the ASX and GLX ambiguous
residue types.
0.24 - 2021-12-01
=================
- :class:`ihm.AsymUnit` now supports insertion codes in its
``auth_seq_id_map``. The target of this mapping can either be an
author-provided residue number (as previously) or a 2-element tuple
containing this number and an insertion code.
- :class:`ihm.AsymUnit` now allows the PDB or author-provided strand/chain ID
to be different from the regular ID.
- Bugfix: if two :class:`ihm.dictionary.Dictionary` objects both contain
information about a given category, adding the two dictionaries together
now combines the category information, rather than just using that from
one dictionary.
- Bugfix: :class:`ihm.dictionary.Dictionary` should now be able to validate
BinaryCIF files containing integer or float values (#66).
0.23 - 2021-11-01
=================
- Bugfix: _struct_ref.pdbx_seq_one_letter_code is now treated as the subset
of the reference (e.g. UniProt) sequence that overlaps with our Entities,
not the entire sequence (#64).
0.22 - 2021-10-22
=================
- The :class:`ihm.Software` class now allows a citation for the software
to be provided.
- A new :mod:`ihm.citations` module contains citations for some packages
that are commonly used in integrative modeling.
0.21 - 2021-07-14
=================
- BinaryCIF files now use UTF8 msgpack strings for all text, rather than
raw bytes. This should make python-ihm's BinaryCIF files interoperable
with those used by, e.g., CoordinateServer.
- Output mmCIF files now include author-provided numbering (auth_seq_id)
for atoms in the atom_site table. This should help packages that don't
read the pdbx_poly_seq_scheme table to show the desired residue
numbering (#61).
0.20 - 2021-05-06
=================
- Support for Python 2.6 has been dropped. The library needs Python 2.7
or Python 3.
- Bugfix: correctly read in multiline reference sequence one-letter codes.
- Bugfix: the reader is now more tolerant of omitted or unknown values
(. or ?) in input mmCIF files.
0.19 - 2021-04-16
=================
- A convenience class is added to describe datasets stored in the
ProXL database (:class:`ihm.location.ProXLLocation`).
0.18 - 2020-11-06
=================
- Update to match latest FLR dictionary.
- Add a simple utility (util/make-mmcif.py) to make a minimal compliant
IHM mmCIF file, given an mmCIF file (potentially just coordinates) as input.
- Bugfix: the full residue range spanned by a starting model is now reported,
rather than just the subset that is mapped to one or more templates (#55).
- Bugfix: handle TrEMBL UniProt sequences (#57).
0.17 - 2020-07-10
=================
- Convenience classes are added to describe hydrogen/deuterium exchange
data (:class:`ihm.dataset.HDXDataset`) and datasets stored in the
PDB-Dev database (:class:`ihm.location.PDBDevLocation`).
- Multiple :class:`ihm.restraint.CrossLinkPseudoSite` objects can now
be assigned to a given :class:`ihm.restraint.CrossLink`.
- Bugfix: the :class:`ihm.dataset.Dataset` base class now has a type
of "Other" rather than "unspecified" to conform with the latest
IHM dictionary.
0.16 - 2020-05-29
=================
- :func:`ihm.reader.read` no longer discards models read from non-IHM mmCIF
files; they are instead placed in their own :class:`ihm.model.ModelGroup`.
- Bugfix: both the pure Python and C-accelerated mmCIF readers are now more
robust, able to handle files in binary mode (e.g. from opening a URL)
and in Unicode (mmCIF files are supposed to be ASCII but python-ihm should
handle any encoding Python supports).
0.15 - 2020-04-14
=================
- :class:`ihm.dataset.Dataset` objects that derive from another dataset
can now record any transformation involved; see
:class:`ihm.dataset.TransformedDataset`.
- :class:`ihm.metadata.PDBParser` now extracts basic metadata from
PDB files generated by SWISS-MODEL.
- An :class:`ihm.Entity` can now be linked to one or more reference databases
(e.g. UniProt). See the classes in the :mod:`ihm.reference` module.
0.14 - 2020-02-26
=================
- A cross-link can now use pseudo sites to represent one or both ends of the
link. The new :class:`ihm.restraint.CrossLinkPseudoSite` object is used
when the end of the cross-link is not represented in the model but its
position is known (e.g. it may have been approximated given the position
of nearby residues).
- :class:`ihm.restraint.PseudoSiteFeature` now references an underlying
:class:`ihm.restraint.PseudoSite`, allowing a single pseudo site to be
shared between a feature and a cross-link if desired.
- :class:`ihm.model.Ensemble` now supports describing subsamples from which
the ensemble was constructed; see :class:`ihm.model.Subsample`.
- Bugfix: :meth:`ihm.Citation.from_pubmed_id` now works correctly when the
journal volume or page range are empty, or the page "range" is just a
single page.
0.13 - 2019-11-14
=================
- :func:`ihm.reader.read` has a new optional ``reject_old_file`` argument.
If set, it will raise an exception if asked to read a file that conforms
to too old a version of the IHM extension dictionary.
- Definitions for the DHSO and BMSO cross-linkers are now provided in the
:mod:`ihm.cross_linkers` module.
0.12 - 2019-10-16
=================
- :class:`ihm.restraint.ResidueFeature` objects can now act on one or
more :class:`Residue` objects, which act equivalently to
1-residue ranges (:class:`AsymUnitRange` or :class:`EntityRange`).
- The new :class:`ihm.dataset.GeneticInteractionsDataset` class and the
``mic_value`` argument to :class:`ihm.restraint.DerivedDistanceRestraint`
can be used to represent restraints from genetic interactions, such as
point-mutant epistatic miniarray profile (pE-MAP) data.
0.11 - 2019-09-05
=================
- :class:`ihm.Assembly` objects can now only contain :class:`AsymUnit`
and :class:`AsymUnitRange` objects (not :class:`Entity` or
:class:`EntityRange`).
- Bugfix: ensembles that don't reference a :class:`ihm.model.ModelGroup`
no longer cause the reader to create bogus empty model groups.
0.10 - 2019-07-09
=================
- Features (:class:`ihm.restraint.AtomFeature`,
:class:`ihm.restraint.ResidueFeature`, and
:class:`ihm.restraint.NonPolyFeature`), which previously could select part
or all of an :class:`ihm.AsymUnit`, can now also select parts of an
:class:`Entity`. A restraint acting on an entity-feature is assumed
to apply to all instances of that entity.
0.9 - 2019-05-31
================
- Add support for the latest version of the IHM dictionary.
0.8 - 2019-05-28
================
- :func:`ihm.reader.read` can now be asked to warn if it encounters
categories or keywords in the mmCIF or BinaryCIF file that it doesn't
know about (and will ignore).
- Predicted contacts (:class:`ihm.restraint.PredictedContactRestraint`)
are now supported.
- :func:`ihm.reader.read` will now read starting model coordinates and
sequence difference information into the
:class:`ihm.startmodel.StartingModel` class. Applications that don't require
coordinates can instruct the reader to ignore them with the new
`read_starting_model_coord` flag.
- The new :mod:`ihm.flr` module allows for information from
Fluorescence / FRET experiments to be stored. This follows the definitions
in the `FLR dictionary `_.
0.7 - 2019-04-24
================
- Authors of the mmCIF file itself (`_audit_author` category) can now be
set by manipulating :attr:`ihm.System.authors`. (If this list is empty on
output, the set of all citation authors is used instead, as before.)
- Any grants that supported the modeling can now be listed in
:attr:`ihm.System.grants`.
- A copy of `SWIG `_ is no longer needed to install
releases of python-ihm via `pip` as pre-generated SWIG outputs are
included in the PyPI package. SWIG is still needed to build directly
from source code though.
0.6 - 2019-03-22
================
- :class:`Entity` now takes an optional :class:`ihm.source.Source` object to
describe the method by which the sample for the entity was produced.
:class:`ihm.metadata.PDBParser` will also extract this information
from input PDB files.
- :func:`ihm.reader.read` and :func:`ihm.dumper.write` now support reading
or writing additional user-defined mmCIF categories.
0.5 - 2019-01-17
================
- :class:`ihm.restraint.CrossLinkRestraint` now takes an
:class:`ihm.ChemDescriptor` object rather than the name of the cross-linker
used. This allows the use of novel cross-linkers (beyond those currently
listed in a fixed enumeration in the IHM dictionary).
:class:`ihm.ChemDescriptor` allows for the chemical structure of the
cross-linker to be uniquely specified, as a SMILES or INCHI string.
The :mod:`ihm.cross_linkers` module provides chemical descriptors for
some commonly-used cross-linkers.
- Pseudo sites are now supported. :class:`ihm.restraint.PseudoSiteFeature`
allows points or spheres with arbitrary coordinates to be designated as
features, which can then be used in
:class:`ihm.restraint.DerivedDistanceRestraint`.
0.4 - 2018-12-17
================
- Certain restraints can now be grouped using the
:class:`ihm.restraint.RestraintGroup` class. Due to limitations of the
underlying dictionary, this only works for some restraint types (currently
only :class:`ihm.restraint.DerivedDistanceRestraint`) and all restraints
in the group must be of the same type.
- Bugfix: the model's representation (see :mod:`ihm.representation`)
need not be a strict subset of the model's :class:`ihm.Assembly`. However,
any :class:`ihm.model.Atom` or :class:`ihm.model.Sphere` objects must be
covered by both the representation and the model's :class:`ihm.Assembly`.
- Bugfix: the reader no longer fails to read files that contain
_entity.formula_weight.
0.3 - 2018-11-21
================
- The library now includes basic support for nonpolymers and water molecules.
In addition to the previous support for polymers (amino or nucleic acid
chains), :class:`ihm.Entity` objects can now comprise ligands, water
molecules, and user-defined chemical components.
- The library can now read mmCIF dictionaries and validate mmCIF or BinaryCIF
files against them. See :mod:`ihm.dictionary`.
- Any :class:`ihm.model.Atom` or :class:`ihm.model.Sphere` objects are now
checked against the model's representation (see :mod:`ihm.representation`);
for example, an :class:`ihm.model.Atom` must correspond to an
:class:`ihm.representation.AtomicSegment`. The representation in turn must
be a subset of the model's :class:`ihm.Assembly`.
- More examples are now provided, of creating and using non-standard residue
types (chemical components); representing nonpolymers; and using the C
mmCIF parser in other C programs.
0.2 - 2018-09-06
================
- This release should fix installation of the package using pip:
`pip install ihm` should now work correctly.
0.1 - 2018-09-06
================
- First stable release. This provides largely complete support for the current
version of the wwPDB IHM mmCIF extension dictionary, and will read and
write mmCIF and BinaryCIF files that are compliant with the PDBx and
IHM dictionaries.
python-ihm-2.7/LICENSE
MIT License
Copyright (c) 2018-2025 IHM Working Group
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
python-ihm-2.7/MANIFEST.in
include ChangeLog.rst
include LICENSE
include examples/*
include src/cmp.h
include src/ihm_format.h
include src/ihm_format.i
include src/ihm_format_wrap_2.7.c
python-ihm-2.7/README.md
[DOI](https://doi.org/10.5281/zenodo.2603378)
[Documentation](https://python-ihm.readthedocs.org/)
[conda-forge](https://anaconda.org/conda-forge/ihm)
[PyPI](https://badge.fury.io/py/ihm)
[Build status](https://github.com/ihmwg/python-ihm/actions?query=workflow%3Abuild)
[AppVeyor build](https://ci.appveyor.com/project/benmwebb/python-ihm)
[Code coverage](https://codecov.io/gh/ihmwg/python-ihm)
This is a Python package to assist in handling [mmCIF](https://mmcif.wwpdb.org/)
and [BinaryCIF](https://github.com/molstar/BinaryCIF) files compliant with the
[integrative/hybrid modeling (IHM)](https://mmcif.wwpdb.org/dictionaries/mmcif_ihm_ext.dic/Index/)
extension. It works with Python 3.6 or later.
To handle non-integrative theoretical models (for example, homology models),
see the [python-modelcif](https://github.com/ihmwg/python-modelcif) package
which supports files compliant with the
[ModelCIF](https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Index/)
extension.
Please [see the documentation](https://python-ihm.readthedocs.org/)
or some
[worked examples](https://github.com/ihmwg/python-ihm/tree/main/examples)
for more details.
# Installation with conda, Homebrew or pip
If you are using [Anaconda Python](https://www.anaconda.com/), install with
```
conda install -c conda-forge ihm
```
On a Mac with [Homebrew](https://brew.sh/), install with
```
brew tap salilab/salilab; brew install ihm
```
On a Fedora or RedHat Enterprise Linux box, install with
```
dnf copr enable salilab/salilab; dnf install python3-ihm
```
On an Ubuntu LTS box, install from
[our PPA](https://launchpad.net/~salilab/+archive/ubuntu/ppa) with
```
apt install software-properties-common; add-apt-repository ppa:salilab/ppa;
apt install python3-ihm
```
Alternatively, install with pip:
```
pip install ihm
```
(Note that pip builds a C extension module for faster reading of mmCIF and
BinaryCIF files. This requires that your system has a C compiler. If you
don't have a C compiler available, the library will read files using pure
Python instead.)
# Installation from source code
To build and install from a clone of the GitHub repository, run
```
python setup.py build
python setup.py install
```
Note that this will attempt to build a C extension module for faster reading
of mmCIF and BinaryCIF files. This requires that your system has a C compiler
and [SWIG](https://www.swig.org/). If either of these components are missing,
the library will fall back to reading files using pure Python instead.
If you want to write [BinaryCIF](https://github.com/molstar/BinaryCIF)
files (or to read them without the C extension module), you will also need the
Python [msgpack](https://github.com/msgpack/msgpack-python) package.
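
As a quick check that everything is wired up, here is a minimal sketch of
writing a (nearly empty) BinaryCIF file. The `format='BCIF'` argument to
`ihm.dumper.write` selects BinaryCIF output; the output filename is arbitrary:

```python
import ihm
import ihm.dumper

system = ihm.System(title='minimal example')

# BinaryCIF is a binary format, so open the file in binary mode
with open('output.bcif', 'wb') as fh:
    ihm.dumper.write(fh, [system], format='BCIF')
```
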
# Testing
There are a number of testcases in the `test` directory. Each one can be run
like a normal Python script to test the library. They can also be all run at
once using [nose](https://nose.readthedocs.io/en/latest/)
or [pytest](https://docs.pytest.org/en/latest/). They will also test
the C extension module if it is first built with
`python setup.py build_ext --inplace`.
python-ihm-2.7/docs/Makefile
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = Python-IHM
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
python-ihm-2.7/docs/analysis.rst
.. highlight:: rest
.. _analysis_module:
The :mod:`ihm.analysis` Python module
=====================================
.. automodule:: ihm.analysis
.. autoclass:: Step
:members:
.. autoclass:: FilterStep
:members:
.. autoclass:: ClusterStep
:members:
.. autoclass:: RescoreStep
:members:
.. autoclass:: ValidationStep
:members:
.. autoclass:: EmptyStep
:members:
.. autoclass:: Analysis
:members:
python-ihm-2.7/docs/changes.rst
.. _changes:
.. currentmodule:: ihm
Change history
**************
.. include:: ../ChangeLog.rst
python-ihm-2.7/docs/citations.rst
.. highlight:: rest
.. _citations_module:
The :mod:`ihm.citations` Python module
======================================
.. automodule:: ihm.citations
.. data:: imp
The Integrative Modeling Platform (IMP).
.. data:: pmi
The PMI module of the Integrative Modeling Platform (IMP).
.. data:: modeller
MODELLER, comparative modeling by satisfaction of spatial restraints.
.. data:: psipred
PSIPRED, protein secondary structure prediction based on position-specific
scoring matrices.
.. data:: disopred
DISOPRED, disordered region prediction.
.. data:: hhpred
HHpred, protein homology detection and structure prediction.
.. data:: relion
RELION, a Bayesian approach for cryo-EM structure determination.
.. data:: phyre2
Phyre2, a web portal for protein modeling, prediction and analysis.
.. data:: swiss_model
SWISS-MODEL: homology modeling of protein structures and complexes.
.. data:: alphafold2
AlphaFold: ab-initio modeling of protein structures.
.. data:: colabfold
ColabFold: accessible AlphaFold pipeline.
.. data:: qmeandisco
QMEANDisCo: model quality estimation with distance constraints.
.. data:: mmseqs2
MMseqs2: app for fast, interactive sequence searches.
python-ihm-2.7/docs/conf.py
# -*- coding: utf-8 -*-
#
# Python-IHM documentation build configuration file, created by
# sphinx-quickstart on Thu Mar 1 14:05:33 2018.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc',
'sphinx.ext.viewcode']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'Python-IHM'
copyright = u'2018-2025, Benjamin Webb'
author = u'Benjamin Webb'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = u''
# The full version, including alpha/beta/rc tags.
release = u''
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
html_sidebars = {}
# -- Options for HTMLHelp output ------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'Python-IHMdoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'Python-IHM.tex', u'Python-IHM Documentation',
u'Benjamin Webb', 'manual'),
]
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'python-ihm', u'Python-IHM Documentation',
[author], 1)
]
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'Python-IHM', u'Python-IHM Documentation',
author, 'Python-IHM', 'One line description of project.',
'Miscellaneous'),
]
# Warn about broken links to classes, etc.
nitpicky = True
python-ihm-2.7/docs/cross_linkers.rst
.. highlight:: rest
.. _cross_linkers_module:
The :mod:`ihm.cross_linkers` Python module
==========================================
.. automodule:: ihm.cross_linkers
.. data:: dss
DSS cross-linker that links a primary amine with another primary amine
(non-water-soluble).
.. data:: dsg
DSG cross-linker that links a primary amine with another primary amine
(non-water-soluble).
.. data:: bs3
BS3 cross-linker that links a primary amine with another primary amine
(water-soluble).
.. data:: dsso
DSSO cross-linker that links a primary amine with another primary amine
(non-water-soluble). It is similar to DSS but can be cleaved in the gas
phase using collision-induced dissociation.
.. data:: edc
EDC cross-linker that links a carboxyl group with a primary amine.
.. data:: dhso
DHSO (dihydrazide sulfoxide) MS-cleavable cross-linker that links
carboxyl groups, described in
`Gutierrez et al, 2016 `_.
.. data:: bmso
BMSO (bismaleimide sulfoxide) MS-cleavable cross-linker that links
cysteines, described in
`Gutierrez et al, 2018 `_.
.. data:: sda
SDA (NHS-Diazirine) (succinimidyl 4,4′-azipentanoate) cross-linker that
links primary amines with nearly any other functional group via
long-wave UV-light activation.
.. data:: photo_leucine
L-photo-leucine. Non-canonical amino acid incorporated at leucine
positions that links leucine to any other functional group via long-wave
UV-light activation.
See `Suchanek et al, 2005 `_.
.. data:: dsbu
dsbu (disuccinimidyl dibutyric urea) cross-linker that links a primary
amine with another primary amine (non-water-soluble).
Cleavable in the gas phase using collision-induced dissociation.
See `Müller et al, 2011 `_.
.. data:: phoX
PhoX cross-linker that links a primary amine with another primary amine.
The spacer group contains a phosphonate group, making the cross-linker
IMAC-enrichable. Also known by the name DSPP. See
`Steigenberger et al, 2019 `_.
.. data:: tbuphoX
Tert-butyl PhoX cross-linker. Similar to PhoX, but containing a tert-butyl
group that renders the cross-linker cell permeable.
See `Jiang et al, 2021 `_.
python-ihm-2.7/docs/dataset.rst
.. highlight:: rest
.. _dataset_module:
The :mod:`ihm.dataset` Python module
====================================
.. automodule:: ihm.dataset
.. autoclass:: Dataset
:members:
.. autoclass:: TransformedDataset
:members:
.. autoclass:: DatasetGroup
:members:
.. autoclass:: CXMSDataset
:members:
.. autoclass:: MassSpecDataset
:members:
.. autoclass:: HDXDataset
:members:
.. autoclass:: PDBDataset
:members:
.. autoclass:: ComparativeModelDataset
:members:
.. autoclass:: IntegrativeModelDataset
:members:
.. autoclass:: DeNovoModelDataset
:members:
.. autoclass:: NMRDataset
:members:
.. autoclass:: MutagenesisDataset
:members:
.. autoclass:: EMDensityDataset
:members:
.. autoclass:: EMMicrographsDataset
:members:
.. autoclass:: EM2DClassDataset
:members:
.. autoclass:: SASDataset
:members:
.. autoclass:: FRETDataset
:members:
.. autoclass:: EnsembleFRETDataset
:members:
.. autoclass:: YeastTwoHybridDataset
:members:
.. autoclass:: GeneticInteractionsDataset
:members:
.. autoclass:: EPRDataset
:members:
.. autoclass:: XRayDiffractionDataset
:members:
.. autoclass:: HydroxylRadicalFootprintingDataset
:members:
.. autoclass:: DNAFootprintingDataset
:members:
.. autoclass:: PredictedContactsDataset
:members:
python-ihm-2.7/docs/design.rst
Design principles
*****************
Lightweight
===========
The classes in this package are designed to be lightweight, taking up as
little memory as possible. For example, individual atoms are *not* stored
in Python classes, and are only requested when needed. This is because the
library is designed to work with an existing modeling package, which likely
already stores data on the system in its own files or data structures, such
that duplicating this information would be very inefficient.
Mutable
=======
All classes are designed to be *mutable*; that is, their contents can be
changed after creation. For example, protein chains can be added to or removed
from an existing :class:`ihm.Assembly` object, or the amino acid sequence
of an :class:`ihm.Entity` can be extended. This is because some of the modeling
packages which use these classes build up their own data model in a similar
way.
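
For example, a minimal sketch of such in-place modification (``asym`` here
stands for an existing :class:`ihm.AsymUnit`)::

    import ihm

    entity = ihm.Entity('MELS', description='example protein')
    # Entity.sequence is a tuple of chemical components, so the
    # sequence can be extended after creation
    entity.sequence += (ihm.LPeptideAlphabet()['W'],)

    # An Assembly acts like a Python list, so chains can be added
    # to or removed from it after creation
    assembly = ihm.Assembly([], name='modeled assembly')
    assembly.append(asym)
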
Types rather than enums
=======================
Where the underlying IHM mmCIF dictionary uses an enumeration, generally this
corresponds to separate sibling classes in this package. For example, two
datasets which differ only in their ``data_type``
`in the dictionary `_
(such as an electron microscopy density map and small-angle scattering data)
are represented with two classes in this package:
:class:`ihm.dataset.EMDensityDataset` and :class:`ihm.dataset.SASDataset`.
This cleanly enforces the allowed types in the most Pythonic manner.
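
For example (a minimal sketch; the EMDB and SASBDB accession codes are
invented for illustration)::

    import ihm.dataset
    import ihm.location

    # The type of each dataset is expressed by its Python class,
    # not by a data_type string
    em_map = ihm.dataset.EMDensityDataset(
        ihm.location.EMDBLocation('EMD-1234'))
    sas_data = ihm.dataset.SASDataset(
        ihm.location.SASBDBLocation('SASDA99'))
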
Hierarchy of classes
====================
The underlying IHM mmCIF dictionary is essentially structured as a set of
rows in database tables, with IDs acting as keys or pointers into other tables.
This is naturally represented in Python as a hierarchy of classes, with
members pointing to other objects as appropriate. IDs are not used to look
up other objects, and are only used internally to populate the tables.
For example, to group multiple models together, the dictionary assigns all of
the models the same `model_group id `_
while in the Python package the :class:`ihm.model.Model` objects are placed
into a :class:`ihm.model.ModelGroup` object, which acts like a simple Python
list.
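
For example, a minimal sketch of this grouping (assuming ``model1`` and
``model2`` are already-constructed :class:`ihm.model.Model` objects and
``system`` is an :class:`ihm.System`)::

    import ihm.model

    # A ModelGroup is simply a list of models; no IDs are involved
    group = ihm.model.ModelGroup([model1, model2], name='cluster 1')

    # Groups are collected into states, which are collected into
    # state groups on the System
    state = ihm.model.State([group])
    system.state_groups.append(ihm.model.StateGroup([state]))
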
The table-based representation of the dictionary does allow for objects to
exist that are not referenced by other objects, unlike the Python-based
hierarchy. Such 'orphan' objects can be referenced from orphan lists in
the top-level :class:`ihm.System` if necessary.
Equal versus identical objects
==============================
Since the Python objects are mutable, can be constructed iteratively by a
modeling package, and live in a hierarchy, it can sometimes turn out that two
Python objects while not identical (they point to different locations in
memory) are equal (their contents are the same). For example, the two
:class:`ihm.Assembly` objects, one of proteins A, B, and C, and the other of
A, C, and B, are not identical (they are different objects) but are equal
(the order of the proteins does not matter). The library will attempt to
detect such objects and consolidate them on output, describing both of them
in the mmCIF file with the same ID, to avoid meaningless duplication of rows
in the output tables. This removes some of the burden from the author of the
modeling package, which may not care about such a distinction.
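
For example (a minimal sketch, where ``asym_a``, ``asym_b`` and ``asym_c``
stand for existing :class:`ihm.AsymUnit` objects)::

    import ihm

    # Two assemblies with the same contents in a different order
    a1 = ihm.Assembly([asym_a, asym_b, asym_c], name='complex')
    a2 = ihm.Assembly([asym_a, asym_c, asym_b], name='complex')
    # a1 and a2 are distinct Python objects, but on output the
    # library detects the duplication and writes both with the
    # same assembly ID
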
mmCIF backend
=============
The classes in this package roughly correspond to
`categories `_
in the underlying IHM mmCIF dictionary. This allows for simple output of
mmCIF formatted files, but also allows for the potential future support for
other file formats that support the dictionary or a subset of it, such
as `MMTF `_.
python-ihm-2.7/docs/dictionary.rst 0000664 0000000 0000000 00000000561 15035733372 0017317 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _dictionary_module:
The :mod:`ihm.dictionary` Python module
=======================================
.. automodule:: ihm.dictionary
.. autoclass:: Dictionary
:members:
.. autoclass:: Category
:members:
.. autoclass:: ItemType
:members:
.. autoclass:: Keyword
:members:
.. autofunction:: read
.. autoexception:: ValidatorError
python-ihm-2.7/docs/dumper.rst 0000664 0000000 0000000 00000000511 15035733372 0016441 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _dumper_module:
The :mod:`ihm.dumper` Python module
===================================
.. automodule:: ihm.dumper
.. autoclass:: Dumper
:members:
.. autoclass:: Variant
:members:
.. autoclass:: IHMVariant
.. autoclass:: IgnoreVariant
.. autofunction:: set_line_wrap
.. autofunction:: write
python-ihm-2.7/docs/flr.rst 0000664 0000000 0000000 00000003425 15035733372 0015737 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _flr_module:
The :mod:`ihm.flr` Python module
=====================================
.. automodule:: ihm.flr
.. autoclass:: Probe
:members:
.. autoclass:: ProbeDescriptor
:members:
.. autoclass:: ProbeList
:members:
.. autoclass:: SampleProbeDetails
:members:
.. autoclass:: PolyProbeConjugate
:members:
.. autoclass:: PolyProbePosition
:members:
.. autoclass:: Sample
:members:
.. autoclass:: EntityAssembly
:members:
.. autoclass:: SampleCondition
:members:
.. autoclass:: Experiment
:members:
.. autoclass:: Instrument
:members:
.. autoclass:: InstSetting
:members:
.. autoclass:: ExpCondition
:members:
.. autoclass:: FRETAnalysis
:members:
.. autoclass:: LifetimeFitModel
:members:
.. autoclass:: RefMeasurementGroup
:members:
.. autoclass:: RefMeasurement
:members:
.. autoclass:: RefMeasurementLifetime
:members:
.. autoclass:: FRETDistanceRestraintGroup
:members:
.. autoclass:: FRETDistanceRestraint
:members:
.. autoclass:: FRETForsterRadius
:members:
.. autoclass:: FRETCalibrationParameters
:members:
.. autoclass:: PeakAssignment
:members:
.. autoclass:: FRETModelQuality
:members:
.. autoclass:: FRETModelDistance
:members:
.. autoclass:: FPSModeling
:members:
.. autoclass:: FPSGlobalParameters
:members:
.. autoclass:: FPSAVModeling
:members:
.. autoclass:: FPSAVParameter
:members:
.. autoclass:: FPSMPPModeling
:members:
.. autoclass:: FPSMeanProbePosition
:members:
.. autoclass:: FPSMPPAtomPositionGroup
:members:
.. autoclass:: FPSMPPAtomPosition
:members:
.. autoclass:: KineticRateFretAnalysisConnection
:members:
.. autoclass:: RelaxationTimeFretAnalysisConnection
:members:
.. autoclass:: FLRData
:members:
python-ihm-2.7/docs/format.rst 0000664 0000000 0000000 00000001016 15035733372 0016436 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _format_module:
The :mod:`ihm.format` Python module
===================================
.. automodule:: ihm.format
.. autoclass:: CifWriter
:members:
.. autoclass:: CifReader
:members:
.. autoclass:: CifTokenReader
:members:
.. autoclass:: Filter
:members:
.. autoclass:: ChangeValueFilter
.. autoclass:: ChangeFuncValueFilter
.. autoclass:: RemoveItemFilter
.. autoclass:: ChangeKeywordFilter
.. autoclass:: ReplaceCategoryFilter
.. autoexception:: CifParserError
:members:
python-ihm-2.7/docs/format_bcif.rst 0000664 0000000 0000000 00000000373 15035733372 0017426 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _format_bcif_module:
The :mod:`ihm.format_bcif` Python module
========================================
.. automodule:: ihm.format_bcif
.. autoclass:: BinaryCifWriter
:members:
.. autoclass:: BinaryCifReader
:members:
python-ihm-2.7/docs/geometry.rst 0000664 0000000 0000000 00000001235 15035733372 0017004 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _geometry_module:
The :mod:`ihm.geometry` Python module
=====================================
.. automodule:: ihm.geometry
.. autoclass:: Center
:members:
.. autoclass:: Transformation
:members:
.. autoclass:: GeometricObject
:members:
.. autoclass:: Sphere
:members:
.. autoclass:: Torus
:members:
.. autoclass:: HalfTorus
:members:
.. autoclass:: Axis
:members:
.. autoclass:: XAxis
:members:
.. autoclass:: YAxis
:members:
.. autoclass:: ZAxis
:members:
.. autoclass:: Plane
:members:
.. autoclass:: XYPlane
:members:
.. autoclass:: YZPlane
:members:
.. autoclass:: XZPlane
:members:
python-ihm-2.7/docs/index.rst 0000664 0000000 0000000 00000002025 15035733372 0016256 0 ustar 00root root 0000000 0000000 Python-IHM documentation
========================
This is a Python package to assist in handling mmCIF files compliant
with the integrative/hybrid modeling (IHM) extension.
The pages below document the library API. For complete worked examples,
see `the examples directory at GitHub `_
or real systems deposited using the library, such as
`Nup133 `_.
Contents
========
.. toctree::
:maxdepth: 2
introduction
usage
provenance
design
lowlevel
changes
API Reference:
.. toctree::
:maxdepth: 1
main
source
reference
location
dataset
metadata
startmodel
representation
geometry
restraint
cross_linkers
citations
protocol
analysis
model
format
format_bcif
dumper
reader
dictionary
flr
multi_state_scheme
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
python-ihm-2.7/docs/introduction.rst 0000664 0000000 0000000 00000003563 15035733372 0017700 0 ustar 00root root 0000000 0000000 Introduction
************
This package provides a mechanism to describe an integrative modeling
application with a set of Python objects. This includes
- the data used for the modeling, such as previous computational models
from comparative or integrative modeling, and experimental datasets from
X-ray crystallography, mass spectrometry, or electron microscopy;
- the protocol used to generate models, such as molecular dynamics, clustering,
and rescoring;
- the actual coordinates of output models, which may be multi-scale (including
both atomic coordinates and more coarse-grained representations),
multi-state (multiple conformations and/or compositions of the system needed
to explain the input data), or ordered (such as different points in a
chemical reaction);
- grouping of multiple models into ensembles or clusters;
- validation of models, for example by scoring against data not used in the
modeling itself.
Once created, this set of Python objects can be written to an mmCIF file
that is compliant with the
`IHMCIF extension `_
to the `PDBx/mmCIF dictionary `_,
suitable for deposition in the
`PDB-IHM repository `_. The files are best viewed
in a viewer that supports IHMCIF, such as
`UCSF ChimeraX `_, although they may be
partially viewable in regular PDBx mmCIF viewers (likely only the atomic
coordinates will be visible).
The Python package can be used standalone, but is primarily intended for use
within modeling software such as `IMP `_,
or `HADDOCK `_. For example, IMP provides
`a class `_
which uses this library to convert an IMP::pmi modeling protocol into an mmCIF
file.
python-ihm-2.7/docs/location.rst 0000664 0000000 0000000 00000002316 15035733372 0016762 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _location_module:
The :mod:`ihm.location` Python module
=====================================
.. automodule:: ihm.location
.. autoclass:: Location
:members:
.. autoclass:: DatabaseLocation
:members:
.. autoclass:: EMDBLocation
:members:
.. autoclass:: PDBLocation
:members:
.. autoclass:: PDBDevLocation
:members:
.. autoclass:: ModelArchiveLocation
:members:
.. autoclass:: BMRBLocation
:members:
.. autoclass:: MassIVELocation
:members:
.. autoclass:: EMPIARLocation
:members:
.. autoclass:: SASBDBLocation
:members:
.. autoclass:: PRIDELocation
:members:
.. autoclass:: JPOSTLocation
:members:
.. autoclass:: BioGRIDLocation
:members:
.. autoclass:: ProXLLocation
:members:
.. autoclass:: IProXLocation
:members:
.. autoclass:: AlphaFoldDBLocation
:members:
.. autoclass:: ProteomeXchangeLocation
:members:
.. autoclass:: BMRbigLocation
:members:
.. autoclass:: FileLocation
:members:
.. autoclass:: InputFileLocation
:members:
.. autoclass:: OutputFileLocation
:members:
.. autoclass:: WorkflowFileLocation
:members:
.. autoclass:: VisualizationFileLocation
:members:
.. autoclass:: Repository
:members:
python-ihm-2.7/docs/lowlevel.rst 0000664 0000000 0000000 00000000660 15035733372 0017003 0 ustar 00root root 0000000 0000000 Low-level usage
***************
The library can also be used at a lower level, to extract a subset of data
from an mmCIF file. This can be done in either C or Python code.
For more information, see the :mod:`ihm.format` module, or the
`atom_reader.c `_ or
`stream_parser.py `_ examples.
python-ihm-2.7/docs/main.rst 0000664 0000000 0000000 00000003636 15035733372 0016104 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _main_module:
The :mod:`ihm` Python module
===================================
.. automodule:: ihm
.. autodata:: unknown
.. autoclass:: System
:members:
.. autoclass:: DatabaseStatus
:members:
.. autoclass:: Database
:members:
.. autoclass:: Software
:members:
.. autoclass:: Citation
:members:
.. autoclass:: Grant
:members:
.. autoclass:: ChemComp
:members:
.. autoclass:: PeptideChemComp
:members:
.. autoclass:: LPeptideChemComp
:members:
.. autoclass:: DPeptideChemComp
:members:
.. autoclass:: RNAChemComp
:members:
.. autoclass:: DNAChemComp
:members:
.. autoclass:: SaccharideChemComp
:members:
.. autoclass:: LSaccharideChemComp
:members:
.. autoclass:: LSaccharideAlphaChemComp
:members:
.. autoclass:: LSaccharideBetaChemComp
:members:
.. autoclass:: DSaccharideChemComp
:members:
.. autoclass:: DSaccharideAlphaChemComp
:members:
.. autoclass:: DSaccharideBetaChemComp
:members:
.. autoclass:: NonPolymerChemComp
:members:
.. autoclass:: WaterChemComp
:members:
.. autoclass:: Alphabet
:members:
.. autoclass:: LPeptideAlphabet
:members:
.. autoclass:: DPeptideAlphabet
:members:
.. autoclass:: RNAAlphabet
:members:
.. autoclass:: DNAAlphabet
:members:
.. autoclass:: Entity
:members:
.. autoclass:: EntityRange
:members:
.. autoclass:: AsymUnit
:members:
.. autoclass:: AsymUnitRange
:members:
.. autoclass:: WaterAsymUnit
:members:
.. autoclass:: Atom
:members:
.. autoclass:: Residue
:members:
.. autoclass:: Assembly
:members:
.. autoclass:: ChemDescriptor
:members:
.. autoclass:: Collection
:members:
.. autoclass:: BranchDescriptor
:members:
.. autoclass:: BranchLink
:members:
.. autoclass:: DataUsage
:members:
.. autoclass:: License
.. autoclass:: Disclaimer
.. autoclass:: Revision
:members:
.. autoclass:: RevisionDetails
:members:
python-ihm-2.7/docs/make.bat 0000664 0000000 0000000 00000001456 15035733372 0016031 0 ustar 00root root 0000000 0000000 @ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
set SPHINXPROJ=Python-IHM
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
:end
popd
python-ihm-2.7/docs/metadata.rst 0000664 0000000 0000000 00000000533 15035733372 0016731 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _metadata_module:
The :mod:`ihm.metadata` Python module
=====================================
.. automodule:: ihm.metadata
.. autoclass:: Parser
:members:
.. autoclass:: MRCParser
:members:
.. autoclass:: PDBParser
:members:
.. autoclass:: CIFParser
:members:
.. autoclass:: BinaryCIFParser
:members:
python-ihm-2.7/docs/model.rst 0000664 0000000 0000000 00000001514 15035733372 0016251 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _model_module:
The :mod:`ihm.model` Python module
==================================
.. automodule:: ihm.model
.. autoclass:: Sphere
:members:
.. autoclass:: Atom
:members:
.. autoclass:: Model
:members:
.. autoclass:: ModelRepresentative
:members:
.. autoclass:: ModelGroup
:members:
.. autoclass:: State
:members:
.. autoclass:: StateGroup
:members:
.. autoclass:: Ensemble
:members:
.. autoclass:: NotModeledResidueRange
:members:
.. autoclass:: OrderedProcess
:members:
.. autoclass:: ProcessStep
:members:
.. autoclass:: ProcessEdge
:members:
.. autoclass:: LocalizationDensity
:members:
.. autoclass:: DCDWriter
:members:
.. autoclass:: Subsample
:members:
.. autoclass:: RandomSubsample
:members:
.. autoclass:: IndependentSubsample
:members:
python-ihm-2.7/docs/multi_state_scheme.rst 0000664 0000000 0000000 00000001022 15035733372 0021021 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _multi_state_scheme_module:
The :mod:`ihm.multi_state_scheme` Python module
===============================================
.. automodule:: ihm.multi_state_scheme
.. autoclass:: MultiStateScheme
:members:
.. autoclass:: Connectivity
:members:
.. autoclass:: EquilibriumConstant
:members:
.. autoclass:: PopulationEquilibriumConstant
:members:
.. autoclass:: KineticRateEquilibriumConstant
:members:
.. autoclass:: KineticRate
:members:
.. autoclass:: RelaxationTime
:members:
python-ihm-2.7/docs/protocol.rst 0000664 0000000 0000000 00000000335 15035733372 0017012 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _protocol_module:
The :mod:`ihm.protocol` Python module
=====================================
.. automodule:: ihm.protocol
.. autoclass:: Step
:members:
.. autoclass:: Protocol
:members:
python-ihm-2.7/docs/provenance.rst 0000664 0000000 0000000 00000003233 15035733372 0017311 0 ustar 00root root 0000000 0000000 Provenance
**********
The IHM dictionary is designed to capture all aspects of integrative modeling,
from the original deposited experimental data to the final validated models.
This allows for maximum reproducibility and reusability. However, many
modeling packages are only concerned with the conversion of their own inputs
to output models (for example, a model of a complex may be generated by
docking comparative models guided by some experimental data of the
entire complex). If only this last step of the procedure is captured in the
output mmCIF file (in this case, without any information on how the comparative
models were themselves obtained), the provenance chain is broken and the
outputs cannot
be reproduced.
One solution to this problem is to diligently ensure that every input to
the modeling has been deposited in an appropriate database and always refer
to inputs using :class:`ihm.location.DatabaseLocation`. In cases where this
is not possible, the library provides some metadata parsers in the
:mod:`ihm.metadata` module. These will make a best effort to extract any
metadata from files available on the local hard drive to better describe their
provenance. For example, if the file contains headers or other information that
shows that it is merely a copy of a file deposited in an official database,
the metadata parsers will return a suitable
:class:`~ihm.location.DatabaseLocation` for the dataset. Other information,
such as the software used to generate the file, may be available in the
metadata.
For more details, see :class:`ihm.metadata.MRCParser` for electron microscopy
density maps (MRC files) or
:class:`ihm.metadata.PDBParser` for coordinate files in PDB format.
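For example, a minimal sketch (``input.pdb`` is a hypothetical local file):

.. code-block:: python

    import ihm.metadata

    parser = ihm.metadata.PDBParser()
    info = parser.parse_file('input.pdb')
    # 'dataset' points to a suitable Dataset object, with a
    # DatabaseLocation if the file's headers show that it is a
    # copy of a database entry
    dataset = info['dataset']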
python-ihm-2.7/docs/reader.rst 0000664 0000000 0000000 00000001022 15035733372 0016405 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _reader_module:
The :mod:`ihm.reader` Python module
===================================
.. automodule:: ihm.reader
.. autofunction:: read
.. autoexception:: UnknownCategoryWarning
.. autoexception:: UnknownKeywordWarning
.. autoexception:: OldFileError
.. autoclass:: Handler
:members:
.. autoclass:: SystemReader
:members:
:inherited-members:
.. autoclass:: IDMapper
:members:
.. autoclass:: RangeIDMapper
:members:
.. autoclass:: Variant
:members:
.. autoclass:: IHMVariant
python-ihm-2.7/docs/reference.rst 0000664 0000000 0000000 00000000536 15035733372 0017112 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _reference_module:
The :mod:`ihm.reference` Python module
======================================
.. automodule:: ihm.reference
.. autoclass:: Reference
:members:
.. autoclass:: Sequence
:members:
.. autoclass:: UniProtSequence
:members:
.. autoclass:: Alignment
:members:
.. autoclass:: SeqDif
:members:
python-ihm-2.7/docs/representation.rst 0000664 0000000 0000000 00000000662 15035733372 0020216 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _representation_module:
The :mod:`ihm.representation` Python module
===========================================
.. automodule:: ihm.representation
.. autoclass:: Segment
:members:
.. autoclass:: AtomicSegment
:members:
.. autoclass:: ResidueSegment
:members:
.. autoclass:: MultiResidueSegment
:members:
.. autoclass:: FeatureSegment
:members:
.. autoclass:: Representation
:members:
python-ihm-2.7/docs/restraint.rst 0000664 0000000 0000000 00000003430 15035733372 0017163 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _restraint_module:
The :mod:`ihm.restraint` Python module
======================================
.. automodule:: ihm.restraint
.. autoclass:: PseudoSite
:members:
.. autoclass:: Restraint
:members:
.. autoclass:: RestraintGroup
:members:
.. autoclass:: EM3DRestraint
:members:
.. autoclass:: EM3DRestraintFit
:members:
.. autoclass:: EM2DRestraint
:members:
.. autoclass:: EM2DRestraintFit
:members:
.. autoclass:: SASRestraint
:members:
.. autoclass:: SASRestraintFit
:members:
.. autoclass:: DistanceRestraint
:members:
.. autoclass:: HarmonicDistanceRestraint
:members:
.. autoclass:: UpperBoundDistanceRestraint
:members:
.. autoclass:: LowerBoundDistanceRestraint
:members:
.. autoclass:: LowerUpperBoundDistanceRestraint
:members:
.. autoclass:: CrossLinkRestraint
:members:
.. autoclass:: ExperimentalCrossLink
:members:
.. autoclass:: CrossLinkPseudoSite
:members:
.. autoclass:: CrossLink
:members:
.. autoclass:: ResidueCrossLink
:members:
.. autoclass:: FeatureCrossLink
:members:
.. autoclass:: AtomCrossLink
:members:
.. autoclass:: CrossLinkFit
:members:
.. autoclass:: CrossLinkGroupFit
:members:
.. autoclass:: Feature
:members:
.. autoclass:: ResidueFeature
:members:
.. autoclass:: AtomFeature
:members:
.. autoclass:: NonPolyFeature
:members:
.. autoclass:: PseudoSiteFeature
:members:
.. autoclass:: GeometricRestraint
:members:
.. autoclass:: CenterGeometricRestraint
:members:
.. autoclass:: InnerSurfaceGeometricRestraint
:members:
.. autoclass:: OuterSurfaceGeometricRestraint
:members:
.. autoclass:: DerivedDistanceRestraint
:members:
.. autoclass:: PredictedContactRestraint
:members:
.. autoclass:: HDXRestraint
:members:
python-ihm-2.7/docs/source.rst 0000664 0000000 0000000 00000000515 15035733372 0016451 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _source_module:
The :mod:`ihm.source` Python module
=====================================
.. automodule:: ihm.source
.. autoclass:: Source
:members:
.. autoclass:: Details
:members:
.. autoclass:: Manipulated
:members:
.. autoclass:: Natural
:members:
.. autoclass:: Synthetic
:members:
python-ihm-2.7/docs/startmodel.rst 0000664 0000000 0000000 00000000706 15035733372 0017331 0 ustar 00root root 0000000 0000000 .. highlight:: rest
.. _startmodel_module:
The :mod:`ihm.startmodel` Python module
=======================================
.. automodule:: ihm.startmodel
.. autoclass:: SequenceIdentityDenominator
:members:
.. autoclass:: SequenceIdentity
:members:
.. autoclass:: Template
:members:
.. autoclass:: StartingModel
:members:
.. autoclass:: PDBHelix
:members:
.. autoclass:: SeqDif
:members:
.. autoclass:: MSESeqDif
:members:
python-ihm-2.7/docs/usage.rst 0000664 0000000 0000000 00000020620 15035733372 0016254 0 ustar 00root root 0000000 0000000 Usage
*****
Usage of the library for output consists of first creating a hierarchy of
Python objects that together describe the system, and then dumping that
hierarchy to an mmCIF file.
For a complete worked example, see the
`simple docking example `_.
The top level of the hierarchy in IHM is the :class:`ihm.System`. All other
objects are referenced from a System object.
Datasets
========
Any data used anywhere in the modeling (including in validation) can be
referenced with an :class:`ihm.dataset.Dataset`. For example,
electron microscopy data is referenced with
:class:`ihm.dataset.EMDensityDataset` and small angle scattering data with
:class:`ihm.dataset.SASDataset`.
A dataset uses an
:class:`ihm.location.Location` object to describe where it is stored.
Typically this is an :class:`ihm.location.DatabaseLocation` for something
that's deposited in a experiment-specific database such as PDB, EMDB, PRIDE,
or EMPIAR, or :class:`ihm.location.InputFileLocation` for something that's
stored as a simple file, either on the local disk or at a location described
with a DOI such as `Zenodo `_ or a publication's
supplementary information. See the
`locations example `_
for more examples.
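For example, to describe an EM map deposited in EMDB (a minimal sketch;
the accession code is illustrative):

.. code-block:: python

    import ihm.location
    import ihm.dataset

    loc = ihm.location.EMDBLocation('EMDB-1234')
    em_dataset = ihm.dataset.EMDensityDataset(loc)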
System architecture
===================
The architecture of the system is described with a number of classes:
- :class:`ihm.Entity` describes each unique sequence.
- :class:`ihm.AsymUnit` describes each asymmetric unit (chain) in the system.
For example, a homodimer would consist of two asymmetric units, both
pointing to the same entity, while a heterodimer contains two entities.
It is also possible for an entity to exist with no asymmetric units pointing
to it - this typically corresponds to something seen in an experiment (such
as a cross-linking study) which was not modeled. Note that the IHM
extension currently contains no support for symmetry, so two chains that
are symmetrically related should each be represented as an "asymmetric"
unit.
- :class:`ihm.Assembly` groups asymmetric units and/or entities, or parts of
them. Assemblies are used to describe which parts of the system correspond
to each input source of data, or that were modeled.
- :class:`ihm.representation.Representation` describes how each part of the
system was represented in the modeling, for example
:class:`as atoms ` or
:class:`as coarse-grained spheres `.
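Putting these classes together, a minimal sketch of a homodimer (two
copies of the same sequence, both represented atomically) might look like:

.. code-block:: python

    import ihm
    import ihm.representation

    entity = ihm.Entity('AAA', description='Subunit A')
    # Two asymmetric units (chains) mapping to the same entity
    asym1 = ihm.AsymUnit(entity, details='First copy')
    asym2 = ihm.AsymUnit(entity, details='Second copy')
    assembly = ihm.Assembly((asym1, asym2), name='Modeled assembly')
    rep = ihm.representation.Representation(
        [ihm.representation.AtomicSegment(asym1, rigid=True),
         ihm.representation.AtomicSegment(asym2, rigid=True)])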
Restraints and sampling
=======================
Restraints, which score or otherwise fit the computational model against
the input data, can be created as :class:`ihm.restraint.Restraint` objects.
These generally take as input a :class:`~ihm.dataset.Dataset` pointing to
the input data, and an :class:`~ihm.Assembly` describing which part of the
model the data corresponds to. For example, there are restraints for
:class:`3D EM ` and
:class:`small angle scattering `.
:class:`ihm.protocol.Protocol` objects describe how models were generated
from the input data. A protocol can consist of
:class:`multiple steps `, such as molecular dynamics or
Monte Carlo, followed by one or more analyses, such as clustering, filtering,
rescoring, or validation, described by :class:`ihm.analysis.Analysis` objects.
These objects generally take an :class:`~ihm.Assembly` to indicate what part
of the system was considered and a
:class:`group of datasets ` to show which data
guided the modeling or analysis.
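For example (a sketch, assuming ``system``, ``em_dataset``,
``all_datasets``, and ``assembly`` were created as above):

.. code-block:: python

    import ihm.restraint
    import ihm.protocol

    em_rsr = ihm.restraint.EM3DRestraint(dataset=em_dataset,
                                         assembly=assembly)
    system.restraints.append(em_rsr)

    protocol = ihm.protocol.Protocol(name='Modeling')
    protocol.steps.append(ihm.protocol.Step(
        assembly=assembly, dataset_group=all_datasets,
        method='Monte Carlo', name='Production sampling',
        num_models_begin=0, num_models_end=1000))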
Model coordinates
=================
:class:`ihm.model.Model` objects give the actual coordinates of the final
generated models. These point to the :class:`~ihm.Assembly` of what was
modeled, the :class:`~ihm.protocol.Protocol` describing how the modeling
was done, and the :class:`~ihm.representation.Representation` showing how
the model was represented.
Models can be grouped together for any purpose using the
:class:`ihm.model.ModelGroup` class. If a given group describes an ensemble
of models, the :class:`ihm.model.Ensemble` class allows for additional
information on the ensemble to be provided, such as
:class:`localization densities ` of parts of
the system and precision. Due to size, generally only representative models
of an ensemble are deposited in mmCIF, but the :class:`~ihm.model.Ensemble`
class allows the full ensemble to be referred to, for example in a more
compact binary format (e.g. DCD) deposited at a given DOI. Groups of models
can also be shown as corresponding to different states of the system using
the :class:`ihm.model.State` class.
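For example (a sketch, assuming ``model`` is an existing
:class:`ihm.model.Model` and ``system`` is the :class:`ihm.System`):

.. code-block:: python

    import ihm.model

    model_group = ihm.model.ModelGroup([model], name='All models')
    state = ihm.model.State([model_group])
    system.state_groups.append(ihm.model.StateGroup([state]))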
Metadata
========
Metadata can also be added to the system, such as
- :class:`ihm.Citation`: publication(s) that describe this modeling or the
methods used in it.
- :class:`ihm.Software`: software packages used to process the experimental
data, generate intermediate inputs, do the modeling itself, and/or
process the output.
- :class:`ihm.Grant`: funding support for the modeling.
- :class:`ihm.reference.UniProtSequence`: information on a sequence used
in modeling, in UniProt.
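For example, recording a software package (a sketch with placeholder
values; the name, URL, and version are hypothetical):

.. code-block:: python

    system.software.append(ihm.Software(
        name='MyModelingTool', classification='integrative modeling',
        description='Software used to generate the models',
        location='https://example.com/mymodelingtool', version='1.0'))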
Residue numbering
=================
The library keeps track of several numbering schemes to reflect the reality
of the data used in modeling:
- *Internal numbering*. Residues are always numbered sequentially starting at
1 in an :class:`~ihm.Entity`. All references to residues or residue ranges in
the library use this numbering. For polymers, this internal numbering matches
the ``seq_id`` used in the mmCIF dictionary, while for branched entities,
this matches ``num`` in the dictionary. (For other types of entities
(non-polymers, waters) ``seq_id`` is not used in mmCIF,
but the residues are still numbered sequentially from 1 in this library.)
- *Author-provided numbering*. If a different numbering scheme is used by the
authors, for example to correspond to the numbering of the original sequence
that is modeled, this can be given as an author-provided numbering for
one or more asymmetric units. See the ``auth_seq_id_map`` and
``orig_auth_seq_id_map`` parameters to :class:`~ihm.AsymUnit`. (The mapping
between author-provided and internal numbering is given in tables such
as ``pdbx_poly_seq_scheme`` in the mmCIF file.) Two maps are provided because
PDB allows for two distinct author-provided schemes; the "original"
author-provided numbering ``orig_auth_seq_id_map`` is entirely unrestricted
but is only used internally, while ``auth_seq_id_map`` must follow certain
PDB rules (and generally matches the residue numbers used in legacy PDB
files). In most cases, only ``auth_seq_id_map`` is used.
- *Starting model numbering*. If the initial state of the modeling is given
by one or more PDB files, the numbering of residues in those files may not
line up with the internal numbering. In this case an offset from starting
model numbering to internal numbering can be provided - see the ``offset``
parameter to :class:`~ihm.startmodel.StartingModel`.
- *Reference sequence numbering*. The modeled sequence may differ from that
in a database such as UniProt, which is itself numbered sequentially from 1
(for example, the modeled sequence may be a subset of the UniProt sequence,
such that the first modeled residue is not the first residue in UniProt).
The correspondence between the internal and reference sequences is given
with :class:`ihm.reference.Alignment` objects.
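As a sketch of the author-provided scheme, an asymmetric unit whose author
numbering starts at 42 rather than 1 (an offset of 41 from the internal
numbering) could be declared as:

.. code-block:: python

    asym = ihm.AsymUnit(entity, auth_seq_id_map=41)

(``auth_seq_id_map`` also accepts a dict mapping each internal ``seq_id``
to its author-provided number.)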
Output
======
Once the hierarchy of classes is complete, it can be freely inspected or
modified. All the classes are simple lightweight Python objects, generally
with the relevant data available as member variables. For example, modeling
packages such as `IMP `_ will typically
generate an IHM hierarchy from their own internal data models, but in many
cases some information relevant to IHM (such as
the :class:`associated publication `) cannot be determined
automatically and can be filled in by adding more objects to the hierarchy.
The complete hierarchy can be written out to an mmCIF or BinaryCIF file using
the :func:`ihm.dumper.write` function.
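For example, to write a single ``system`` to ``output.cif``:

.. code-block:: python

    import ihm.dumper

    with open('output.cif', 'w') as fh:
        ihm.dumper.write(fh, [system])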
Input
=====
Hierarchies of IHM classes can also be read from mmCIF or BinaryCIF files.
This is done using the :func:`ihm.reader.read` function, which returns a list of
:class:`ihm.System` objects.
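For example (``input.cif`` is a hypothetical file name):

.. code-block:: python

    import ihm.reader

    with open('input.cif') as fh:
        systems = ihm.reader.read(fh)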
python-ihm-2.7/examples/ 0000775 0000000 0000000 00000000000 15035733372 0015304 5 ustar 00root root 0000000 0000000 python-ihm-2.7/examples/atom_reader.c 0000664 0000000 0000000 00000007031 15035733372 0017733 0 ustar 00root root 0000000 0000000 /* This is a simple demonstration of using the C mmCIF parser
directly from C code. It will read the named mmCIF file and
print the name and coordinates of each atom in the file.
It is probably most instructive to read the comments in this file starting
at the bottom (main function) and working back up.
Compile with something like
gcc -g -Wall atom_reader.c ../src/ihm_format.c -I ../src/ -o atom_reader
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include "ihm_format.h"
/* Data that is passed to our callback function */
struct atom_site_data {
struct ihm_keyword *id, *x, *y, *z;
};
/* Callback function called for each data item in atom_site */
static void atom_site_handler(struct ihm_reader *reader, void *data,
struct ihm_error **err)
{
struct atom_site_data *ad = data;
/* Here we assume that data is actually present in the file for each keyword.
More generally, we should query the in_file, omitted, and unknown flags
in the ihm_keyword struct to handle missing keywords or those that have
the '.' or '?' values, respectively */
printf("Atom %s at %s,%s,%s\n", ad->id->data, ad->x->data, ad->y->data,
ad->z->data);
}
/* Register a callback function with the ihm_reader to handle the atom_site
category */
static void add_atom_site_handler(struct ihm_reader *reader)
{
struct atom_site_data *data = malloc(sizeof(struct atom_site_data));
/* Register a callback for the atom_site category. 'data' will be passed
to it (and 'data' will be freed with 'free' when we're done) */
struct ihm_category *c = ihm_category_new(reader, "_atom_site",
atom_site_handler, NULL, NULL, data,
free);
/* Ask the reader to extract a set of keywords from the atom_site
category. ihm_keywords are stored in the ihm_category and are automatically
freed when no longer needed. The actual values are stored in the
ihm_keyword objects, so we give our callback a pointer to each one so
it can get the values. */
data->id = ihm_keyword_new(c, "label_atom_id");
data->x = ihm_keyword_new(c, "cartn_x");
data->y = ihm_keyword_new(c, "cartn_y");
data->z = ihm_keyword_new(c, "cartn_z");
}
static void read_mmcif_filedesc(int fd)
{
int more_data;
/* IHM error indicator. NULL corresponds to no error. If a function fails
this will be set to non-NULL */
struct ihm_error *err = NULL;
/* Point an ihm_reader object to the file */
struct ihm_file *fh = ihm_file_new_from_fd(fd);
struct ihm_reader *reader = ihm_reader_new(fh);
/* Add callback functions that will handle file data */
add_atom_site_handler(reader);
/* Actually read the file. more_data will be set TRUE on return iff the
file contains more data blocks after this one. */
if (!ihm_read_file(reader, &more_data, &err)) {
fprintf(stderr, "IHM error: %s\n", err->msg);
ihm_error_free(err);
ihm_reader_free(reader);
exit(1);
}
ihm_reader_free(reader);
}
static void read_mmcif_filename(const char *fname)
{
int fd;
printf("Reading atoms from %s\n", fname);
fd = open(fname, O_RDONLY);
if (fd >= 0) {
read_mmcif_filedesc(fd);
close(fd);
} else {
fprintf(stderr, "Could not open %s: %s\n", fname, strerror(errno));
exit(1);
}
}
int main(int argc, char *argv[])
{
if (argc != 2) {
fprintf(stderr, "Usage: atom_reader filename.cif\n");
return 1;
}
read_mmcif_filename(argv[1]);
return 0;
}
python-ihm-2.7/examples/ligands_water.py 0000664 0000000 0000000 00000007341 15035733372 0020506 0 ustar 00root root 0000000 0000000 # This example demonstrates how non-polymeric entities (ligands, water)
# are handled by the Python IHM library. See the simple-docking.py example
# for an introduction to the library.
import ihm
import ihm.dumper
import ihm.protocol
import ihm.representation
import ihm.model
system = ihm.System()
# An entity corresponding to an amino acid (polyalanine) sequence
entity_protein = ihm.Entity('AAA', description='Subunit A')
# An entity corresponding to an RNA sequence
entity_rna = ihm.Entity('ACG', alphabet=ihm.RNAAlphabet,
description='RNA chain')
# An entity corresponding to a DNA sequence
entity_dna = ihm.Entity(['DA', 'DC', 'DG'], alphabet=ihm.DNAAlphabet,
description='DNA chain')
# Non-polymers such as ligands or water should each live in their own Entity:
# A ligand entity (in this case, heme)
heme = ihm.NonPolymerChemComp("HEM", name='PROTOPORPHYRIN IX CONTAINING FE',
formula='C34 H32 Fe N4 O4')
entity_heme = ihm.Entity([heme], description='Heme')
# Water
entity_h2o = ihm.Entity([ihm.WaterChemComp()], description='Water')
system.entities.extend((entity_protein, entity_rna, entity_dna, entity_heme,
entity_h2o))
# Next, we define asymmetric units for everything we modeled.
# Here, we have a single instance of each protein, RNA and DNA, two hemes,
# plus crystal waters. Note that waters must use the WaterAsymUnit class rather
# than AsymUnit, as the number of waters in the unit must be specified.
asym_protein = ihm.AsymUnit(entity_protein, details='Subunit A')
asym_rna = ihm.AsymUnit(entity_rna, details='RNA chain')
asym_dna = ihm.AsymUnit(entity_dna, details='DNA chain')
asym_heme1 = ihm.AsymUnit(entity_heme, details='First heme')
asym_heme2 = ihm.AsymUnit(entity_heme, details='Second heme')
asym_h2o = ihm.WaterAsymUnit(entity_h2o, number=2, details='Crystal waters')
system.asym_units.extend((asym_protein, asym_rna, asym_dna, asym_heme1,
asym_heme2, asym_h2o))
# Just as in the simple-docking.py example, we can add models with coordinates.
# Here we define an atomic model containing just the two hemes and the water.
assembly = ihm.Assembly((asym_heme1, asym_heme2, asym_h2o),
name="Modeled assembly")
rep = ihm.representation.Representation(
[ihm.representation.AtomicSegment(asym_heme1, rigid=False),
ihm.representation.AtomicSegment(asym_heme2, rigid=False),
ihm.representation.AtomicSegment(asym_h2o, rigid=False)])
protocol = ihm.protocol.Protocol(name='Modeling')
class MyModel(ihm.model.Model):
def get_atoms(self):
# seq_id only makes sense for polymers and waters;
# for ligands it should be None
yield ihm.model.Atom(asym_unit=asym_heme1, type_symbol='FE', het=True,
seq_id=None, atom_id='FE', x=0., y=0., z=0.)
yield ihm.model.Atom(asym_unit=asym_heme2, type_symbol='FE', het=True,
seq_id=None, atom_id='FE', x=10., y=10., z=10.)
yield ihm.model.Atom(asym_unit=asym_h2o, type_symbol='O', het=True,
seq_id=1, atom_id='O', x=20., y=20., z=20.)
yield ihm.model.Atom(asym_unit=asym_h2o, type_symbol='O', het=True,
seq_id=2, atom_id='O', x=30., y=30., z=30.)
# We have only a single model in a single state:
model = MyModel(assembly=assembly, protocol=protocol, representation=rep,
name='Best model')
model_group = ihm.model.ModelGroup([model], name='All models')
state = ihm.model.State([model_group])
system.state_groups.append(ihm.model.StateGroup([state]))
# Once the system is complete, we can write it out to an mmCIF file:
with open('output.cif', 'w') as fh:
ihm.dumper.write(fh, [system])
python-ihm-2.7/examples/locations.py 0000664 0000000 0000000 00000006235 15035733372 0017657 0 ustar 00root root 0000000 0000000 # This example demonstrates a variety of ways an IHM mmCIF file can point
# to external resources, such as script files, modeling trajectories, or
# electron microscopy density maps. These may be too big to fit efficiently
# in the mmCIF file, or may already be deposited in an experiment-specific
# database (so it makes no sense to copy them).
import ihm.location
import ihm.dataset
import ihm.dumper
system = ihm.System()
# To point to an external file, we use one of the classes in the ihm.location
# module. Here we reference this Python script itself on the local disk (output
# paths in the mmCIF file will be relative to the current working directory):
loc = ihm.location.WorkflowFileLocation(
"locations.py",
details="The Python script used to generate this mmCIF file")
# Add the location to the system, so it gets output to the mmCIF file
system.locations.append(loc)
# For public mmCIF files, external files need to also be in a public location,
# for example, in an archive file stored at a service such as Zenodo that
# assigns a DOI. To handle this, we use a Repository object:
r = ihm.location.Repository(
doi='10.5281/zenodo.820724',
url='https://zenodo.org/record/820724/files/archive.zip')
loc = ihm.location.OutputFileLocation("densities/subunitA.mrc", repo=r)
system.locations.append(loc)
# Users of the mmCIF can then obtain the file subunitA.mrc by downloading
# archive.zip from the given DOI or URL, unzipping it, and then looking in the
# densities directory. Multiple files can share the same repository.
# Note that this URL is for example purposes only (there isn't really an
# 'archive.zip' at that DOI).
# Datasets are the most common users of external files. For example, to refer
# to an input PDB file in the current directory:
loc = ihm.location.InputFileLocation("simple.pdb", details="Input PDB file")
d = ihm.dataset.PDBDataset(loc)
# Add the dataset to the mmCIF file. (Normally a dataset would be added to the
# object that uses it, such as a restraint. If we want to include a dataset
# that isn't referenced from anything else, as in this example, we can add it
# to the 'orphan' list.)
system.orphan_datasets.append(d)
# Generally, datasets will be deposited in an experiment-specific database.
# We can point to such a database using a subclass of DatabaseLocation, for
# example to point to PDB:
loc = ihm.location.PDBLocation('1abc')
system.orphan_datasets.append(ihm.dataset.PDBDataset(loc))
# If the current working directory is itself a checkout of a repository which
# is archived at a DOI, we can retroactively update all 'local' paths added
# above to point to this DOI. After calling update_locations_in_repositories(),
# all files under the parent directory (..) are assumed to be available in
# the python-ihm.zip archive. For example, simple.pdb can be found as
# python-ihm-v0.1/examples/simple.pdb in the archive.
r = ihm.location.Repository(
doi='10.5281/zenodo.802915',
url='https://zenodo.org/record/802915/files/python-ihm.zip',
top_directory="python-ihm-v0.1", root="..")
system.update_locations_in_repositories([r])
# Write out everything to an mmCIF file
with open('output.cif', 'w') as fh:
ihm.dumper.write(fh, [system])
python-ihm-2.7/examples/mini.cif 0000664 0000000 0000000 00000013035 15035733372 0016725 0 ustar 00root root 0000000 0000000 data_model
#
_exptl.method 'model, MODELLER Version 9.24 2020/08/21 11:54:31'
#
_modeller.version 9.24
#
loop_
_struct_asym.id
_struct_asym.entity_id
_struct_asym.details
A 1 ?
B 2 ?
#
loop_
_entity_poly_seq.entity_id
_entity_poly_seq.num
_entity_poly_seq.mon_id
1 1 VAL
1 2 GLY
1 3 GLN
1 4 GLN
1 5 TYR
1 6 SER
1 7 SER
2 1 ASP
2 2 GLU
#
loop_
_atom_site.group_PDB
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.auth_asym_id
_atom_site.label_seq_id
_atom_site.auth_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.label_entity_id
_atom_site.id
_atom_site.pdbx_PDB_model_num
ATOM N N . VAL A A 1 2 ? 115.846 27.965 -26.370 1.000 141.830 1 1 1
ATOM C CA . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 2 1
ATOM C C . VAL A A 1 2 ? 113.517 27.504 -27.287 1.000 143.910 1 3 1
ATOM O O . VAL A A 1 2 ? 113.885 27.746 -28.441 1.000 146.600 1 4 1
ATOM C CB . VAL A A 1 2 ? 113.901 29.406 -25.683 1.000 143.750 1 5 1
ATOM C CG1 . VAL A A 1 2 ? 115.030 30.438 -25.931 1.000 144.590 1 6 1
ATOM C CG2 . VAL A A 1 2 ? 112.669 29.783 -26.486 1.000 144.500 1 7 1
ATOM N N . GLY A A 2 3 ? 112.371 26.869 -27.012 1.000 142.200 1 8 1
ATOM C CA . GLY A A 2 3 ? 111.506 26.368 -28.075 1.000 137.530 1 9 1
ATOM C C . GLY A A 2 3 ? 111.719 24.869 -28.275 1.000 135.820 1 10 1
ATOM O O . GLY A A 2 3 ? 110.768 24.093 -28.268 1.000 134.380 1 11 1
ATOM N N . GLN A A 3 4 ? 112.989 24.479 -28.392 1.000 134.310 1 12 1
ATOM C CA . GLN A A 3 4 ? 113.468 23.113 -28.639 1.000 128.420 1 13 1
ATOM C C . GLN A A 3 4 ? 113.556 22.956 -30.163 1.000 121.240 1 14 1
ATOM O O . GLN A A 3 4 ? 113.552 23.977 -30.840 1.000 127.090 1 15 1
ATOM C CB . GLN A A 3 4 ? 112.614 22.038 -27.919 1.000 132.340 1 16 1
ATOM C CG . GLN A A 3 4 ? 113.028 21.943 -26.407 1.000 135.370 1 17 1
ATOM C CD . GLN A A 3 4 ? 112.604 20.667 -25.677 1.000 138.260 1 18 1
ATOM O OE1 . GLN A A 3 4 ? 112.836 19.543 -26.150 1.000 141.450 1 19 1
ATOM N NE2 . GLN A A 3 4 ? 112.006 20.839 -24.497 1.000 139.310 1 20 1
ATOM N N . GLN A A 4 5 ? 113.648 21.739 -30.710 1.000 124.970 1 21 1
ATOM C CA . GLN A A 4 5 ? 113.808 21.534 -32.168 1.000 117.620 1 22 1
ATOM C C . GLN A A 4 5 ? 114.778 22.519 -32.833 1.000 112.980 1 23 1
ATOM O O . GLN A A 4 5 ? 114.677 23.727 -32.677 1.000 116.850 1 24 1
ATOM C CB . GLN A A 4 5 ? 112.456 21.545 -32.905 1.000 121.870 1 25 1
ATOM C CG . GLN A A 4 5 ? 111.763 20.153 -32.917 1.000 123.750 1 26 1
ATOM C CD . GLN A A 4 5 ? 110.863 19.874 -34.145 1.000 123.650 1 27 1
ATOM O OE1 . GLN A A 4 5 ? 110.040 20.712 -34.537 1.000 122.500 1 28 1
ATOM N NE2 . GLN A A 4 5 ? 111.008 18.674 -34.737 1.000 122.090 1 29 1
ATOM N N . TYR A A 5 6 ? 115.713 21.980 -33.598 1.000 109.460 1 30 1
ATOM C CA . TYR A A 5 6 ? 116.743 22.770 -34.259 1.000 103.700 1 31 1
ATOM C C . TYR A A 5 6 ? 116.348 23.366 -35.602 1.000 100.320 1 32 1
ATOM O O . TYR A A 5 6 ? 115.530 22.799 -36.311 1.000 98.760 1 33 1
ATOM C CB . TYR A A 5 6 ? 117.973 21.876 -34.402 1.000 104.580 1 34 1
ATOM C CG . TYR A A 5 6 ? 119.003 22.282 -35.425 1.000 105.030 1 35 1
ATOM C CD1 . TYR A A 5 6 ? 119.591 23.546 -35.395 1.000 106.020 1 36 1
ATOM C CD2 . TYR A A 5 6 ? 119.450 21.366 -36.380 1.000 105.180 1 37 1
ATOM C CE1 . TYR A A 5 6 ? 120.606 23.890 -36.289 1.000 106.990 1 38 1
ATOM C CE2 . TYR A A 5 6 ? 120.461 21.694 -37.276 1.000 106.420 1 39 1
ATOM C CZ . TYR A A 5 6 ? 121.039 22.958 -37.226 1.000 107.110 1 40 1
ATOM O OH . TYR A A 5 6 ? 122.057 23.290 -38.095 1.000 107.500 1 41 1
ATOM N N . SER A A 6 7 ? 116.921 24.519 -35.944 1.000 96.290 1 42 1
ATOM C CA . SER A A 6 7 ? 116.626 25.161 -37.229 1.000 93.490 1 43 1
ATOM C C . SER A A 6 7 ? 117.900 25.595 -37.944 1.000 91.900 1 44 1
ATOM O O . SER A A 6 7 ? 118.767 26.246 -37.352 1.000 91.810 1 45 1
ATOM C CB . SER A A 6 7 ? 115.732 26.388 -37.048 1.000 93.090 1 46 1
ATOM O OG . SER A A 6 7 ? 116.503 27.521 -36.705 1.000 92.330 1 47 1
ATOM N N . SER A A 7 8 ? 117.999 25.245 -39.224 1.000 89.750 1 48 1
ATOM C CA . SER A A 7 8 ? 119.165 25.590 -40.036 1.000 87.320 1 49 1
ATOM C C . SER A A 7 8 ? 119.224 27.089 -40.277 1.000 84.820 1 50 1
ATOM O O . SER A A 7 8 ? 120.074 27.594 -41.008 1.000 84.020 1 51 1
ATOM C CB . SER A A 7 8 ? 119.112 24.859 -41.383 1.000 88.180 1 52 1
ATOM O OG . SER A A 7 8 ? 117.956 25.221 -42.117 1.000 88.850 1 53 1
ATOM N N . ASP B B 1 3 ? 71.339 57.678 52.031 1.000 152.010 2 54 1
ATOM C CA . ASP B B 1 3 ? 70.427 58.819 51.717 1.000 152.390 2 55 1
ATOM C C . ASP B B 1 3 ? 70.144 58.821 50.222 1.000 151.960 2 56 1
ATOM O O . ASP B B 1 3 ? 70.984 59.245 49.435 1.000 151.590 2 57 1
ATOM C CB . ASP B B 1 3 ? 71.083 60.142 52.119 1.000 153.250 2 58 1
ATOM C CG . ASP B B 1 3 ? 71.660 60.105 53.526 1.000 154.120 2 59 1
ATOM O OD1 . ASP B B 1 3 ? 72.652 59.371 53.741 1.000 154.200 2 60 1
ATOM O OD2 . ASP B B 1 3 ? 71.119 60.804 54.415 1.000 154.250 2 61 1
ATOM N N . GLU B B 2 4 ? 68.956 58.362 49.837 1.000 151.910 2 62 1
ATOM C CA . GLU B B 2 4 ? 68.584 58.274 48.425 1.000 152.090 2 63 1
ATOM C C . GLU B B 2 4 ? 68.584 59.573 47.616 1.000 151.320 2 64 1
ATOM O O . GLU B B 2 4 ? 67.786 59.730 46.686 1.000 150.840 2 65 1
ATOM C CB . GLU B B 2 4 ? 67.218 57.585 48.274 1.000 153.600 2 66 1
ATOM C CG . GLU B B 2 4 ? 66.035 58.328 48.890 1.000 155.740 2 67 1
ATOM C CD . GLU B B 2 4 ? 64.690 57.699 48.526 1.000 156.760 2 68 1
ATOM O OE1 . GLU B B 2 4 ? 64.487 56.498 48.819 1.000 156.940 2 69 1
ATOM O OE2 . GLU B B 2 4 ? 63.835 58.409 47.947 1.000 157.060 2 70 1
HETATM FE FE1 . SF4 C C . 1 ? 14.698 20.785 10.230 1.00 13.78 3 71 1
python-ihm-2.7/examples/non_standard_residues.py 0000664 0000000 0000000 00000004736 15035733372 0022245 0 ustar 00root root 0000000 0000000 # This example demonstrates how to add non-standard residues to
# sequences handled by the Python IHM library. See the simple-docking.py
# example for an introduction to the library.
import ihm
import ihm.dumper
system = ihm.System()
# IHM contains definitions for standard amino and nucleic acids, plus
# a few common non-standard residues such as MSE and UNK.
# To create a new non-standard residue, we first need to create a chemical
# component for it. In this case, we add a definition for norvaline, an
# isomer of valine. IHM provides a ChemComp class for this purpose and a
# number of more specialized subclasses. Since norvaline is a chiral peptide,
# here we define it in its L- form using the LPeptideChemComp class.
#
# 'id' should match the officially defined name of the component, as defined
# in the chemical component dictionary: https://www.wwpdb.org/data/ccd
# (See also https://www3.rcsb.org/ligand/NVA)
# 'code' is used to populate the primary sequence in the output mmCIF file.
# For non-standard residues it should normally match 'id'.
# 'code_canonical' is the one-letter code of the closest standard residue.
# Here we use 'V', valine.
norvaline = ihm.LPeptideChemComp(id='NVA', code='NVA', code_canonical='V',
name='NORVALINE', formula='C5 H11 N O2')
# The Entity constructor takes a sequence of either or both one-letter codes
# and ChemComp objects, so now we can make a sequence containing both
# alanine and norvaline:
entity1 = ihm.Entity(['A', 'A', norvaline, 'A'], description='First entity')
# If a particular non-standard residue is commonly used in your own software,
# and you have assigned a one-letter code for it, you can subclass
# the ihm Alphabet class appropriately. Here we extend the normal set of
# one-letter codes (uppercase) for standard L- amino acids to add 'n' for
# norvaline:
class MyAlphabet(ihm.LPeptideAlphabet):
# Alphabet contains a _comps dictionary that is a simple mapping from
# codes (usually one-letter) to ChemComp objects
_comps = {}
_comps.update(ihm.LPeptideAlphabet._comps)
_comps['n'] = norvaline
# Now we can pass a primary sequence using our custom alphabet to include
# norvaline alongside standard cysteine:
entity2 = ihm.Entity('CCnC', alphabet=MyAlphabet, description="Second entity")
system.entities.extend((entity1, entity2))
# Once the system is complete, we can write it out to an mmCIF file:
with open('output.cif', 'w') as fh:
ihm.dumper.write(fh, [system])
python-ihm-2.7/examples/simple-docking.py 0000664 0000000 0000000 00000015343 15035733372 0020571 0 ustar 00root root 0000000 0000000 # This example demonstrates the use of the Python IHM library to generate
# an mmCIF file for a very simple integrative docking study. Two subunits,
# A and B, each of which is fitted against small angle X-ray (SAXS) data, are
# docked together into a complex, AB, which is fitted against an electron
# microscopy density map.
import ihm
import ihm.location
import ihm.dataset
import ihm.representation
import ihm.restraint
import ihm.protocol
import ihm.model
import ihm.dumper
# First, we create a system, which contains everything we know about the
# modeling. A single mmCIF file can contain multiple Systems, but in most
# cases we use just one:
system = ihm.System()
# Next, we describe the input data we used, using dataset classes.
# Each source of data has a location, such as a file on disk or a database
# entry, and a type. In this example we used EM density data, which we'll
# say lives in the EMDB database:
loc = ihm.location.EMDBLocation('EMDB-1234')
em_dataset = ihm.dataset.EMDensityDataset(loc)
# We also used two SAXS profiles, which we'll say live in SASBDB:
saxsA_dataset = ihm.dataset.SASDataset(ihm.location.SASBDBLocation('SASDB123'))
saxsB_dataset = ihm.dataset.SASDataset(ihm.location.SASBDBLocation('SASDB456'))
# Where datasets are derived from some other data, it is helpful to also point
# back to that primary data. In this case, let's say the EM density was
# derived from a set of EM micrographs, deposited in the EMPIAR database:
m = ihm.dataset.EMMicrographsDataset(ihm.location.EMPIARLocation('EMPIAR-123'))
em_dataset.parents.append(m)
# Next, define the entities for each unique sequence in the system
# (here represented as polyalanines):
entityA = ihm.Entity('AAA', description='Subunit A')
entityB = ihm.Entity('AAAAAA', description='Subunit B')
system.entities.extend((entityA, entityB))
# Next, we define asymmetric units for everything we modeled.
# These roughly correspond to chains in a traditional PDB file. Multiple
# asymmetric units may map to the same entity (for example if there are
# several copies of a given protein). Parts of the system that were seen in
# an experiment but were not modeled are represented as entities to which no
# asymmetric units map.
asymA = ihm.AsymUnit(entityA, details='Subunit A')
asymB = ihm.AsymUnit(entityB, details='Subunit B')
system.asym_units.extend((asymA, asymB))
# Next, we group asymmetric units (and/or entities) into assemblies.
# Here, we'll define an assembly of everything that we modeled, plus
# two subassemblies (of the subunits) that the SAXS data applies to:
modeled_assembly = ihm.Assembly((asymA, asymB), name='Modeled assembly')
assemblyA = ihm.Assembly((asymA,), name='Subunit A')
assemblyB = ihm.Assembly((asymB,), name='Subunit B')
# Define how the system was represented. Multiple representations of the
# system are possible, and can overlap. Here we'll say we represent A
# atomically as a rigid body and B as 3 flexible coarse-grained spheres:
rep = ihm.representation.Representation(
[ihm.representation.AtomicSegment(asymA, rigid=True),
ihm.representation.FeatureSegment(asymB, rigid=False,
primitive='sphere', count=3)])
# Set up restraints on the system. First, two on the subunits that use
# the SAXS data; we'll say we used the FoXS software to do this fit:
saxsA_rsr = ihm.restraint.SASRestraint(
dataset=saxsA_dataset, assembly=assemblyA,
fitting_method='FoXS', fitting_atom_type='Heavy atoms')
saxsB_rsr = ihm.restraint.SASRestraint(
dataset=saxsB_dataset, assembly=assemblyB,
fitting_method='FoXS', fitting_atom_type='Heavy atoms')
system.restraints.extend((saxsA_rsr, saxsB_rsr))
# Next, the EM restraint applied to the entire system:
em_rsr = ihm.restraint.EM3DRestraint(
dataset=em_dataset, assembly=modeled_assembly)
system.restraints.append(em_rsr)
# Now we add information about how the modeling was done by defining one
# or more protocols. Here we'll say we did simple Monte Carlo on the entire
# system using all of the experimental data:
all_datasets = ihm.dataset.DatasetGroup((em_dataset, saxsA_dataset,
saxsB_dataset))
protocol = ihm.protocol.Protocol(name='Modeling')
protocol.steps.append(ihm.protocol.Step(
assembly=modeled_assembly, dataset_group=all_datasets,
method='Monte Carlo', name='Production sampling',
num_models_begin=0, num_models_end=1000, multi_scale=True))
# Finally we can add coordinates for the deposited models. Typically these
# will be stored in our own software's data structures somewhere (for this
# example in simple lists 'atoms' and 'spheres'):
atoms = [('A', 1, 'C', 'CA', 1., 2., 3.),
('A', 2, 'C', 'CA', 4., 5., 6.),
('A', 3, 'C', 'CA', 7., 8., 9.)]
spheres = [('B', 1, 2, 1., 2., 3., 1.2),
('B', 3, 4, 4., 5., 6., 1.2),
('B', 5, 6, 7., 8., 9., 1.2)]
# Rather than storing another copy of the coordinates in the IHM library
# (which could use a lot of memory), we need to provide a mechanism to
# translate them into the IHM data model. We do this straightforwardly by
# subclassing the IHM Model class and overriding the get_atoms
# and get_spheres methods:
class MyModel(ihm.model.Model):
# Map our asym unit names A and B to IHM asym_unit objects:
asym_unit_map = {'A': asymA, 'B': asymB}
def get_atoms(self):
for asym, seq_id, type_symbol, atom_id, x, y, z in atoms:
yield ihm.model.Atom(asym_unit=self.asym_unit_map[asym],
type_symbol=type_symbol, seq_id=seq_id,
atom_id=atom_id, x=x, y=y, z=z)
def get_spheres(self):
for asym, seq_id_start, seq_id_end, x, y, z, radius in spheres:
yield ihm.model.Sphere(asym_unit=self.asym_unit_map[asym],
seq_id_range=(seq_id_start, seq_id_end),
x=x, y=y, z=z, radius=radius)
model = MyModel(assembly=modeled_assembly, protocol=protocol,
representation=rep, name='Best scoring model')
# Note that the model was scored against all three restraints
saxsA_rsr.fits[model] = ihm.restraint.SASRestraintFit(chi_value=1.4)
saxsB_rsr.fits[model] = ihm.restraint.SASRestraintFit(chi_value=2.1)
em_rsr.fits[model] = ihm.restraint.EM3DRestraintFit(
cross_correlation_coefficient=0.9)
# Similar models can be grouped together. Here we only have a single model
# in the group
model_group = ihm.model.ModelGroup([model], name='All models')
# Groups are then placed into states, which can in turn be grouped. In this
# case we have only a single state:
state = ihm.model.State([model_group])
system.state_groups.append(ihm.model.StateGroup([state]))
# Once the system is complete, we can write it out to an mmCIF file:
with open('output.cif', 'w') as fh:
ihm.dumper.write(fh, [system])
python-ihm-2.7/examples/simple.pdb 0000664 0000000 0000000 00000002420 15035733372 0017262 0 ustar 00root root 0000000 0000000 ATOM 1 N ALA 1 17.807 17.608 5.019 1.00 17.18 5FD1 135
ATOM 2 CA ALA 1 17.121 17.162 6.197 1.00 15.60 5FD1 136
ATOM 3 C ALA 1 18.085 17.018 7.343 1.00 14.54 5FD1 137
ATOM 4 O ALA 1 19.244 16.654 7.119 1.00 15.42 5FD1 138
ATOM 5 CB ALA 1 16.496 15.827 5.961 1.00 16.91 5FD1 139
ATOM 6 N PHE 2 17.637 17.305 8.563 1.00 14.35 5FD1 140
ATOM 7 CA PHE 2 18.425 17.005 9.748 1.00 14.39 5FD1 141
ATOM 8 C PHE 2 17.911 15.673 10.298 1.00 12.39 5FD1 142
ATOM 9 O PHE 2 16.799 15.252 9.994 1.00 12.59 5FD1 143
ATOM 10 CB PHE 2 18.304 18.163 10.740 1.00 13.38 5FD1 144
ATOM 11 CG PHE 2 19.393 19.213 10.475 1.00 14.14 5FD1 145
ATOM 12 CD1 PHE 2 19.373 19.980 9.320 1.00 14.21 5FD1 146
ATOM 13 CD2 PHE 2 20.410 19.419 11.375 1.00 14.04 5FD1 147
ATOM 14 CE1 PHE 2 20.346 20.929 9.090 1.00 13.22 5FD1 148
ATOM 15 CE2 PHE 2 21.378 20.374 11.132 1.00 14.13 5FD1 149
ATOM 16 CZ PHE 2 21.361 21.133 9.992 1.00 13.09 5FD1 150
python-ihm-2.7/examples/stream_parser.py 0000664 0000000 0000000 00000002603 15035733372 0020526 0 ustar 00root root 0000000 0000000 # This example demonstrates the use of the Python IHM library at a low
# level, to parse an mmCIF file and extract a subset of its data.
# This particular example just extracts the atomic coordinates.
import ihm.format
# Make an object to handle a given mmCIF category in the file; it will
# be called for each line in the loop construct.
class AtomSiteHandler(object):
# If a given keyword is not in the file, or has the special
# mmCIF omitted (.) or unknown (?) value, the corresponding argument
# to __call__ will be given these values:
not_in_file = omitted = None
unknown = ihm.unknown
# Extract the group_PDB, Cartn_x, Cartn_y, Cartn_z keywords from
# the mmCIF category (mmCIF keywords are case-insensitive, but the
# Python arguments here should be lowercase).
def __call__(self, group_pdb, cartn_x, cartn_y, cartn_z):
if group_pdb == 'ATOM':
print("Atom at %s, %s, %s" % (cartn_x, cartn_y, cartn_z))
ash = AtomSiteHandler()
with open('mini.cif') as fh:
# Extract keywords from the _atom_site mmCIF category using the
# AtomSiteHandler defined above
c = ihm.format.CifReader(fh, category_handler={'_atom_site': ash})
# Read the first data block in mini.cif
# (This will return True as long as there are more blocks, so it can
# be put in a while loop instead if you want to read all data blocks.)
c.read_file()
python-ihm-2.7/examples/token_reader.py 0000664 0000000 0000000 00000002604 15035733372 0020322 0 ustar 00root root 0000000 0000000 # This example demonstrates the use of the Python IHM library at a very
# low level, to perform housekeeping tasks on an mmCIF file without
# making large changes to its structure, and preserving whitespace,
# case, and comments.
# Note that unlike higher-level interfaces, the tokenizer can generate
# invalid mmCIF if used incorrectly. It is recommended that the resulting
# mmCIF files are run through a validator, as in the `validate_pdb_ihm.py`
# example.
import ihm.format
filters = [
# Change chain ID 'B' to 'Z' by altering the _struct_asym table
ihm.format.ChangeValueFilter('_struct_asym.id', old='B', new='Z'),
# Note that the tokenizer does not parse parent-child relationships
# or understand the underlying dictionary. So we must also change other
# tables that reference chain IDs. Here we change the label_asym_id keyword
# in *any* table (typically in _atom_site).
ihm.format.ChangeValueFilter('.label_asym_id', old='B', new='Z'),
# Remove the non-standard _modeller.version data item from the file
ihm.format.RemoveItemFilter('_modeller.version')]
# Read the input file as a set of tokens, modify them using the filters
# above, and write a new file:
with open('mini.cif') as fh_in:
r = ihm.format.CifTokenReader(fh_in)
with open('output.cif', 'w') as fh_out:
for token in r.read_file(filters):
fh_out.write(token.as_mmcif())
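# A minimal follow-up sketch (not part of the original example): the
# modified file can be checked with the same ihm.dictionary machinery used
# in the validate_pdb_ihm.py example (requires network access):
#
#     import urllib.request
#     import ihm.dictionary
#     fh = urllib.request.urlopen(
#         'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic')
#     d_pdbx = ihm.dictionary.read(fh)
#     fh.close()
#     with open('output.cif') as fh:
#         d_pdbx.validate(fh)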
python-ihm-2.7/examples/validate_pdb_ihm.py 0000664 0000000 0000000 00000003646 15035733372 0021142 0 ustar 00root root 0000000 0000000 # This example demonstrates the use of the Python IHM library's validator.
# A structure is downloaded from the PDB-IHM database and checked against
# the PDBx and IHM dictionaries for compliance. This validator can be used
# to perform basic integrity checking against any mmCIF dictionary; for an
# example of using it to validate homology models against the ModelCIF
# dictionary, see
# https://github.com/ihmwg/python-modelcif/blob/main/examples/validate_modbase.py.
import io
import ihm.reader
import ihm.dictionary
import urllib.request
# Read in the PDBx dictionary from wwPDB as a Dictionary object
fh = urllib.request.urlopen(
'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic')
d_pdbx = ihm.dictionary.read(fh)
fh.close()
# Also read in the IHM dictionary
fh = urllib.request.urlopen(
'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ihm.dic')
d_ihm = ihm.dictionary.read(fh)
fh.close()
# Deposited integrative models should conform to both the PDBx dictionary
# (used to define basic structural information such as residues and chains)
# and the IHM dictionary (used for information specific to integrative
# modeling). Make a dictionary that combines the PDBx and IHM dictionaries
# using the + operator.
pdbx_ihm = d_pdbx + d_ihm
# Validate a structure against PDBx+IHM.
# A correct structure here should result in no output; an invalid structure
# will result in a ValidatorError Python exception.
# Here, a structure from PDB-IHM (which should be valid) is used.
acc = '8zz1'
cif = urllib.request.urlopen('https://pdb-ihm.org/cif/%s.cif' % acc).read()
# The encoding for mmCIF files isn't strictly defined, so first try UTF-8
# and if that fails, strip out any non-ASCII characters. This ensures that
# we handle accented characters in string fields correctly.
try:
fh = io.StringIO(cif.decode('utf-8'))
except UnicodeDecodeError:
fh = io.StringIO(cif.decode('ascii', errors='ignore'))
pdbx_ihm.validate(fh)
python-ihm-2.7/ihm/ 0000775 0000000 0000000 00000000000 15035733372 0014243 5 ustar 00root root 0000000 0000000 python-ihm-2.7/ihm/__init__.py 0000664 0000000 0000000 00000236325 15035733372 0016367 0 ustar 00root root 0000000 0000000 """Representation of an IHM mmCIF file as a set of Python classes.
Generally class names correspond to mmCIF table names and class
attributes to mmCIF attributes (with prefixes like `pdbx_` stripped).
For example, the data item _entity.details is found in the
:class:`Entity` class, as the `details` member.
Ordinals and IDs are generally not used in this representation (instead,
pointers to objects are used).
"""
import itertools
import numbers
import re
import sys
import urllib.request
import json
import collections
from . import util
__version__ = '2.7'
class __UnknownValue:
# Represent the mmCIF 'unknown' special value
def __str__(self):
return '?'
__repr__ = __str__
def __bool__(self):
return False
# Needs to be hashable so that classes like Software (that might
# use unknown values as attributes) are hashable
def __hash__(self):
return 0
# Unknown value is a singleton and should only compare equal to itself
def __eq__(self, other):
return self is other
def __lt__(self, other):
return False
__gt__ = __lt__
__le__ = __ge__ = __eq__
#: A value that isn't known. Note that this is distinct from a value that
#: is deliberately omitted, which is represented by Python None.
unknown = __UnknownValue()
def _remove_identical(gen):
"""Return only unique objects from `gen`.
Objects that are identical are only returned once, although multiple
non-identical objects that compare equal may be returned."""
seen_objs = {}
for obj in gen:
if id(obj) in seen_objs:
continue
seen_objs[id(obj)] = None
yield obj
class System:
"""Top-level class representing a complete modeled system.
:param str title: Title (longer text description) of the system.
:param str id: Unique identifier for this system in the mmCIF file.
:param str model_details: Detailed description of the system, like an
abstract.
:param databases: If this system is part of one or more official
databases (e.g. PDB, SwissModel), details of
the database identifiers.
:type databases: sequence of :class:`Database`
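       For example, a minimal system with a single database
       reference could be constructed with::
           system = ihm.System(
               title='Model of a complex',
               databases=[ihm.Database(id='PDB', code='1abc')])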
"""
structure_determination_methodology = "integrative"
def __init__(self, title=None, id='model', model_details=None,
databases=[]):
self.id = id
self.title = title
self.model_details = model_details
self.databases = []
self.databases.extend(databases)
#: Information about data processing and entry status.
#: See :class:`DatabaseStatus`.
self.database_status = DatabaseStatus()
#: List of plain text comments. These will be added to the top of
#: the mmCIF file.
self.comments = []
#: List of all software used in the modeling. See :class:`Software`.
self.software = []
#: List of all authors of this system, as a list of strings (last name
#: followed by initials, e.g. "Smith, A.J."). When writing out a file,
#: if this list is empty, the set of all citation authors (see
#: :class:`Citation`) is used instead.
self.authors = []
#: List of all grants that supported this work. See :class:`Grant`.
self.grants = []
#: List of all citations. See :class:`Citation`.
self.citations = []
#: All entities used in the system. See :class:`Entity`.
self.entities = []
#: All asymmetric units used in the system. See :class:`AsymUnit`.
self.asym_units = []
#: Collections (if any) to which this entry belongs.
#: These are used to group depositions of related entries.
#: See :class:`Collection`.
self.collections = []
#: Revision/update history. See :class:`Revision`.
self.revisions = []
#: Information on usage of the data. See :class:`DataUsage`.
self.data_usage = []
#: All orphaned chemical descriptors in the system.
#: See :class:`ChemDescriptor`. This can be used to track descriptors
#: that are not otherwise used - normally one is assigned to a
#: :class:`ihm.restraint.CrossLinkRestraint`.
self.orphan_chem_descriptors = []
#: All orphaned assemblies in the system. See :class:`Assembly`.
#: This can be used to keep track of all assemblies that are not
#: otherwise used - normally one is assigned to a
#: :class:`~ihm.model.Model`,
#: :class:`ihm.protocol.Step`, or
#: :class:`~ihm.restraint.Restraint`.
self.orphan_assemblies = []
#: The assembly of the entire system. By convention this is always
#: the first assembly in the mmCIF file (assembly_id=1). Note that
#: currently this isn't filled in on output until dumper.write()
#: is called. See :class:`Assembly`.
self.complete_assembly = Assembly((), name='Complete assembly',
description='All known components')
#: Locations of all extra resources.
#: See :class:`~ihm.location.Location`.
self.locations = []
#: All orphaned datasets.
#: This can be used to keep track of all datasets that are not
#: otherwise used - normally a dataset is assigned to a
#: :class:`~ihm.dataset.DatasetGroup`,
#: :class:`~ihm.startmodel.StartingModel`,
#: :class:`~ihm.restraint.Restraint`,
#: :class:`~ihm.startmodel.Template`,
#: or as the parent of another :class:`~ihm.dataset.Dataset`.
#: See :class:`~ihm.dataset.Dataset`.
self.orphan_datasets = []
#: All orphaned groups of datasets.
#: This can be used to keep track of all dataset groups that are not
#: otherwise used - normally a group is assigned to a
#: :class:`~ihm.protocol.Protocol`.
#: See :class:`~ihm.dataset.DatasetGroup`.
self.orphan_dataset_groups = []
#: All orphaned representations of the system.
#: This can be used to keep track of all representations that are not
#: otherwise used - normally one is assigned to a
#: :class:`~ihm.model.Model`.
#: See :class:`~ihm.representation.Representation`.
self.orphan_representations = []
#: All orphaned starting models for the system.
#: This can be used to keep track of all starting models that are not
#: otherwise used - normally one is assigned to an
#: :class:`ihm.representation.Segment`.
#: See :class:`~ihm.startmodel.StartingModel`.
self.orphan_starting_models = []
#: All restraints on the system.
#: See :class:`~ihm.restraint.Restraint`.
self.restraints = []
#: All restraint groups.
#: See :class:`~ihm.restraint.RestraintGroup`.
self.restraint_groups = []
#: All orphaned modeling protocols.
#: This can be used to keep track of all protocols that are not
#: otherwise used - normally a protocol is assigned to a
#: :class:`~ihm.model.Model`.
#: See :class:`~ihm.protocol.Protocol`.
self.orphan_protocols = []
#: All ensembles.
#: See :class:`~ihm.model.Ensemble`.
self.ensembles = []
#: All ordered processes.
#: See :class:`~ihm.model.OrderedProcess`.
self.ordered_processes = []
#: All state groups (collections of models).
#: See :class:`~ihm.model.StateGroup`.
self.state_groups = []
#: All orphaned geometric objects.
#: This can be used to keep track of all objects that are not
#: otherwise used - normally an object is assigned to a
#: :class:`~ihm.restraint.GeometricRestraint`.
#: See :class:`~ihm.geometry.GeometricObject`.
self.orphan_geometric_objects = []
#: All orphaned features.
#: This can be used to keep track of all features that are not
#: otherwise used - normally a feature is assigned to a
#: :class:`~ihm.restraint.GeometricRestraint`.
#: See :class:`~ihm.restraint.Feature`.
self.orphan_features = []
#: All orphaned pseudo sites.
#: This can be used to keep track of all pseudo sites that are not
#: otherwise used - normally a site is used in a
#: :class:`~ihm.restraint.PseudoSiteFeature` or a
#: :class:`~ihm.restraint.CrossLinkPseudoSite`.
self.orphan_pseudo_sites = []
#: Contains the fluorescence (FLR) part.
#: See :class:`~ihm.flr.FLRData`.
self.flr_data = []
#: All multi-state schemes
#: See :class:`~ihm.multi_state_scheme.MultiStateScheme`.
self.multi_state_schemes = []
self._orphan_centers = []
self._orphan_dataset_transforms = []
self._orphan_geometric_transforms = []
self._orphan_relaxation_times = []
self._orphan_repos = []
self._orphan_chem_comps = []
_database_status = property(lambda self: self.database_status._map)
def _make_complete_assembly(self):
"""Fill in the complete assembly with all asym units"""
# Clear out any existing components
self.complete_assembly[:] = []
# Include all asym units
for asym in self.asym_units:
self.complete_assembly.append(asym)
def _all_models(self):
"""Iterate over all Models in the system"""
# todo: raise an error if a model is present in multiple groups
for group in self._all_model_groups():
seen_models = {}
for model in group:
if model in seen_models:
continue
seen_models[model] = None
yield group, model
def update_locations_in_repositories(self, repos):
"""Update all :class:`~ihm.location.Location` objects in the system
that lie within a checked-out :class:`~ihm.location.Repository`
to point to that repository.
This is intended for the use case where the current working
directory is a checkout of a repository which is archived somewhere
with a DOI. Locations can then be simply constructed pointing to
local files, and retroactively updated with this method to point
to the DOI if appropriate.
For each Location, if it points to a local file that is below the
`root` of one of the `repos`, update it to point to that repository.
           If it is under multiple roots, pick the one that gives the shortest
path. For example, if run in a subdirectory `foo` of a repository
archived as `repo.zip`, the local path `simple.pdb` will
be updated to be `repo-top/foo/simple.pdb` in `repo.zip`::
l = ihm.location.InputFileLocation("simple.pdb")
system.locations.append(l)
r = ihm.location.Repository(doi='1.2.3.4',
                         url='https://example.com/repo.zip',
                         top_directory="repo-top", root="..")
system.update_locations_in_repositories([r])
"""
import ihm.location
for loc in self._all_locations():
if isinstance(loc, ihm.location.FileLocation):
ihm.location.Repository._update_in_repos(loc, repos)
def report(self, fh=sys.stdout):
"""Print a summary report of this system. This can be used to
more easily spot errors or inconsistencies. It will also warn
about missing data that may not be technically required for a
compliant mmCIF file, but is usually expected to be present.
:param file fh: The file handle to print the report to, if not
standard output.
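           For example, to write the report to a file rather than
           standard output::
               with open('report.txt', 'w') as fh:
                   system.report(fh)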
"""
import ihm.report
r = ihm.report.Reporter(self, fh)
r.report()
def _all_restraints(self):
"""Iterate over all Restraints in the system.
Duplicates may be present."""
def _all_restraints_in_groups():
for rg in self.restraint_groups:
for r in rg:
yield r
return itertools.chain(self.restraints, _all_restraints_in_groups())
def _all_chem_descriptors(self):
"""Iterate over all ChemDescriptors in the system.
Duplicates may be present."""
return itertools.chain(
self.orphan_chem_descriptors,
(restraint.linker for restraint in self._all_restraints()
if hasattr(restraint, 'linker') and restraint.linker),
(itertools.chain.from_iterable(
f._all_flr_chemical_descriptors() for f in self.flr_data)))
def _all_model_groups(self, only_in_states=True):
"""Iterate over all ModelGroups in the system.
If only_in_states is True, only return ModelGroups referenced
by a State object; otherwise, also include ModelGroups referenced
by an OrderedProcess or Ensemble."""
# todo: raise an error if a modelgroup is present in multiple states
seen_model_groups = []
for state_group in self.state_groups:
for state in state_group:
for model_group in state:
seen_model_groups.append(model_group)
yield model_group
for mssc in self._all_multi_state_scheme_connectivities():
for model_group in mssc.begin_state:
if model_group not in seen_model_groups:
seen_model_groups.append(model_group)
yield model_group
if mssc.end_state:
for model_group in mssc.end_state:
if model_group not in seen_model_groups:
seen_model_groups.append(model_group)
yield model_group
if not only_in_states:
for ensemble in self.ensembles:
if ensemble.model_group:
yield ensemble.model_group
for ss in ensemble.subsamples:
if ss.model_group:
yield ss.model_group
for proc in self.ordered_processes:
for step in proc.steps:
for edge in step:
yield edge.group_begin
yield edge.group_end
def _all_representations(self):
"""Iterate over all Representations in the system.
This includes all Representations referenced from other objects,
plus any orphaned Representations. Duplicates are filtered out."""
return _remove_identical(itertools.chain(
self.orphan_representations,
(model.representation for group, model in self._all_models()
if model.representation)))
def _all_segments(self):
for representation in self._all_representations():
for segment in representation:
yield segment
def _all_starting_models(self):
"""Iterate over all StartingModels in the system.
This includes all StartingModels referenced from other objects, plus
any orphaned StartingModels. Duplicates are filtered out."""
return _remove_identical(itertools.chain(
self.orphan_starting_models,
(segment.starting_model for segment in self._all_segments()
if segment.starting_model)))
def _all_protocols(self):
"""Iterate over all Protocols in the system.
This includes all Protocols referenced from other objects, plus
any orphaned Protocols. Duplicates are filtered out."""
return _remove_identical(itertools.chain(
self.orphan_protocols,
(model.protocol for group, model in self._all_models()
if model.protocol)))
def _all_protocol_steps(self):
for protocol in self._all_protocols():
for step in protocol.steps:
yield step
def _all_analysis_steps(self):
for protocol in self._all_protocols():
for analysis in protocol.analyses:
for step in analysis.steps:
yield step
def _all_assemblies(self):
"""Iterate over all Assemblies in the system.
This includes all Assemblies referenced from other objects, plus
any orphaned Assemblies. Duplicates may be present."""
return itertools.chain(
# Complete assembly is always first
(self.complete_assembly,),
self.orphan_assemblies,
(model.assembly for group, model in self._all_models()
if model.assembly),
(step.assembly for step in self._all_protocol_steps()
if step.assembly),
(step.assembly for step in self._all_analysis_steps()
if step.assembly),
(restraint.assembly
for restraint in self._all_restraints() if restraint.assembly))
def _all_dataset_groups(self):
"""Iterate over all DatasetGroups in the system.
This includes all DatasetGroups referenced from other objects, plus
any orphaned groups. Duplicates may be present."""
return itertools.chain(
self.orphan_dataset_groups,
(step.dataset_group for step in self._all_protocol_steps()
if step.dataset_group),
(step.dataset_group for step in self._all_analysis_steps()
if step.dataset_group),
(rt.dataset_group for rt in self._all_relaxation_times()
if rt.dataset_group),
(kr.dataset_group for kr in self._all_kinetic_rates()
if kr.dataset_group),
(mssc.dataset_group for mssc in
self._all_multi_state_scheme_connectivities()
if mssc.dataset_group))
def _all_templates(self):
"""Iterate over all Templates in the system."""
for startmodel in self._all_starting_models():
for template in startmodel.templates:
yield template
def _all_datasets_except_parents(self):
"""Iterate over all Datasets except those referenced only
as the parent of another Dataset. Duplicates may be present."""
def _all_datasets_in_groups():
for dg in self._all_dataset_groups():
for d in dg:
yield d
return itertools.chain(
self.orphan_datasets,
_all_datasets_in_groups(),
(sm.dataset for sm in self._all_starting_models()
if sm.dataset),
(restraint.dataset for restraint in self._all_restraints()
if restraint.dataset),
(template.dataset for template in self._all_templates()
if template.dataset))
def _all_datasets(self):
"""Iterate over all Datasets in the system.
This includes all Datasets referenced from other objects, plus
any orphaned datasets. Duplicates may be present."""
def _all_datasets_and_parents(d):
for p in d.parents:
# Handle transformed datasets
if hasattr(p, 'dataset'):
pd = p.dataset
else:
pd = p
for alld in _all_datasets_and_parents(pd):
yield alld
yield d
for d in self._all_datasets_except_parents():
for alld in _all_datasets_and_parents(d):
yield alld
def _all_densities(self):
for ensemble in self.ensembles:
for density in ensemble.densities:
yield density
def _all_locations(self):
"""Iterate over all Locations in the system.
This includes all Locations referenced from other objects, plus
any referenced from the top-level system.
Duplicates may be present."""
def _all_ensemble_locations():
for ensemble in self.ensembles:
if ensemble.file:
yield ensemble.file
for ss in ensemble.subsamples:
if ss.file:
yield ss.file
return itertools.chain(
self.locations,
(dataset.location for dataset in self._all_datasets()
if hasattr(dataset, 'location') and dataset.location),
_all_ensemble_locations(),
(density.file for density in self._all_densities()
if density.file),
(sm.script_file for sm in self._all_starting_models()
if sm.script_file),
(template.alignment_file for template in self._all_templates()
if template.alignment_file),
(step.script_file for step in self._all_protocol_steps()
if step.script_file),
(step.script_file for step in self._all_analysis_steps()
if step.script_file),
(rt.external_file for rt in self._all_relaxation_times()
if rt.external_file),
(kr.external_file for kr in self._all_kinetic_rates()
if kr.external_file))
def _all_geometric_objects(self):
"""Iterate over all GeometricObjects in the system.
This includes all GeometricObjects referenced from other objects,
plus any referenced from the top-level system.
Duplicates may be present."""
return itertools.chain(
self.orphan_geometric_objects,
(restraint.geometric_object
for restraint in self._all_restraints()
if hasattr(restraint, 'geometric_object')
and restraint.geometric_object))
def _all_features(self):
"""Iterate over all Features in the system.
This includes all Features referenced from other objects,
plus any referenced from the top-level system.
Duplicates may be present."""
def _all_restraint_features():
for r in self._all_restraints():
if hasattr(r, '_all_features'):
for feature in r._all_features:
if feature:
yield feature
return itertools.chain(self.orphan_features, _all_restraint_features())
def _all_pseudo_sites(self):
"""Iterate over all PseudoSites in the system.
This includes all PseudoSites referenced from other objects,
plus any referenced from the top-level system.
Duplicates may be present."""
def _all_restraint_sites():
for r in self._all_restraints():
if hasattr(r, 'cross_links'):
for xl in r.cross_links:
if xl.pseudo1:
for x in xl.pseudo1:
yield x.site
if xl.pseudo2:
for x in xl.pseudo2:
yield x.site
return itertools.chain(self.orphan_pseudo_sites,
_all_restraint_sites(),
(f.site for f in self._all_features()
if hasattr(f, 'site') and f.site))
def _all_software(self):
"""Iterate over all Software in the system.
This includes all Software referenced from other objects, plus
any referenced from the top-level system.
Duplicates may be present."""
return (itertools.chain(
self.software,
(sm.software for sm in self._all_starting_models()
if sm.software),
(step.software for step in self._all_protocol_steps()
if step.software),
(step.software for step in self._all_analysis_steps()
if step.software),
(r.software for r in self._all_restraints()
if hasattr(r, 'software') and r.software)))
def _all_citations(self):
"""Iterate over all Citations in the system.
This includes all Citations referenced from other objects, plus
any referenced from the top-level system.
Duplicates are filtered out."""
return _remove_identical(itertools.chain(
self.citations,
(software.citation for software in self._all_software()
if software.citation),
(restraint.fitting_method_citation_id
for restraint in self._all_restraints()
if hasattr(restraint, 'fitting_method_citation_id')
and restraint.fitting_method_citation_id)))
def _all_entity_ranges(self):
"""Iterate over all Entity ranges in the system (these may be
:class:`Entity`, :class:`AsymUnit`, :class:`EntityRange` or
:class:`AsymUnitRange` objects).
Note that we don't include self.entities or self.asym_units here,
as we only want ranges that were actually used.
Duplicates may be present."""
return (itertools.chain(
(sm.asym_unit for sm in self._all_starting_models()),
(seg.asym_unit for seg in self._all_segments()),
(comp for a in self._all_assemblies() for comp in a),
(comp for f in self._all_features()
for comp in f._all_entities_or_asyms()),
(d.asym_unit for d in self._all_densities())))
def _all_multi_state_schemes(self):
for mss in self.multi_state_schemes:
yield mss
def _all_multi_state_scheme_connectivities(self):
"""Iterate over all multi-state scheme connectivities"""
for mss in self.multi_state_schemes:
for mssc in mss.get_connectivities():
yield mssc
def _all_kinetic_rates(self):
"""Iterate over all kinetic rates within multi-state schemes"""
return _remove_identical(itertools.chain(
(mssc.kinetic_rate for mssc in
self._all_multi_state_scheme_connectivities()
if mssc.kinetic_rate),
(c.kinetic_rate for f in
self.flr_data for c in f.kinetic_rate_fret_analysis_connections
if self.flr_data)))
def _all_relaxation_times(self):
"""Iterate over all relaxation times.
This includes relaxation times from
:class:`ihm.multi_state_scheme.MultiStateScheme`
and those assigned to connectivities in
:class:`ihm.multi_state_scheme.Connectivity`"""
seen_relaxation_times = []
for mss in self._all_multi_state_schemes():
for rt in mss.get_relaxation_times():
if rt in seen_relaxation_times:
continue
seen_relaxation_times.append(rt)
yield rt
for mssc in self._all_multi_state_scheme_connectivities():
if mssc.relaxation_time:
rt = mssc.relaxation_time
if rt in seen_relaxation_times:
continue
seen_relaxation_times.append(rt)
yield rt
# Get the relaxation times from the
# flr.RelaxationTimeFRETAnalysisConnection objects
if self.flr_data:
for f in self.flr_data:
for c in f.relaxation_time_fret_analysis_connections:
rt = c.relaxation_time
if rt in seen_relaxation_times:
continue
seen_relaxation_times.append(rt)
yield rt
for rt in self._orphan_relaxation_times:
if rt in seen_relaxation_times:
continue
seen_relaxation_times.append(rt)
yield rt
def _before_write(self):
"""Do any setup necessary before writing out to a file"""
# Here, we initialize all RestraintGroups by removing any assigned ID
for g in self.restraint_groups:
util._remove_id(g)
# Fill in complete assembly
self._make_complete_assembly()
def _check_after_write(self):
"""Make sure everything was successfully written"""
        # Here, we check that all RestraintGroups were successfully dumped
for g in self.restraint_groups:
if len(g) > 0 and not hasattr(g, '_id'):
raise TypeError(
"RestraintGroup(%s) contains an unsupported combination "
"of Restraints. Due to limitations of the underlying "
"dictionary, all objects in a RestraintGroup must be of "
"the same type, and only certain types (currently only "
"DerivedDistanceRestraint or PredictedContactRestraint) "
"can be grouped." % g)
class DatabaseStatus:
"""Information about data processing and entry status.
This information is usually accessed via :attr:`System.database_status`.
"""
def __init__(self):
self._map = {}
status_code = property(lambda self: self._map['status_code'],
doc="The status of the entry, e.g. released.")
deposit_site = property(lambda self: self._map['deposit_site'],
doc="The site where the file was deposited.")
process_site = property(lambda self: self._map['process_site'],
doc="The site where the file was processed.")
recvd_initial_deposition_date = property(
lambda self:
util._get_iso_date(self._map['recvd_initial_deposition_date']),
doc="The date of initial deposition.")
class Database:
"""Information about a System that is part of an official database.
If a :class:`System` is part of one or more official databases
(e.g. PDB, SwissModel), this class contains details of the
database identifiers. It should be passed to the :class:`System`
constructor.
:param str id: Abbreviated name of the database (e.g. PDB).
:param str code: Identifier from the database (e.g. 1abc).
:param str doi: Digital Object Identifier of the database entry.
:param str accession: Extended accession code of the database entry.
"""
def __init__(self, id, code, doi=None, accession=None):
self.id, self.code = id, code
self.doi, self.accession = doi, accession
class Software:
"""Software used as part of the modeling protocol.
:param str name: The name of the software.
:param str classification: The major function of the software, for
example 'model building', 'sample preparation',
'data collection'.
:param str description: A longer text description of the software.
:param str location: Place where the software can be found (e.g. URL).
:param str type: Type of software (program/package/library/other).
:param str version: The version used.
:param citation: Publication describing the software.
:type citation: :class:`Citation`
Generally these objects are added to :attr:`System.software` or
passed to :class:`ihm.startmodel.StartingModel`,
:class:`ihm.protocol.Step`,
:class:`ihm.analysis.Step`, or
:class:`ihm.restraint.PredictedContactRestraint` objects.
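       For example (the values shown are illustrative only)::
           s = ihm.Software(
               name='IMP', classification='integrative model building',
               description='Integrative Modeling Platform',
               location='https://integrativemodeling.org', version='2.2')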
"""
def __init__(self, name, classification, description, location,
type='program', version=None, citation=None):
self.name = name
self.classification = classification
self.description = description
self.location = location
self.type = type
self.version = version
self.citation = citation
def __str__(self):
return "" % repr(self.name)
# Software compares equal if the names and versions are the same
def _eq_vals(self):
return (self.name, self.version)
def __eq__(self, other):
return self._eq_vals() == other._eq_vals()
def __hash__(self):
return hash(self._eq_vals())
class Grant:
"""Information on funding support for the modeling.
See :attr:`System.grants`.
:param str funding_organization: The name of the organization providing
the funding, e.g. "National Institutes of Health".
:param str country: The country that hosts the funding organization,
e.g. "United States".
:param str grant_number: Identifying information for the grant, e.g.
"1R01GM072999-01".
"""
def __init__(self, funding_organization, country, grant_number):
self.funding_organization = funding_organization
self.country = country
self.grant_number = grant_number
class Citation:
"""A publication that describes the modeling.
Generally citations are added to :attr:`System.citations` or
passed to :class:`ihm.Software` or
:class:`ihm.restraint.EM3DRestraint` objects.
:param str pmid: The PubMed ID.
:param str title: Full title of the publication.
:param str journal: Abbreviated journal name.
:param volume: Journal volume as int for a plain number or str for
journals adding a label to the number (e.g. "46(W1)" for
a web server issue).
:param page_range: The page (int) or page range (as a 2-element
int tuple). Using str also works for labelled page numbers.
:param int year: Year of publication.
:param authors: All authors in order, as a list of strings (last name
followed by initials, e.g. "Smith, A.J.").
:param str doi: Digital Object Identifier of the publication.
:param bool is_primary: Denotes the most pertinent publication for the
modeling itself (as opposed to a method or piece of software used
in the protocol). Only one such publication is allowed, and it
is assigned the ID "primary" in the mmCIF file.
"""
def __init__(self, pmid, title, journal, volume, page_range, year, authors,
doi, is_primary=False):
self.title, self.journal, self.volume = title, journal, volume
self.page_range, self.year = page_range, year
self.pmid, self.doi = pmid, doi
self.authors = authors if authors is not None else []
self.is_primary = is_primary
@classmethod
def from_pubmed_id(cls, pubmed_id, is_primary=False):
"""Create a Citation from just a PubMed ID.
This is done by querying NCBI's web API, so requires network access.
:param int pubmed_id: The PubMed identifier.
:param bool is_primary: Denotes the most pertinent publication for
the modeling itself; see :class:`Citation` for more info.
:return: A new Citation for the given identifier.
:rtype: :class:`Citation`
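           For example (requires network access; the ID shown is simply
           an illustrative PubMed identifier)::
               citation = ihm.Citation.from_pubmed_id(25161197)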
"""
def get_doi(ref):
for art_id in ref['articleids']:
if art_id['idtype'] == 'doi':
return art_id['value']
def get_page_range(ref):
rng = ref['pages'].split('-')
if len(rng) == 2 and len(rng[1]) < len(rng[0]):
# map ranges like "2730-43" to 2730,2743 not 2730, 43
rng[1] = rng[0][:len(rng[0]) - len(rng[1])] + rng[1]
# Handle one page or empty page range
if len(rng) == 1:
rng = rng[0]
if rng == '':
rng = None
return rng
url = ('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
'?db=pubmed&retmode=json&rettype=abstract&id=%s' % pubmed_id)
fh = urllib.request.urlopen(url)
j = json.load(fh)
fh.close()
ref = j['result'][str(pubmed_id)]
authors = [x['name'] for x in ref['authors']
if x['authtype'] == 'Author']
# PubMed authors are usually of the form "Lastname AB" but PDB uses
# "Lastname, A.B." so map one to the other if possible
r = re.compile(r'(^\w+.*?)\s+(\w+)$')
def auth_sub(m):
return m.group(1) + ", " + "".join(initial + "."
for initial in m.group(2))
authors = [r.sub(auth_sub, auth) for auth in authors]
return cls(pmid=pubmed_id, title=ref['title'],
journal=ref['source'],
volume=ref['volume'] or None,
page_range=get_page_range(ref),
year=ref['pubdate'].split()[0],
authors=authors, doi=get_doi(ref),
is_primary=is_primary)
class ChemComp:
"""A chemical component from which :class:`Entity` objects are constructed.
Usually these are amino acids (see :class:`LPeptideChemComp`) or
nucleic acids (see :class:`DNAChemComp` and :class:`RNAChemComp`),
but non-polymers such as ligands or water (see
:class:`NonPolymerChemComp` and :class:`WaterChemComp`) and saccharides
(see :class:`SaccharideChemComp`) are also supported.
For standard amino and nucleic acids, it is generally easier to use
a :class:`Alphabet` and refer to the components with their one-letter
(amino acids, RNA) or two-letter (DNA) codes.
:param str id: A globally unique identifier for this component (usually
three letters).
:param str code: A shorter identifier (usually one letter) that only
needs to be unique in the entity.
:param str code_canonical: Canonical version of `code` (which need not
be unique).
:param str name: A longer human-readable name for the component.
:param str formula: The chemical formula. This is a space-separated
list of the element symbols in the component, each followed
by an optional count (if omitted, 1 is assumed). The formula
is terminated with the formal charge (if not zero). The element
list should be sorted alphabetically, unless carbon is present,
in which case C and H precede the rest of the elements. For
example, water would be "H2 O" and arginine (with +1 formal
charge) "C6 H15 N4 O2 1".
:param str ccd: The chemical component dictionary (CCD) where
this component is defined. Can be "core" for the wwPDB CCD
(https://www.wwpdb.org/data/ccd), "ma" for the ModelArchive CCD,
or "local" for a novel component that is defined in the mmCIF
file itself. If unspecified, defaults to "core" unless
``descriptors`` is given in which case it defaults to "local".
This information is essentially ignored by python-ihm (since
the IHM dictionary has no support for custom CCDs) but is used
by python-modelcif.
:param list descriptors: When ``ccd`` is "local", this can be one or
more descriptor objects that describe the chemistry. python-ihm
does not define any, but python-modelcif does.
For example, glycine would have
``id='GLY', code='G', code_canonical='G'`` while selenomethionine would
use ``id='MSE', code='MSE', code_canonical='M'``, guanosine (RNA)
``id='G', code='G', code_canonical='G'``, and deoxyguanosine (DNA)
``id='DG', code='DG', code_canonical='G'``.
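       As an illustration of the formula syntax, the formula weight of
       water follows directly from its formula (in practice
       :class:`WaterChemComp` already provides this component)::
           hoh = ihm.ChemComp(id='HOH', code='HOH', code_canonical='X',
                              name='WATER', formula='H2 O')
           print(hoh.formula_weight)   # 2 * 1.008 + 15.999 = 18.015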
"""
type = 'other'
_element_mass = {'H': 1.008, 'C': 12.011, 'N': 14.007, 'O': 15.999,
'P': 30.974, 'S': 32.060, 'Se': 78.971, 'Fe': 55.845,
'Ac': 227.028, 'Ag': 107.868, 'Al': 26.982, 'Ar': 39.948,
'As': 74.922, 'Au': 196.966, 'B': 10.81, 'Ba': 137.327,
'Be': 9.012, 'Bi': 208.98, 'Br': 79.904, 'Ca': 40.078,
'Cd': 112.414, 'Ce': 140.116, 'Cl': 35.453, 'Co': 58.933,
'Cr': 51.996, 'Cs': 132.905, 'Cu': 63.546, 'Dy': 162.5,
'Er': 167.259, 'Eu': 151.964, 'F': 18.998, 'Ga': 69.723,
                     'Gd': 157.25, 'Ge': 72.63, 'He': 4.003, 'Hf': 178.486,
'Hg': 200.592, 'Ho': 164.93, 'I': 126.904, 'In': 114.818,
'Ir': 192.217, 'K': 39.098, 'Kr': 83.798, 'La': 138.905,
'Li': 6.938, 'Lu': 174.967, 'Mg': 24.305, 'Mn': 54.938,
'Mo': 95.95, 'Na': 22.99, 'Nb': 92.906, 'Nd': 144.242,
'Ne': 20.180, 'Ni': 58.693, 'Np': 237.0, 'Os': 190.23,
'Pa': 231.036, 'Pb': 207.2, 'Pd': 106.42, 'Pr': 140.908,
'Pt': 195.084, 'Ra': 226.025, 'Rb': 85.468, 'Re': 186.207,
'Rh': 102.906, 'Ru': 101.07, 'Sb': 121.760, 'Sc': 44.956,
'Si': 28.086, 'Sm': 150.36, 'Sn': 118.710, 'Sr': 87.62,
'Ta': 180.948, 'Tb': 158.925, 'Te': 127.6, 'Th': 232.038,
'Ti': 47.867, 'Tl': 204.383, 'Tm': 168.934, 'U': 238.029,
'V': 50.942, 'W': 183.84, 'Xe': 131.293, 'Y': 88.906,
'Yb': 173.045, 'Zn': 65.38, 'Zr': 91.224}
def __init__(self, id, code, code_canonical, name=None, formula=None,
ccd=None, descriptors=None):
self.id = id
self.code, self.code_canonical, self.name = code, code_canonical, name
self.formula = formula
self.ccd, self.descriptors = ccd, descriptors
def __str__(self):
return ('<%s.%s(%s)>'
% (self.__class__.__module__, self.__class__.__name__,
self.id))
def __get_weight(self):
# Calculate weight from formula
if self.formula in (None, unknown):
return
spl = self.formula.split()
# Remove formal charge if present
if len(spl) > 0 and spl[-1].isdigit():
del spl[-1]
r = re.compile(r'(\D+)(\d*)$')
weight = 0.
for s in spl:
m = r.match(s)
if m is None:
raise ValueError("Bad formula fragment: %s" % s)
emass = self._element_mass.get(m.group(1), None)
if emass:
weight += emass * (int(m.group(2)) if m.group(2) else 1)
elif m.group(1) != 'X':
# If element is unknown, weight is unknown too
# Element 'X' is used for GLX/ASX and has zero weight
return None
return weight
formula_weight = property(
__get_weight,
doc="Formula weight (dalton). This is calculated automatically from "
"the chemical formula and known atomic masses.")
# Equal if all identifiers are the same
def __eq__(self, other):
return ((self.code, self.code_canonical, self.id, self.type) ==
(other.code, other.code_canonical, other.id, other.type))
def __hash__(self):
return hash((self.code, self.code_canonical, self.id, self.type))
class PeptideChemComp(ChemComp):
"""A single peptide component. Usually :class:`LPeptideChemComp` is used
instead (except for glycine) to specify chirality.
See :class:`ChemComp` for a description of the parameters."""
type = 'peptide linking'
class LPeptideChemComp(PeptideChemComp):
"""A single peptide component with (normal) L- chirality.
See :class:`ChemComp` for a description of the parameters."""
type = 'L-peptide linking'
class DPeptideChemComp(PeptideChemComp):
"""A single peptide component with (unusual) D- chirality.
See :class:`ChemComp` for a description of the parameters."""
type = 'D-peptide linking'
class DNAChemComp(ChemComp):
"""A single DNA component.
See :class:`ChemComp` for a description of the parameters."""
type = 'DNA linking'
class RNAChemComp(ChemComp):
"""A single RNA component.
See :class:`ChemComp` for a description of the parameters."""
type = 'RNA linking'
class SaccharideChemComp(ChemComp):
"""A saccharide chemical component. Usually a subclass that specifies
the chirality and linkage (e.g. :class:`LSaccharideBetaChemComp`)
is used.
:param str id: A globally unique identifier for this component.
:param str name: A longer human-readable name for the component.
:param str formula: The chemical formula. See :class:`ChemComp` for
more details.
:param str ccd: The chemical component dictionary (CCD) where
this component is defined. See :class:`ChemComp` for
more details.
:param list descriptors: Information on the component's chemistry.
See :class:`ChemComp` for more details.
"""
type = "saccharide"
def __init__(self, id, name=None, formula=None, ccd=None,
descriptors=None):
super().__init__(
id, id, id, name=name, formula=formula,
ccd=ccd, descriptors=descriptors)
class LSaccharideChemComp(SaccharideChemComp):
"""A single saccharide component with L-chirality and unspecified linkage.
See :class:`SaccharideChemComp` for a description of the parameters."""
type = "L-saccharide"
class LSaccharideAlphaChemComp(LSaccharideChemComp):
"""A single saccharide component with L-chirality and alpha linkage.
See :class:`SaccharideChemComp` for a description of the parameters."""
type = "L-saccharide, alpha linking"
class LSaccharideBetaChemComp(LSaccharideChemComp):
"""A single saccharide component with L-chirality and beta linkage.
See :class:`SaccharideChemComp` for a description of the parameters."""
type = "L-saccharide, beta linking"
class DSaccharideChemComp(SaccharideChemComp):
"""A single saccharide component with D-chirality and unspecified linkage.
See :class:`SaccharideChemComp` for a description of the parameters."""
type = "D-saccharide"
class DSaccharideAlphaChemComp(DSaccharideChemComp):
"""A single saccharide component with D-chirality and alpha linkage.
See :class:`SaccharideChemComp` for a description of the parameters."""
type = "D-saccharide, alpha linking"
class DSaccharideBetaChemComp(DSaccharideChemComp):
"""A single saccharide component with D-chirality and beta linkage.
See :class:`SaccharideChemComp` for a description of the parameters."""
type = "D-saccharide, beta linking"
class NonPolymerChemComp(ChemComp):
"""A non-polymer chemical component, such as a ligand or a non-standard
residue (for crystal waters, use :class:`WaterChemComp`).
:param str id: A globally unique identifier for this component.
:param str code_canonical: Canonical one-letter identifier. This is
used for non-standard residues and should be the one-letter code
of the closest standard residue (or by default, 'X').
:param str name: A longer human-readable name for the component.
:param str formula: The chemical formula. See :class:`ChemComp` for
more details.
:param str ccd: The chemical component dictionary (CCD) where
this component is defined. See :class:`ChemComp` for
more details.
:param list descriptors: Information on the component's chemistry.
See :class:`ChemComp` for more details.
"""
type = "non-polymer"
def __init__(self, id, code_canonical='X', name=None, formula=None,
ccd=None, descriptors=None):
super().__init__(
id, id, code_canonical, name=name, formula=formula,
ccd=ccd, descriptors=descriptors)
class WaterChemComp(NonPolymerChemComp):
"""The chemical component for crystal water.
"""
def __init__(self):
super().__init__('HOH', name='WATER', formula="H2 O")
class Alphabet:
"""A mapping from codes (usually one-letter, or two-letter for DNA) to
chemical components.
These classes can be used to construct sequences of components
when creating an :class:`Entity`. They can also be used like a Python
dict to get standard components, e.g.::
a = ihm.LPeptideAlphabet()
met = a['M']
gly = a['G']
See :class:`LPeptideAlphabet`, :class:`RNAAlphabet`,
:class:`DNAAlphabet`.
"""
def __getitem__(self, key):
return self._comps[key]
def __contains__(self, key):
return key in self._comps
keys = property(lambda self: self._comps.keys())
values = property(lambda self: self._comps.values())
items = property(lambda self: self._comps.items())
class LPeptideAlphabet(Alphabet):
"""A mapping from one-letter amino acid codes (e.g. H, M) to
L-amino acids (as :class:`LPeptideChemComp` objects, except for achiral
glycine which maps to :class:`PeptideChemComp`). Some other common
modified residues are also included (e.g. MSE). For these their full
name rather than a one-letter code is used.
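       For example::
           a = ihm.LPeptideAlphabet()
           met = a['M']      # standard residue, one-letter code
           mse = a['MSE']    # modified residue, full ID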
"""
_comps = dict([code, LPeptideChemComp(id, code, code, name,
formula)]
for code, id, name, formula in [
('A', 'ALA', 'ALANINE', 'C3 H7 N O2'),
('C', 'CYS', 'CYSTEINE', 'C3 H7 N O2 S'),
('D', 'ASP', 'ASPARTIC ACID', 'C4 H7 N O4'),
('E', 'GLU', 'GLUTAMIC ACID', 'C5 H9 N O4'),
('F', 'PHE', 'PHENYLALANINE', 'C9 H11 N O2'),
('H', 'HIS', 'HISTIDINE', 'C6 H10 N3 O2 1'),
('I', 'ILE', 'ISOLEUCINE', 'C6 H13 N O2'),
('K', 'LYS', 'LYSINE', 'C6 H15 N2 O2 1'),
('L', 'LEU', 'LEUCINE', 'C6 H13 N O2'),
('M', 'MET', 'METHIONINE', 'C5 H11 N O2 S'),
('N', 'ASN', 'ASPARAGINE', 'C4 H8 N2 O3'),
('P', 'PRO', 'PROLINE', 'C5 H9 N O2'),
('Q', 'GLN', 'GLUTAMINE', 'C5 H10 N2 O3'),
('R', 'ARG', 'ARGININE', 'C6 H15 N4 O2 1'),
('S', 'SER', 'SERINE', 'C3 H7 N O3'),
('T', 'THR', 'THREONINE', 'C4 H9 N O3'),
('V', 'VAL', 'VALINE', 'C5 H11 N O2'),
('W', 'TRP', 'TRYPTOPHAN', 'C11 H12 N2 O2'),
('Y', 'TYR', 'TYROSINE', 'C9 H11 N O3'),
('B', 'ASX', 'ASP/ASN AMBIGUOUS', 'C4 H6 N O2 X2'),
('Z', 'GLX', 'GLU/GLN AMBIGUOUS', 'C5 H8 N O2 X2'),
('U', 'SEC', 'SELENOCYSTEINE', 'C3 H7 N O2 Se')])
_comps['G'] = PeptideChemComp('GLY', 'G', 'G', name='GLYCINE',
formula="C2 H5 N O2")
# common non-standard L-amino acids
_comps.update([id, LPeptideChemComp(id, id, canon, name, formula)]
for id, canon, name, formula in [
('MSE', 'M', 'SELENOMETHIONINE', 'C5 H11 N O2 Se'),
('UNK', 'X', 'UNKNOWN', 'C4 H9 N O2')])
class DPeptideAlphabet(Alphabet):
"""A mapping from D-amino acid codes (e.g. DHI, MED) to
D-amino acids (as :class:`DPeptideChemComp` objects, except for achiral
glycine which maps to :class:`PeptideChemComp`). See
:class:`LPeptideAlphabet` for more details.
"""
_comps = dict([code, DPeptideChemComp(code, code, canon, name, formula)]
for canon, code, name, formula in [
('A', 'DAL', 'D-ALANINE', 'C3 H7 N O2'),
('C', 'DCY', 'D-CYSTEINE', 'C3 H7 N O2 S'),
('D', 'DAS', 'D-ASPARTIC ACID', 'C4 H7 N O4'),
('E', 'DGL', 'D-GLUTAMIC ACID', 'C5 H9 N O4'),
('F', 'DPN', 'D-PHENYLALANINE', 'C9 H11 N O2'),
('H', 'DHI', 'D-HISTIDINE', 'C6 H10 N3 O2 1'),
('I', 'DIL', 'D-ISOLEUCINE', 'C6 H13 N O2'),
('K', 'DLY', 'D-LYSINE', 'C6 H14 N2 O2'),
('L', 'DLE', 'D-LEUCINE', 'C6 H13 N O2'),
('M', 'MED', 'D-METHIONINE', 'C5 H11 N O2 S'),
('N', 'DSG', 'D-ASPARAGINE', 'C4 H8 N2 O3'),
('P', 'DPR', 'D-PROLINE', 'C5 H9 N O2'),
('Q', 'DGN', 'D-GLUTAMINE', 'C5 H10 N2 O3'),
('R', 'DAR', 'D-ARGININE', 'C6 H15 N4 O2 1'),
('S', 'DSN', 'D-SERINE', 'C3 H7 N O3'),
('T', 'DTH', 'D-THREONINE', 'C4 H9 N O3'),
('V', 'DVA', 'D-VALINE', 'C5 H11 N O2'),
('W', 'DTR', 'D-TRYPTOPHAN', 'C11 H12 N2 O2'),
('Y', 'DTY', 'D-TYROSINE', 'C9 H11 N O3')])
_comps['G'] = PeptideChemComp('GLY', 'G', 'G', name='GLYCINE',
formula="C2 H5 N O2")
class RNAAlphabet(Alphabet):
"""A mapping from one-letter nucleic acid codes (e.g. A) to
RNA (as :class:`RNAChemComp` objects)."""
_comps = dict([id, RNAChemComp(id, id, id, name, formula)]
for id, name, formula in [
('A', "ADENOSINE-5'-MONOPHOSPHATE", 'C10 H14 N5 O7 P'),
('C', "CYTIDINE-5'-MONOPHOSPHATE", 'C9 H14 N3 O8 P'),
('G', "GUANOSINE-5'-MONOPHOSPHATE", 'C10 H14 N5 O8 P'),
('U', "URIDINE-5'-MONOPHOSPHATE", 'C9 H13 N2 O9 P')])
class DNAAlphabet(Alphabet):
"""A mapping from two-letter nucleic acid codes (e.g. DA) to
DNA (as :class:`DNAChemComp` objects)."""
_comps = dict([code, DNAChemComp(code, code, canon, name, formula)]
for code, canon, name, formula in [
('DA', 'A', "2'-DEOXYADENOSINE-5'-MONOPHOSPHATE",
'C10 H14 N5 O6 P'),
('DC', 'C', "2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE",
'C9 H14 N3 O7 P'),
('DG', 'G', "2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE",
'C10 H14 N5 O7 P'),
('DT', 'T', "THYMIDINE-5'-MONOPHOSPHATE",
'C10 H15 N2 O8 P')])
class EntityRange:
"""Part of an entity. Usually these objects are created from
an :class:`Entity`, e.g. to get a range covering residues 4 through
7 in `entity` use::
entity = ihm.Entity(sequence=...)
rng = entity(4,7)
"""
def __init__(self, entity, seq_id_begin, seq_id_end):
if not entity.is_polymeric():
raise TypeError("Can only create ranges for polymeric entities")
self.entity = entity
self.seq_id_range = (seq_id_begin, seq_id_end)
util._check_residue_range(self.seq_id_range, self.entity)
def __eq__(self, other):
try:
return (self.entity is other.entity
and self.seq_id_range == other.seq_id_range)
except AttributeError:
return False
def __hash__(self):
return hash((id(self.entity), self.seq_id_range))
# Use same ID as the original entity
_id = property(lambda self: self.entity._id)
class Atom:
"""A single atom in an entity or asymmetric unit. Usually these objects
are created by calling :meth:`Residue.atom`.
Note that this class does not store atomic coordinates of a given
atom in a given model; for that, see :class:`ihm.model.Atom`.
"""
__slots__ = ['residue', 'id']
def __init__(self, residue, id):
self.residue, self.id = residue, id
entity = property(lambda self: self.residue.entity)
asym = property(lambda self: self.residue.asym)
seq_id = property(lambda self: self.residue.seq_id)
class Residue:
"""A single residue in an entity or asymmetric unit. Usually these objects
are created by calling :meth:`Entity.residue` or
:meth:`AsymUnit.residue`.
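       For example, given an :class:`Entity` object `entity`, the CA atom
       of the third residue can be obtained with::
           res = entity.residue(3)
           ca = res.atom('CA')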
"""
__slots__ = ['entity', 'asym', 'seq_id', '_range_id']
def __init__(self, seq_id, entity=None, asym=None):
self.entity = entity
self.asym = asym
if entity is None and asym:
self.entity = asym.entity
self.seq_id = seq_id
if self.entity is not None and self.entity.is_polymeric():
util._check_residue(self)
def atom(self, atom_id):
"""Get a :class:`~ihm.Atom` in this residue with the given name."""
return Atom(residue=self, id=atom_id)
def _get_auth_seq_id(self):
return self.asym._get_auth_seq_id_ins_code(self.seq_id)[0]
auth_seq_id = property(_get_auth_seq_id,
doc="Author-provided seq_id; only makes sense "
"for asymmetric units")
def _get_ins_code(self):
return self.asym._get_auth_seq_id_ins_code(self.seq_id)[1]
ins_code = property(_get_ins_code,
doc="Insertion code; only makes sense "
"for asymmetric units")
def _get_comp(self):
return self.entity.sequence[self.seq_id - 1]
comp = property(_get_comp,
doc="Chemical component (residue type)")
# Allow passing residues where a range is requested
# (e.g. to ResidueFeature)
seq_id_range = property(lambda self: (self.seq_id, self.seq_id))
class Entity:
"""Represent a CIF entity (with a unique sequence)
:param sequence sequence: The primary sequence, as a sequence of
:class:`ChemComp` objects, and/or codes looked up in `alphabet`.
:param alphabet: The mapping from code to chemical components to use
(it is not necessary to instantiate this class).
:type alphabet: :class:`Alphabet`
:param str description: A short text name for the sequence.
:param str details: Longer text describing the sequence.
:param source: The method by which the sample for this entity was
produced.
:type source: :class:`ihm.source.Source`
:param references: Information about this entity stored in external
databases (for example the sequence in UniProt)
:type references: sequence of :class:`ihm.reference.Reference` objects
The sequence for an entity can be specified explicitly as a list of
chemical components, or (more usually) as a list or string of codes,
or a mixture of both.
For example::
# Construct with a string of one-letter amino acid codes
protein = ihm.Entity('AHMD')
# Some less common amino acids (e.g. MSE) have three-letter codes
protein_with_mse = ihm.Entity(['A', 'H', 'MSE', 'D'])
# Can use a non-default alphabet to make DNA or RNA sequences
dna = ihm.Entity(('DA', 'DC'), alphabet=ihm.DNAAlphabet)
rna = ihm.Entity('AC', alphabet=ihm.RNAAlphabet)
# Can pass explicit ChemComp objects by looking them up in Alphabets
dna_al = ihm.DNAAlphabet()
rna_al = ihm.RNAAlphabet()
dna_rna_hybrid = ihm.Entity((dna_al['DG'], rna_al['C']))
# For unusual components (e.g. modified residues or ligands),
# new ChemComp objects can be constructed
psu = ihm.RNAChemComp(id='PSU', code='PSU', code_canonical='U',
name="PSEUDOURIDINE-5'-MONOPHOSPHATE",
formula='C9 H13 N2 O9 P')
rna_with_psu = ihm.Entity(('A', 'C', psu), alphabet=ihm.RNAAlphabet)
For more examples, see the
       `ligands and water example <https://github.com/ihmwg/python-ihm/blob/main/examples/ligands_water.py>`_.
All entities should be stored in the top-level System object;
see :attr:`System.entities`.
""" # noqa: E501
_force_polymer = None
_hint_branched = None
# Set to False to allow invalid seq_ids for residue or residue_range;
# this is done, for example, when reading a file.
_range_check = True
def __get_type(self):
if self.is_polymeric():
return 'polymer'
elif self.is_branched():
return 'branched'
else:
return 'water' if self.sequence[0].code == 'HOH' else 'non-polymer'
type = property(__get_type)
def __get_src_method(self):
if self.source:
return self.source.src_method
elif self.type == 'water':
return 'nat'
else:
return 'man'
def __set_src_method(self, val):
raise TypeError("src_method is read-only; assign an appropriate "
"subclass of ihm.source.Source to source instead")
src_method = property(__get_src_method, __set_src_method)
def __get_weight(self):
weight = 0.
for s in self.sequence:
w = s.formula_weight
# If any component's weight is unknown, the total is too
if w:
weight += w
else:
return None
return weight
formula_weight = property(
__get_weight,
doc="Formula weight (dalton). This is calculated automatically "
"from that of the chemical components.")
def __init__(self, sequence, alphabet=LPeptideAlphabet,
description=None, details=None, source=None, references=[]):
def get_chem_comp(s):
if isinstance(s, ChemComp):
return s
else:
return alphabet._comps[s]
self.sequence = tuple(get_chem_comp(s) for s in sequence)
self.description, self.details = description, details
self.source = source
self.references = []
self.references.extend(references)
#: String descriptors of branched chemical structure.
#: These generally only make sense for oligosaccharide entities,
#: and should be a list of :class:`~ihm.BranchDescriptor` objects.
self.branch_descriptors = []
#: Any links between components in a branched entity.
#: This is a list of :class:`~ihm.BranchLink` objects.
self.branch_links = []
def __str__(self):
return "" % self.description
def is_polymeric(self):
"""Return True iff this entity represents a polymer, such as an
amino acid sequence or DNA/RNA chain (and not a ligand or water)"""
return (self._force_polymer or
(len(self.sequence) == 0 and not self._hint_branched) or
len(self.sequence) > 1
and any(isinstance(x, (PeptideChemComp, DNAChemComp,
RNAChemComp)) for x in self.sequence))
def is_branched(self):
"""Return True iff this entity is branched (generally
an oligosaccharide)"""
return ((len(self.sequence) > 1
and isinstance(self.sequence[0], SaccharideChemComp)) or
(len(self.sequence) == 0 and self._hint_branched))
def residue(self, seq_id):
"""Get a :class:`Residue` at the given sequence position"""
return Residue(entity=self, seq_id=seq_id)
# Entities are considered identical if they have the same sequence,
# unless they are branched
def __eq__(self, other):
if not isinstance(other, Entity):
return False
if self.is_branched() or other.is_branched():
return self is other
else:
return self.sequence == other.sequence
def __hash__(self):
if self.is_branched():
return hash(id(self))
else:
return hash(self.sequence)
def __call__(self, seq_id_begin, seq_id_end):
return EntityRange(self, seq_id_begin, seq_id_end)
def __get_seq_id_range(self):
if self.is_polymeric() or self.is_branched():
return (1, len(self.sequence))
else:
# Nonpolymers don't have the concept of seq_id
return (None, None)
seq_id_range = property(__get_seq_id_range, doc="Sequence range")
class AsymUnitRange:
"""Part of an asymmetric unit. Usually these objects are created from
an :class:`AsymUnit`, e.g. to get a range covering residues 4 through
7 in `asym` use::
asym = ihm.AsymUnit(entity)
rng = asym(4,7)
"""
def __init__(self, asym, seq_id_begin, seq_id_end):
if asym.entity is not None and not asym.entity.is_polymeric():
raise TypeError("Can only create ranges for polymeric entities")
self.asym = asym
self.seq_id_range = (seq_id_begin, seq_id_end)
util._check_residue_range(self.seq_id_range, self.entity)
def __eq__(self, other):
try:
return (self.asym is other.asym
and self.seq_id_range == other.seq_id_range)
except AttributeError:
return False
def __hash__(self):
return hash((id(self.asym), self.seq_id_range))
# Use same ID and entity as the original asym unit
_id = property(lambda self: self.asym._id)
_ordinal = property(lambda self: self.asym._ordinal)
entity = property(lambda self: self.asym.entity)
details = property(lambda self: self.asym.details)
class AsymUnitSegment:
"""An aligned part of an asymmetric unit.
Usually these objects are created from
an :class:`AsymUnit`, e.g. to get a segment covering residues 1 through
3 in `asym` use::
asym = ihm.AsymUnit(entity)
seg = asym.segment('--ACG', 1, 3)
"""
def __init__(self, asym, gapped_sequence, seq_id_begin, seq_id_end):
self.asym = asym
self.gapped_sequence = gapped_sequence
self.seq_id_range = (seq_id_begin, seq_id_end)
class AsymUnit:
"""An asymmetric unit, i.e. a unique instance of an Entity that
was modeled.
Note that this class should not be used to describe crystal waters;
for that, see :class:`ihm.WaterAsymUnit`.
:param entity: The unique sequence of this asymmetric unit.
:type entity: :class:`Entity`
:param str details: Longer text description of this unit.
:param auth_seq_id_map: Mapping from internal 1-based consecutive
residue numbering (`seq_id`) to PDB "author-provided" numbering
              (`auth_seq_id` plus an optional `ins_code`). This can either
be an int offset, in which case
``auth_seq_id = seq_id + auth_seq_id_map`` with no insertion
codes, or a mapping type (dict, list, tuple) in which case
``auth_seq_id = auth_seq_id_map[seq_id]`` with no insertion
codes, or
``auth_seq_id, ins_code = auth_seq_id_map[seq_id]`` - i.e. the
output of the mapping is either the author-provided number, or a
2-element tuple containing that number and an insertion code.
(Note that if a `list` or `tuple` is used for the mapping, the
first element in the list or tuple does
**not** correspond to the first residue and will never be used -
since `seq_id` can never be zero.) The default if
not specified, or not in the mapping, is for
``auth_seq_id == seq_id`` and for no insertion codes to be used.
:param str id: User-specified ID (usually a string of one or more
upper-case letters, e.g. A, B, C, AA). If not specified,
IDs are automatically assigned alphabetically.
:param str strand_id: PDB or "author-provided" strand/chain ID.
If not specified, it will be the same as the regular ID.
:param orig_auth_seq_id_map: Mapping from internal 1-based consecutive
residue numbering (`seq_id`) to original "author-provided"
numbering. This differs from `auth_seq_id_map` as the original
numbering need not follow any defined scheme, while
`auth_seq_id_map` must follow certain PDB-defined rules. This
can be any mapping type (dict, list, tuple) in which case
``orig_auth_seq_id = orig_auth_seq_id_map[seq_id]``. If the
mapping is None (the default), or a given `seq_id` cannot be
found in the mapping, ``orig_auth_seq_id = auth_seq_id``.
This mapping is only used in the various `scheme` tables, such
as ``pdbx_poly_seq_scheme``.
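       For example, given an :class:`Entity` object `entity`, residues
       1-3 can be numbered 6-8 in the author-provided scheme with either
       an int offset or an explicit mapping (only the mapping form can
       also carry insertion codes)::
           asym = ihm.AsymUnit(entity, auth_seq_id_map=5)
           # or, with an insertion code on the last residue
           asym = ihm.AsymUnit(
               entity, auth_seq_id_map={1: 6, 2: 7, 3: (8, 'A')})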
See :attr:`System.asym_units`.
"""
number_of_molecules = 1
def __init__(self, entity, details=None, auth_seq_id_map=0, id=None,
strand_id=None, orig_auth_seq_id_map=None):
if (entity is not None and entity.type == 'water'
and not isinstance(self, WaterAsymUnit)):
raise TypeError("Use WaterAsymUnit instead for creating waters")
self.entity, self.details = entity, details
self.auth_seq_id_map = auth_seq_id_map
self.orig_auth_seq_id_map = orig_auth_seq_id_map
self.id = id
self._strand_id = strand_id
#: For branched entities read from files, mapping from provisional
#: to final internal numbering (`seq_id`), or None if no mapping is
#: necessary. See :meth:`ihm.model.Model.add_atom`.
self.num_map = None
def _get_auth_seq_id_ins_code(self, seq_id):
if isinstance(self.auth_seq_id_map, numbers.Integral):
return seq_id + self.auth_seq_id_map, None
else:
try:
ret = self.auth_seq_id_map[seq_id]
if isinstance(ret, (numbers.Integral, str)):
return ret, None
else:
return ret
except (KeyError, IndexError):
return seq_id, None
def _get_pdb_auth_seq_id_ins_code(self, seq_id):
pdb_seq_num, ins_code = self._get_auth_seq_id_ins_code(seq_id)
if self.orig_auth_seq_id_map is None:
auth_seq_num = pdb_seq_num
else:
auth_seq_num = self.orig_auth_seq_id_map.get(seq_id, pdb_seq_num)
return pdb_seq_num, auth_seq_num, ins_code
def __call__(self, seq_id_begin, seq_id_end):
return AsymUnitRange(self, seq_id_begin, seq_id_end)
def residue(self, seq_id):
"""Get a :class:`Residue` at the given sequence position"""
return Residue(asym=self, seq_id=seq_id)
def segment(self, gapped_sequence, seq_id_begin, seq_id_end):
"""Get an object representing the alignment of part of this sequence.
:param str gapped_sequence: Sequence of the segment, including gaps.
:param int seq_id_begin: Start of the segment.
:param int seq_id_end: End of the segment.
"""
# todo: cache so we return the same object for same parameters
return AsymUnitSegment(self, gapped_sequence, seq_id_begin, seq_id_end)
seq_id_range = property(lambda self: self.entity.seq_id_range,
doc="Sequence range")
sequence = property(lambda self: self.entity.sequence,
doc="Primary sequence")
strand_id = property(lambda self: self._strand_id or self._id,
doc="PDB or author-provided strand/chain ID")
class WaterAsymUnit(AsymUnit):
"""A collection of crystal waters, all with the same "chain" ID.
:param int number: The number of water molecules in this unit.
For more information on this class and the rest of the parameters,
see :class:`AsymUnit`.
"""
def __init__(self, entity, number, details=None, auth_seq_id_map=0,
id=None, strand_id=None, orig_auth_seq_id_map=None):
if entity.type != 'water':
raise TypeError(
"WaterAsymUnit can only be used for water entities")
super().__init__(
entity, details=details, auth_seq_id_map=auth_seq_id_map,
id=id, strand_id=strand_id,
orig_auth_seq_id_map=orig_auth_seq_id_map)
self.number = number
self._water_sequence = [entity.sequence[0]] * number
seq_id_range = property(lambda self: (1, self.number),
doc="Sequence range")
sequence = property(lambda self: self._water_sequence,
doc="Primary sequence")
number_of_molecules = property(lambda self: self.number,
doc="Number of molecules")
class Assembly(list):
"""A collection of parts of the system that were modeled or probed
together.
:param sequence elements: Initial set of parts of the system.
:param str name: Short text name of this assembly.
:param str description: Longer text that describes this assembly.
This is implemented as a simple list of asymmetric units (or parts of
them), i.e. a list of :class:`AsymUnit` and/or :class:`AsymUnitRange`
objects. An Assembly is typically assigned to one or more of
- :class:`~ihm.model.Model`
- :class:`ihm.protocol.Step`
- :class:`ihm.analysis.Step`
- :class:`~ihm.restraint.Restraint`
See also :attr:`System.complete_assembly`
and :attr:`System.orphan_assemblies`.
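For example (an illustrative sketch; ``asym1`` and ``asym2`` are assumed
to be existing :class:`AsymUnit` objects)::
    # All of asym1, plus residues 1 through 10 of asym2
    assembly = ihm.Assembly((asym1, asym2(1, 10)), name='Subcomplex')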
Note that any duplicate assemblies will be pruned on output."""
#: :class:`Assembly` that is the immediate parent in a hierarchy, or `None`
parent = None
def __init__(self, elements=(), name=None, description=None):
super().__init__(elements)
self.name, self.description = name, description
def _signature(self):
"""Get a Python object that represents this Assembly. Notably, two
Assemblies that cover the same part of the system (even if the
components are in a different order) will have the same signature.
Signatures are also hashable, unlike the Assembly itself."""
d = collections.defaultdict(list)
for a in self:
# a might be an AsymUnit or an AsymUnitRange
asym = a.asym if hasattr(a, 'asym') else a
d[asym].append(a.seq_id_range)
ret = []
# asyms might not have IDs yet, so just put them in a consistent order
for asym in sorted(d.keys(), key=lambda x: id(x)):
ranges = d[asym]
# Non-polymers have no ranges
if all(r == (None, None) for r in ranges):
ret.append((asym, None))
else:
ret.append((asym, tuple(util._combine_ranges(d[asym]))))
return tuple(ret)
class ChemDescriptor:
"""Description of a non-polymeric chemical component used in the
experiment. For example, this might be a fluorescent probe or
cross-linking agent. This class describes the chemical structure of
the component, for example with a SMILES or INCHI descriptor, so that
it is uniquely defined. A descriptor is typically assigned to a
:class:`ihm.restraint.CrossLinkRestraint`.
See :mod:`ihm.cross_linkers` for chemical descriptors of some
commonly-used cross-linking agents.
:param str auth_name: Author-provided name
:param str chem_comp_id: If this chemical is listed in the Chemical
Component Dictionary, its three-letter identifier
:param str chemical_name: The systematic (IUPAC) chemical name
:param str common_name: Common name for the component
:param str smiles: SMILES string
:param str smiles_canonical: Canonical SMILES string
:param str inchi: IUPAC INCHI descriptor
:param str inchi_key: Hashed INCHI key
See also :attr:`System.orphan_chem_descriptors`.
"""
def __init__(self, auth_name, chem_comp_id=None, chemical_name=None,
common_name=None, smiles=None, smiles_canonical=None,
inchi=None, inchi_key=None):
self.auth_name, self.chem_comp_id = auth_name, chem_comp_id
self.chemical_name, self.common_name = chemical_name, common_name
self.smiles, self.smiles_canonical = smiles, smiles_canonical
self.inchi, self.inchi_key = inchi, inchi_key
class Collection:
"""A collection of entries belonging to single deposition or group.
These are used by the archive to group multiple related entries,
e.g. all entries deposited as part of a given study, or all
models for a genome. An entry (:class:`System`) can belong to
multiple collections.
:param str id: Unique identifier (assigned by the archive).
:param str name: Short name for the collection.
:param str details: Longer description of the collection.
See also :attr:`System.collections`.
"""
def __init__(self, id, name=None, details=None):
self.id, self.name, self.details = id, name, details
class BranchDescriptor:
"""String descriptor of branched chemical structure.
These generally only make sense for oligosaccharide entities.
See :attr:`Entity.branch_descriptors`.
:param str text: The value of this descriptor.
:param str type: The type of the descriptor; one of
"Glycam Condensed Core Sequence", "Glycam Condensed Sequence",
"LINUCS", or "WURCS".
:param str program: The name of the program or library used to compute
the descriptor.
:param str program_version: The version of the program or library
used to compute the descriptor.
"""
def __init__(self, text, type, program=None, program_version=None):
self.text, self.type = text, type
self.program, self.program_version = program, program_version
class BranchLink:
"""A link between components in a branched entity.
These generally only make sense for oligosaccharide entities.
See :attr:`Entity.branch_links`.
:param int num1: 1-based index of the first component.
:param str atom_id1: Name of the first atom in the linkage.
:param str leaving_atom_id1: Name of the first leaving atom.
:param int num2: 1-based index of the second component.
:param str atom_id2: Name of the second atom in the linkage.
:param str leaving_atom_id2: Name of the second leaving atom.
:param str order: Bond order (e.g. sing, doub, trip).
:param str details: More information about this link.
"""
def __init__(self, num1, atom_id1, leaving_atom_id1, num2, atom_id2,
leaving_atom_id2, order=None, details=None):
self.num1, self.atom_id1 = num1, atom_id1
self.num2, self.atom_id2 = num2, atom_id2
self.leaving_atom_id1 = leaving_atom_id1
self.leaving_atom_id2 = leaving_atom_id2
self.order, self.details = order, details
class DataUsage:
"""Information on how the data in the file can be used.
Do not use this class itself, but one of its subclasses, either
:class:`License` or :class:`Disclaimer`. DataUsage objects are
stored in :data:`ihm.System.data_usage`.
:param str details: Information about the data usage.
:param str name: An optional well-known name for the usage.
:param str url: An optional URL providing more information.
"""
type = 'other'
def __init__(self, details, name=None, url=None):
self.details, self.name, self.url = details, name, url
class License(DataUsage):
"""A license describing how the data in the file can be used.
See :class:`DataUsage` for more information."""
type = 'license'
class Disclaimer(DataUsage):
"""A disclaimer relating to usage of the data in the file.
See :class:`DataUsage` for more information."""
type = 'disclaimer'
class Revision:
"""Represent part of the history of a :class:`System`.
:param str data_content_type: The type of file that was changed.
:param int major: Major version number.
:param int minor: Minor version number.
:param date: Release date.
:type date: :class:`datetime.date`
Generally these objects are added to :attr:`System.revisions`.
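For example (an illustrative sketch; ``system`` is assumed to be an
existing :class:`System`)::
    import datetime
    r = Revision(data_content_type='Structure model', major=1, minor=0,
                 date=datetime.date(2024, 1, 1))
    system.revisions.append(r)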
"""
def __init__(self, data_content_type, major, minor, date):
self.data_content_type = data_content_type
self.major, self.minor = major, minor
self.date = date
#: More details of the changes, as :class:`RevisionDetails` objects
self.details = []
#: Collection of groups (as strings) updated with this revision
self.groups = []
#: Categories (as strings) updated with this revision
self.categories = []
#: Items (as strings) updated with this revision
self.items = []
class RevisionDetails:
"""More information on the changes in a given :class:`Revision`.
:param str provider: The provider (author, repository) of the revision.
:param str type: Classification of the revision.
:param str description: Additional details describing the revision.
These objects are typically stored in :attr:`Revision.details`.
"""
def __init__(self, provider, type, description):
self.provider = provider
self.type = type
self.description = description
python-ihm-2.7/ihm/analysis.py 0000664 0000000 0000000 00000007016 15035733372 0016444 0 ustar 00root root 0000000 0000000 """Classes for handling the analysis of a modeling run.
"""
from ihm.util import _text_choice_property
class Step:
"""A single step in an :class:`Analysis`.
Normally one of the more specific derived classes is used;
see :class:`FilterStep`, :class:`ClusterStep`, :class:`RescoreStep`,
:class:`ValidationStep`, and :class:`EmptyStep`, although this base
class can be used for a generic 'other' step.
:param str feature: The feature used in this step; one of
"energy/score", "RMSD", "dRMSD", "other", or "none"
:param int num_models_begin: The number of models at the beginning
of the step
:param int num_models_end: The number of models at the end of the step
:param assembly: The part of the system analyzed in this step
:type assembly: :class:`~ihm.Assembly`
:param dataset_group: The collection of datasets used in this analysis,
if applicable
:type dataset_group: :class:`~ihm.dataset.DatasetGroup`
:param software: The software used in this step
:type software: :class:`~ihm.Software`
:param script_file: Reference to the external file containing the
script used in this step (usually a
:class:`~ihm.location.WorkflowFileLocation`).
:type script_file: :class:`~ihm.location.Location`
:param str details: Additional text describing this step
"""
type = 'other'
def _get_report(self):
return ("%s (%s->%s models)"
% (self.type, self.num_models_begin, self.num_models_end))
def __init__(self, feature, num_models_begin, num_models_end,
assembly=None, dataset_group=None, software=None,
script_file=None, details=None):
self.assembly, self.dataset_group = assembly, dataset_group
self.feature, self.software = feature, software
self.num_models_begin = num_models_begin
self.num_models_end = num_models_end
self.script_file = script_file
self.details = details
feature = _text_choice_property(
"feature", ["energy/score", "RMSD", "dRMSD", "other", "none"],
doc="The feature used in the analysis, if applicable")
class FilterStep(Step):
"""A single filtering step in an :class:`Analysis`.
See :class:`Step` for a description of the parameters.
"""
type = 'filter'
class ClusterStep(Step):
"""A single clustering step in an :class:`Analysis`.
See :class:`Step` for a description of the parameters.
"""
type = 'cluster'
class RescoreStep(Step):
"""A single rescoring step in an :class:`Analysis`.
See :class:`Step` for a description of the parameters.
"""
type = 'rescore'
class ValidationStep(Step):
"""A single validation step in an :class:`Analysis`.
See :class:`Step` for a description of the parameters.
"""
type = 'validation'
class EmptyStep(Step):
"""A 'do nothing' step in an :class:`Analysis`.
This can be used if modeling outputs were used directly without
any kind of analysis.
"""
type = 'none'
def __init__(self):
super().__init__(
feature='none', num_models_begin=None, num_models_end=None)
class Analysis:
"""Analysis of a modeling run.
Each analysis consists of a number of steps (e.g. filtering,
rescoring, clustering). A modeling run may be followed by any number
of separate analyses.
See :attr:`ihm.protocol.Protocol.analyses`.
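For example (an illustrative sketch; ``protocol`` is assumed to be an
existing :class:`ihm.protocol.Protocol`)::
    analysis = Analysis()
    analysis.steps.append(ClusterStep(
        feature='RMSD', num_models_begin=10000, num_models_end=100))
    protocol.analyses.append(analysis)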
"""
def __init__(self):
#: All analysis steps (:class:`Step` objects)
self.steps = []
python-ihm-2.7/ihm/citations.py 0000664 0000000 0000000 00000012555 15035733372 0016622 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""Citations for some commonly-used software packages.
Each of these is an instance of the :class:`ihm.Citation` class,
and so can be used anywhere these objects are required, generally for
:class:`ihm.Software`.
"""
import ihm
imp = ihm.Citation(
pmid='22272186',
title='Putting the pieces together: integrative modeling platform '
'software for structure determination of macromolecular assemblies',
journal='PLoS Biol', volume=10, page_range='e1001244', year=2012,
authors=['Russel, D.', 'Lasker, K.', 'Webb, B.', 'Velázquez-Muriel, J.',
'Tjioe, E.', 'Schneidman-Duhovny, D.', 'Peterson, B.',
'Sali, A.'],
doi='10.1371/journal.pbio.1001244')
pmi = ihm.Citation(
pmid='31396911',
title='Modeling Biological Complexes Using Integrative Modeling Platform.',
journal='Methods Mol Biol', volume=2022, page_range=(353, 377), year=2019,
authors=['Saltzberg, D.', 'Greenberg, C.H.', 'Viswanath, S.',
'Chemmama, I.', 'Webb, B.', 'Pellarin, R.', 'Echeverria, I.',
'Sali, A.'],
doi='10.1007/978-1-4939-9608-7_15')
modeller = ihm.Citation(
pmid='8254673',
title='Comparative protein modelling by satisfaction of '
'spatial restraints.',
journal='J Mol Biol', volume=234, page_range=(779, 815), year=1993,
authors=['Sali, A.', 'Blundell, T.L.'], doi='10.1006/jmbi.1993.1626')
psipred = ihm.Citation(
pmid='10493868',
title='Protein secondary structure prediction based on position-specific '
'scoring matrices.',
journal='J Mol Biol', volume=292, page_range=(195, 202), year=1999,
authors=['Jones, D.T.'], doi='10.1006/jmbi.1999.3091')
disopred = ihm.Citation(
pmid='25391399',
title='DISOPRED3: precise disordered region predictions with annotated '
'protein-binding activity.',
journal='Bioinformatics', volume=31, page_range=(857, 863), year=2015,
authors=['Jones, D.T.', 'Cozzetto, D.'],
doi='10.1093/bioinformatics/btu744')
hhpred = ihm.Citation(
pmid='15980461',
title='The HHpred interactive server for protein homology detection '
'and structure prediction.',
journal='Nucleic Acids Res', volume=33, page_range=('W244', 'W248'),
year=2005, authors=['Söding, J.', 'Biegert, A.', 'Lupas, A.N.'],
doi='10.1093/nar/gki408')
relion = ihm.Citation(
pmid='23000701',
title='RELION: implementation of a Bayesian approach to cryo-EM '
'structure determination.',
journal='J Struct Biol', volume=180, page_range=(519, 530), year=2012,
authors=['Scheres, S.H.'], doi='10.1016/j.jsb.2012.09.006')
phyre2 = ihm.Citation(
pmid='25950237',
title='The Phyre2 web portal for protein modeling, prediction '
'and analysis.',
journal='Nat Protoc', volume=10, page_range=('845', '858'), year=2015,
authors=['Kelley, L.A.', 'Mezulis, S.', 'Yates, C.M.', 'Wass, M.N.',
'Sternberg, M.J.'],
doi='10.1038/nprot.2015.053')
swiss_model = ihm.Citation(
pmid='29788355',
title='SWISS-MODEL: homology modelling of protein structures '
'and complexes.',
journal='Nucleic Acids Res', volume=46, page_range=('W296', 'W303'),
year=2018,
authors=['Waterhouse, A.', 'Bertoni, M.', 'Bienert, S.', 'Studer, G.',
'Tauriello, G.', 'Gumienny, R.', 'Heer, F.T.', 'de Beer, T.A.P.',
'Rempfer, C.', 'Bordoli, L.', 'Lepore, R.', 'Schwede, T.'],
doi='10.1093/nar/gky427')
alphafold2 = ihm.Citation(
pmid='34265844',
title='Highly accurate protein structure prediction with AlphaFold.',
journal='Nature', volume=596, page_range=(583, 589), year=2021,
authors=['Jumper, J.', 'Evans, R.', 'Pritzel, A.', 'Green, T.',
'Figurnov, M.', 'Ronneberger, O.', 'Tunyasuvunakool, K.',
'Bates, R.', 'Zidek, A.', 'Potapenko, A.', 'Bridgland, A.',
'Meyer, C.', 'Kohl, S.A.A.', 'Ballard, A.J.', 'Cowie, A.',
'Romera-Paredes, B.', 'Nikolov, S.', 'Jain, R.', 'Adler, J.',
'Back, T.', 'Petersen, S.', 'Reiman, D.', 'Clancy, E.',
'Zielinski, M.', 'Steinegger, M.', 'Pacholska, M.',
'Berghammer, T.', 'Bodenstein, S.', 'Silver, D.', 'Vinyals, O.',
'Senior, A.W.', 'Kavukcuoglu, K.', 'Kohli, P.', 'Hassabis, D.'],
doi='10.1038/s41586-021-03819-2')
colabfold = ihm.Citation(
pmid='35637307',
title='ColabFold: making protein folding accessible to all.',
journal='Nature Methods', volume=19, page_range=(679, 682), year=2022,
authors=['Mirdita, M.', 'Schuetze, K.', 'Moriwaki, Y.', 'Heo, L.',
'Ovchinnikov, S.', 'Steinegger, M.'],
doi='10.1038/s41592-022-01488-1')
qmeandisco = ihm.Citation(
pmid='31697312',
title='QMEANDisCo-distance constraints applied on model quality '
'estimation.',
journal='Bioinformatics',
volume=36,
page_range=(1765, 1771),
year=2019,
authors=['Studer, G.', 'Rempfer, C.', 'Waterhouse, A.M.', 'Gumienny, R.',
'Haas, J.', 'Schwede, T.'],
doi='10.1093/bioinformatics/btz828')
mmseqs2 = ihm.Citation(
pmid='30615063',
title='MMseqs2 desktop and local web server app for fast, interactive '
'sequence searches.',
journal='Bioinformatics',
volume=35,
page_range=(2856, 2858),
year=2019,
authors=['Mirdita, M.', 'Steinegger, M.', 'Soeding, J.'],
doi='10.1093/bioinformatics/bty1057')
python-ihm-2.7/ihm/cross_linkers.py 0000664 0000000 0000000 00000007153 15035733372 0017503 0 ustar 00root root 0000000 0000000 """Chemical descriptors of commonly-used cross-linkers.
Each of these is an instance of the :class:`ihm.ChemDescriptor` class,
and so can be used anywhere these objects are required, generally for
:class:`ihm.restraint.CrossLinkRestraint`.
"""
import ihm
dss = ihm.ChemDescriptor(
'DSS', chemical_name='disuccinimidyl suberate',
smiles='C1CC(=O)N(C1=O)OC(=O)CCCCCCC(=O)ON2C(=O)CCC2=O',
inchi='1S/C16H20N2O8/c19-11-7-8-12(20)17(11)25-15(23)5-'
'3-1-2-4-6-16(24)26-18-13(21)9-10-14(18)22/h1-10H2',
inchi_key='ZWIBGKZDAWNIFC-UHFFFAOYSA-N')
dsg = ihm.ChemDescriptor(
'DSG', chemical_name='disuccinimidyl glutarate',
smiles='C1CC(=O)N(C1=O)OC(=O)CCCC(=O)ON2C(=O)CCC2=O',
inchi='1S/C13H14N2O8/c16-8-4-5-9(17)14(8)22-12(20)2-1-3-'
'13(21)23-15-10(18)6-7-11(15)19/h1-7H2',
inchi_key='LNQHREYHFRFJAU-UHFFFAOYSA-N')
bs3 = ihm.ChemDescriptor(
'BS3', chemical_name='bissulfosuccinimidyl suberate',
smiles='C1C(C(=O)N(C1=O)OC(=O)CCCCCCC(=O)ON2C(=O)CC(C2=O)S(=O)'
'(=O)O)S(=O)(=O)O',
inchi='1S/C16H20N2O14S2/c19-11-7-9(33(25,26)27)15(23)17(11)31'
'-13(21)5-3-1-2-4-6-14(22)32-18-12(20)8-10(16(18)24)'
'34(28,29)30/h9-10H,1-8H2,(H,25,26,27)(H,28,29,30)',
inchi_key='VYLDEYYOISNGST-UHFFFAOYSA-N')
dsso = ihm.ChemDescriptor(
'DSSO', chemical_name='disuccinimidyl sulfoxide',
smiles='O=C(CCS(CCC(ON1C(CCC1=O)=O)=O)=O)ON2C(CCC2=O)=O',
inchi='1S/C14H16N2O9S/c17-9-1-2-10(18)15(9)24-13(21)5-7-'
'26(23)8-6-14(22)25-16-11(19)3-4-12(16)20/h1-8H2',
inchi_key='XJSVVHDQSGMHAJ-UHFFFAOYSA-N')
edc = ihm.ChemDescriptor(
'EDC', chemical_name='1-ethyl-3-(3-dimethylaminopropyl)carbodiimide',
smiles='CCN=C=NCCCN(C)C',
inchi='1S/C8H17N3/c1-4-9-8-10-6-5-7-11(2)3/h4-7H2,1-3H3',
inchi_key='LMDZBCPBFSXMTL-UHFFFAOYSA-N')
dhso = ihm.ChemDescriptor(
'DHSO', chemical_name='dihydrazide sulfoxide',
smiles='NNC(=O)CC[S](=O)CCC(=O)NN',
inchi='1S/C6H14N4O3S/c7-9-5(11)1-3-14(13)4-2-6(12)10-8'
'/h1-4,7-8H2,(H,9,11)(H,10,12)',
inchi_key='XTCXQISMAWBOOT-UHFFFAOYSA-N')
bmso = ihm.ChemDescriptor(
'BMSO', chemical_name='bismaleimide sulfoxide',
smiles='O=C(CC[S](=O)CCC(=O)NCCN1C(=O)C=CC1=O)NCCN2C(=O)C=CC2=O',
inchi='1S/C18H22N4O7S/c23-13(19-7-9-21-15(25)1-2-16(21)26)5-'
'11-30(29)12-6-14(24)20-8-10-22-17(27)3-4-18(22)28/h1-'
'4H,5-12H2,(H,19,23)(H,20,24)',
inchi_key='PUNDHDZIOGBGHG-UHFFFAOYSA-N')
sda = ihm.ChemDescriptor(
'SDA', chemical_name="succinimidyl 4,4'-azipentanoate",
smiles='CC1(N=N1)CCC(ON2C(CCC2=O)=O)=O',
inchi='1S/C9H11N3O4/c1-9(10-11-9)5-4-8(15)16-12-6(13)2-3-'
'7(12)14/h2-5H2,1H3',
inchi_key='SYYLQNPWAPHRFV-UHFFFAOYSA-N')
photo_leucine = ihm.ChemDescriptor(
'L-Photo-Leucine', chemical_name='L-Photo-Leucine',
smiles='CC1(C[C@H](N)C(O)=O)N=N1',
inchi='1S/C5H9N3O2/c1-5(7-8-5)'
'2-3(6)4(9)10/h3H,2,6H2,1H3,(H,9,10)/t3-/m0/s1',
inchi_key='MJRDGTVDJKACQZ-VKHMYHEASA-N')
dsbu = ihm.ChemDescriptor(
'DSBU', chemical_name='disuccinimidyl dibutyric urea',
smiles='O=C(NCCCC(=O)ON1C(=O)CCC1=O)NCCCC(=O)ON2C(=O)CCC2=O',
inchi='1S/C17H22N4O9/c22-11-5-6-12(23)20(11)29-15(26)'
'3-1-9-18-17(28)19-10-2-4-16(27)30-21-13(24)7-8-14(21)'
'25/h1-10H2,(H2,18,19,28)',
inchi_key='XZSQCCZQFXUQCY-UHFFFAOYSA-N')
phoX = ihm.ChemDescriptor(
'DSPP', chemical_name='(3,5-bis(((2,5-dioxopyrrolidin-1-yl)oxy)'
'carbonyl) phenyl)phosphonic acid')
tbuphoX = ihm.ChemDescriptor(
'TBDSPP', chemical_name='tert-butyl disuccinimidyl '
'phenyl phosphonate, tBu-PhoX')
python-ihm-2.7/ihm/dataset.py 0000664 0000000 0000000 00000016577 15035733372 0016262 0 ustar 00root root 0000000 0000000 # coding=utf-8
"""Classes for handling experimental datasets used by mmCIF models.
"""
class Dataset:
"""A set of input data, for example, a crystal structure or EM map.
:param location: a pointer to where the
dataset is stored. This is usually a subclass of
:class:`~ihm.location.DatabaseLocation` if the dataset is
deposited in a database such as PDB or EMDB, or
:class:`~ihm.location.InputFileLocation` if the dataset is stored
in an external file.
:type location: :class:`ihm.location.Location`
:param str details: Text giving more information about the dataset.
"""
_eq_keys = ['_locations']
_allow_duplicates = False
# Datasets compare equal iff they are the same class, have the
# same attributes, and allow_duplicates=False
def _eq_vals(self):
if self._allow_duplicates:
return id(self)
else:
return tuple([self.__class__]
+ [getattr(self, x) for x in self._eq_keys])
def __eq__(self, other):
return self._eq_vals() == other._eq_vals()
def __hash__(self):
return hash(self._eq_vals())
data_type = 'Other'
def __init__(self, location, details=None):
# The dictionary actually allows for multiple locations for a given
# dataset. Support this via a private attribute so we can at least
# handle reading existing files. 'location' just references the
# first location in this list.
self._locations = ()
self.location, self.details = location, details
#: A list of :class:`Dataset` and/or :class:`TransformedDataset`
#: objects from which this one was derived.
#: For example, a 3D EM map may be derived from a set of 2D images.
self.parents = []
def _add_location(self, loc):
if self.location is None:
self.location = loc
else:
self._locations += (loc,)
def __get_location(self):
return self._locations[0]
def __set_location(self, val):
self._locations = (val, )
location = property(__get_location, __set_location,
doc="A pointer to where the dataset is stored")
def add_primary(self, dataset):
"""Add another Dataset from which this one was ultimately derived,
i.e. it is added as a parent, unless a parent already exists,
in which case it is added as a grandparent, and so on."""
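# Illustrative sketch (d1, d2, d3 are hypothetical Dataset objects):
#   d1.add_primary(d2)   # d2 becomes the parent of d1
#   d1.add_primary(d3)   # d3 becomes the parent of d2 (d1's grandparent)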
root = self
while root.parents:
if len(root.parents) > 1:
raise ValueError("This dataset has multiple parents - don't "
"know which one to add to")
root = root.parents[0]
root.parents.append(dataset)
class TransformedDataset:
"""A :class:`Dataset` that should be rotated or translated before using.
This is typically used for derived datasets
(see :attr:`Dataset.parents`) where the derived dataset lies in a
different coordinate frame from the parent (for example, it was moved to better
align with the model's reference frame or other experimental data).
The transformation that places the derived dataset on the parent
is recorded here.
:param dataset: The (parent) dataset.
:type dataset: :class:`Dataset`
:param transform: The rotation and translation that places a
derived dataset on this dataset.
:type transform: :class:`ihm.geometry.Transformation`
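For example (an illustrative sketch; ``parent`` is assumed to be an
existing :class:`Dataset`, and :class:`ihm.geometry.Transformation` is
assumed to be constructed from a 3x3 rotation matrix plus a
translation vector)::
    t = ihm.geometry.Transformation(
        rot_matrix=[[1, 0, 0], [0, 1, 0], [0, 0, 1]],
        tr_vector=[10., 0., 0.])
    td = TransformedDataset(parent, transform=t)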
"""
def __init__(self, dataset, transform):
self.dataset, self.transform = dataset, transform
class DatasetGroup(list):
"""A set of :class:`Dataset` objects that are handled together.
This is implemented as a simple list.
:param sequence elements: Initial set of datasets.
:param str name: Short text name of this group.
:param str application: Text that shows how this group is used.
:param str details: Longer text that describes this group.
Normally a group is passed to one or more
:class:`~ihm.protocol.Protocol` or :class:`~ihm.analysis.Analysis`
objects, although unused groups can still be included in the file
if desired by adding them to :attr:`ihm.System.orphan_dataset_groups`.
"""
# For backwards compatibility with earlier versions of this class which
# didn't specify name/application/details
name = application = details = None
def __init__(self, elements=(), name=None, application=None, details=None):
super().__init__(elements)
self.name, self.application = name, application
self.details = details
class CXMSDataset(Dataset):
"""Processed cross-links from a CX-MS experiment"""
data_type = 'Crosslinking-MS data'
class MassSpecDataset(Dataset):
"""Raw mass spectrometry files such as peaklists"""
data_type = 'Mass Spectrometry data'
class HDXDataset(Dataset):
"""Data from a hydrogen/deuterium exchange experiment"""
data_type = 'H/D exchange data'
class PDBDataset(Dataset):
"""An experimentally-determined 3D structure as a set of a coordinates,
usually in a PDB or mmCIF file"""
data_type = 'Experimental model'
class ComparativeModelDataset(Dataset):
"""A 3D structure determined by comparative modeling"""
data_type = 'Comparative model'
class IntegrativeModelDataset(Dataset):
"""A 3D structure determined by integrative modeling"""
data_type = 'Integrative model'
class DeNovoModelDataset(Dataset):
"""A 3D structure determined by de novo modeling"""
data_type = 'De Novo model'
class NMRDataset(Dataset):
"""A nuclear magnetic resonance (NMR) dataset"""
data_type = 'NMR data'
class MutagenesisDataset(Dataset):
"""Mutagenesis data"""
data_type = 'Mutagenesis data'
class EMDensityDataset(Dataset):
"""A 3D electron microscopy dataset"""
data_type = '3DEM volume'
class EMMicrographsDataset(Dataset):
"""Raw 2D electron micrographs"""
data_type = 'EM raw micrographs'
class EM2DClassDataset(Dataset):
"""2DEM class average"""
data_type = '2DEM class average'
class SASDataset(Dataset):
"""SAS data"""
data_type = 'SAS data'
class FRETDataset(Dataset):
"""Single molecule data from a Förster resonance energy transfer
(FRET) experiment"""
data_type = 'Single molecule FRET data'
class EnsembleFRETDataset(Dataset):
"""Ensemble data from a Förster resonance energy transfer
(FRET) experiment"""
data_type = 'Ensemble FRET data'
class YeastTwoHybridDataset(Dataset):
"""Yeast two-hybrid data"""
data_type = 'Yeast two-hybrid screening data'
class GeneticInteractionsDataset(Dataset):
"""Quantitative measurements of genetic interactions"""
data_type = 'Quantitative measurements of genetic interactions'
class EPRDataset(Dataset):
"""Electron paramagnetic resonance (EPR) data"""
data_type = 'EPR data'
class XRayDiffractionDataset(Dataset):
"""Data from X-ray diffraction"""
data_type = 'X-ray diffraction data'
class HydroxylRadicalFootprintingDataset(Dataset):
"""Data from hydroxyl radical footprinting"""
data_type = 'Hydroxyl radical footprinting data'
class DNAFootprintingDataset(Dataset):
"""Data from DNA footprinting"""
data_type = 'DNA footprinting data'
class PredictedContactsDataset(Dataset):
"""A collection of predicted contacts"""
data_type = 'Predicted contacts'
python-ihm-2.7/ihm/dictionary.py 0000664 0000000 0000000 00000040662 15035733372 0016772 0 ustar 00root root 0000000 0000000 """Classes to read in and represent an mmCIF extension dictionary"""
import ihm.reader
import ihm.format
import ihm.format_bcif
import re
import itertools
from ihm.reader import Handler
# Handle special values for CIF data items ('.', '?', or missing entirely)
class _CifSpecialValue:
pass
class _NotInFileCif(_CifSpecialValue):
pass
class _OmittedCif(_CifSpecialValue):
pass
class _UnknownCif(_CifSpecialValue):
pass
class _KeywordEnumeration(set):
"""Set of possible values for a keyword. Can be case insensitive."""
def __init__(self):
super().__init__()
self.case_sensitive = True
self._upper_set = None
def add(self, item):
self._upper_set = None # Invalidate upper_set
super().add(item)
def __contains__(self, item):
if self.case_sensitive:
return super().__contains__(item)
else:
if self._upper_set is None:
self._upper_set = set(x.upper() for x in self)
return item.upper() in self._upper_set
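# Illustrative example of the case handling above:
#   e = _KeywordEnumeration()
#   e.add('YES')
#   'yes' in e            # False (case sensitive by default)
#   e.case_sensitive = False
#   'yes' in e            # now True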
class ValidatorError(Exception):
"""Exception raised if a file fails to validate.
See :meth:`Dictionary.validate`."""
pass
class _ValidatorCategoryHandler(Handler):
# Handle special values for CIF data items ('.', '?', or missing entirely)
# explicitly, rather than the default behavior (mapping to None or '?')
not_in_file = _NotInFileCif()
omitted = _OmittedCif()
unknown = _UnknownCif()
def __init__(self, sysr, category):
super().__init__(sysr)
self.category = '_' + category.name
self.category_obj = category
self._keys = [k.lower() for k in category.keywords.keys()]
self.link_keys = set()
li = sysr.dictionary.linked_items
for link in itertools.chain(li.keys(), li.values()):
cat, key = link.split('.')
if cat == self.category:
self.link_keys.add(key)
def __call__(self, *args):
self.sysr.validate_data(self.category_obj, self._keys, args,
self.link_keys)
class _ValidatorReader:
"""Track information used for validation while reading an mmCIF file"""
def __init__(self, dictionary):
self.dictionary = dictionary
self._seen_categories = set()
self._unknown_categories = set()
self._unknown_keywords = set()
# Keep track of all values (IDs) seen for keys that are involved in
# parent-child relationships
self._seen_ids = {}
li = dictionary.linked_items
for link in itertools.chain(li.keys(), li.values()):
self._seen_ids[link] = set()
self.errors = []
def validate_data(self, category, keywords, args, link_keys):
self._seen_categories.add(category.name)
for key, value in zip(keywords, args):
if key in link_keys and not isinstance(value, _CifSpecialValue):
self._seen_ids["_%s.%s" % (category.name, key)].add(value)
kwobj = category.keywords[key]
if kwobj.mandatory:
if isinstance(value, _UnknownCif):
self.errors.append("Mandatory keyword %s.%s cannot have "
"value '?'" % (category.name, key))
elif isinstance(value, _NotInFileCif):
self.errors.append("Mandatory keyword %s.%s cannot be "
"missing from the file"
% (category.name, key))
if isinstance(value, _CifSpecialValue):
continue
if kwobj.enumeration and value not in kwobj.enumeration:
self.errors.append("Keyword %s.%s value %s is not a valid "
"enumerated value (options are %s)"
% (category.name, key, value,
", ".join(sorted(kwobj.enumeration))))
if kwobj.item_type and not kwobj.item_type.regex.match(str(value)):
self.errors.append("Keyword %s.%s value %s does not match "
"item type (%s) regular expression (%s)"
% (category.name, key, value,
kwobj.item_type.name,
kwobj.item_type.construct))
def _check_mandatory_categories(self):
all_categories = self.dictionary.categories
mandatory_categories = [c.name for c in all_categories.values()
if c.mandatory]
missing = set(mandatory_categories) - self._seen_categories
if missing:
self.errors.append(
"The following mandatory categories are missing "
"in the file: %s" % ", ".join(sorted(missing)))
def _check_linked_items(self):
"""Check to make sure any ID referenced by a child item is defined
in the parent"""
for child, parent in self.dictionary.linked_items.items():
if not self._seen_ids[child] <= self._seen_ids[parent]:
# Strip _ prefix from category
cat, key = parent[1:].split('.')
# Only warn about relationships where the parent is defined
# in this dictionary (e.g. a lot of IHM items point back
# to PDBx categories)
# Chemical component dictionary checks are handled elsewhere;
# the chem_comp_* categories don't need to be fully populated
if cat in self.dictionary.categories \
and not cat.startswith('chem_comp_'):
missing = sorted(self._seen_ids[child]
- self._seen_ids[parent])
self.errors.append(
"The following IDs referenced by %s "
"were not defined in the parent category (%s): %s"
% (child, parent, ", ".join(missing)))
def _check_unknown(self):
"""Report errors for any unknown keywords or categories"""
if self._unknown_categories:
self.errors.append(
"The following categories are not defined in the "
"dictionary: %s"
% ", ".join(sorted(self._unknown_categories)))
if self._unknown_keywords:
self.errors.append(
"The following keywords are not defined in the dictionary: %s"
% ", ".join(sorted(self._unknown_keywords)))
def report_errors(self):
self._check_mandatory_categories()
self._check_linked_items()
self._check_unknown()
if self.errors:
raise ValidatorError("\n\n".join(self.errors))
class _UnknownCategoryHandler:
def __init__(self, sysr):
self.sysr = sysr
def __call__(self, catname, line):
self.sysr._unknown_categories.add(catname)
class _UnknownKeywordHandler:
def __init__(self, sysr):
self.sysr = sysr
def __call__(self, catname, keyname, line):
self.sysr._unknown_keywords.add("%s.%s" % (catname, keyname))
class Dictionary:
"""Representation of an mmCIF dictionary.
See :func:`read` to create a Dictionary from a file.
Multiple Dictionaries can be added together to yield a Dictionary
that includes all the data in the original Dictionaries.
See the `validator example `_
for an example of using this class.""" # noqa: E501
def __init__(self):
#: Mapping from name to :class:`Category` objects
self.categories = {}
#: Links between items; keys are children, values are parents e.g.
#: ``linked_items['_ihm_starting_model_details.asym_id'] =
#: '_struct_asym.id'``
self.linked_items = {}
def __iadd__(self, other):
for name, cat in other.categories.items():
if name in self.categories:
# If both dictionaries contain information on the same
# category, combine it
self.categories[name]._update(cat)
else:
self.categories[name] = cat
self.linked_items.update(other.linked_items)
return self
def __add__(self, other):
d = Dictionary()
d += self
d += other
return d
def validate(self, fh, format='mmCIF'):
"""Validate the given file against this dictionary.
:param file fh: The file handle to read from.
:param str format: The format of the file. This can be 'mmCIF' (the
default) for the (text-based) mmCIF format or 'BCIF' for
BinaryCIF.
:raises: :class:`ValidatorError` if the file fails to validate.
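For example (an illustrative sketch; the dictionary and model file
names here are assumed)::
    with open('mmcif_ihm.dic') as fh:
        d = ihm.dictionary.read(fh)
    with open('model.cif') as fh:
        d.validate(fh)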
"""
reader_map = {'mmCIF': ihm.format.CifReader,
'BCIF': ihm.format_bcif.BinaryCifReader}
s = _ValidatorReader(self)
uchandler = _UnknownCategoryHandler(s)
ukhandler = _UnknownKeywordHandler(s)
r = reader_map[format](fh, {}, unknown_category_handler=uchandler,
unknown_keyword_handler=ukhandler)
handlers = [_ValidatorCategoryHandler(s, cat)
for cat in self.categories.values()]
r.category_handler = dict((h.category, h) for h in handlers)
# Read all data blocks
while r.read_file():
pass
s.report_errors()
class Category:
"""Representation of a single category in a :class:`Dictionary`."""
def __init__(self):
#: Category name
self.name = None
#: Human-readable text
self.description = None
#: Mapping from name to :class:`Keyword` objects
self.keywords = {}
#: True iff this category is required in a compliant mmCIF file
self.mandatory = None
def _update(self, other):
"""Update with information from another Category object"""
assert other.name == self.name
self.keywords.update(other.keywords)
self.description = self.description or other.description
if self.mandatory is None:
# If we have no value of our own yet, take other's value
# directly (even if it is False or None)
self.mandatory = other.mandatory
else:
self.mandatory = self.mandatory or other.mandatory
class _DoNothingRegEx:
"""A mock regex object which always matches"""
def match(self, value):
return True
class ItemType:
"""Represent the type of a data item.
This keeps the set of valid strings for values of a given
:class:`Keyword`. For example, integer values can only contain
the digits 0-9 with an optional +/- prefix."""
def __init__(self, name, primitive_code, construct):
self.name = name
# The dictionary only defines matches against ASCII characters.
# Extend this to match any Unicode "word" character so we don't
# fail to validate as soon as we see an accented character.
self.construct = construct.replace('A-Za-z0-9', r'\w')
self.primitive_code = primitive_code
# Ensure that regex matches the entire value
try:
self.regex = re.compile(self.construct + '$')
except re.error:
# Some CIF regexes aren't valid Python regexes; skip these
self.regex = _DoNothingRegEx()
case_sensitive = property(lambda x: x.primitive_code != 'uchar',
doc='True iff this type is case sensitive')
class Keyword:
"""Representation of a single keyword in a :class:`Category`."""
def __init__(self):
#: Keyword name
self.name = None
#: True iff this keyword is required in a compliant mmCIF file
self.mandatory = None
#: Set of acceptable values, or None
self.enumeration = None
#: :class:`ItemType` for this keyword, or None
self.item_type = None
class _DictionaryReader:
"""Track information for a Dictionary being read from a file."""
def __init__(self):
self.dictionary = Dictionary()
self.item_types = {} # Mapping from name to ItemType object
self._reset_category()
self._reset_keyword()
def _reset_category(self):
self.category = Category()
self.category_good = False
def _reset_keyword(self):
self._keyword_info = []
self._keyword_item_type = None
self._keyword_enumeration = None
self.keyword_good = False
def end_save_frame(self):
if self.keyword_good:
for (name, category, mandatory) in self._keyword_info:
k = Keyword()
k.name, k.mandatory = name.lower(), mandatory
k.enumeration = self._keyword_enumeration
k.item_type = self._keyword_item_type
# If the owning category does not exist, make it; this can
# happen if we extend something in the core dictionary
# (e.g. atom_site.ihm_model_id)
if category not in self.dictionary.categories:
c = Category()
c.name = category
self.dictionary.categories[c.name] = c
else:
c = self.dictionary.categories[category]
c.keywords[k.name] = k
self._reset_keyword()
if self.category_good:
c = self.category
if c.name in self.dictionary.categories:
# Handle case where keywords were defined before category
self.dictionary.categories[c.name]._update(c)
else:
self.dictionary.categories[c.name] = c
self._reset_category()
class _CategoryHandler(Handler):
category = '_category'
def __call__(self, id, description, mandatory_code: bool):
c = self.sysr.category
c.name, c.description = id, description
c.mandatory = mandatory_code
self.sysr.category_good = True
def end_save_frame(self):
self.sysr.end_save_frame()
class _ItemHandler(Handler):
category = '_item'
def __call__(self, name, category_id, mandatory_code: bool):
cat, name = name.split('.')
ki = self.sysr._keyword_info
# If category_id is missing, strip leading _ from the keyword's
# own category name and use that instead
if category_id is None:
category_id = cat[1:]
ki.append((name, category_id, mandatory_code))
self.sysr.keyword_good = True
class _ItemEnumerationHandler(Handler):
category = '_item_enumeration'
def __call__(self, value):
if self.sysr._keyword_enumeration is None:
self.sysr._keyword_enumeration = _KeywordEnumeration()
self.sysr._keyword_enumeration.add(value)
class _ItemTypeListHandler(Handler):
category = '_item_type_list'
def __call__(self, code, primitive_code, construct):
it = ItemType(code, primitive_code, construct)
self.sysr.item_types[it.name] = it
class _ItemTypeHandler(Handler):
category = '_item_type'
def __call__(self, code):
self.sysr._keyword_item_type = code
def finalize(self):
for c in self.sysr.dictionary.categories.values():
for k in c.keywords.values():
if k.item_type is not None:
# Map unrecognized type codes to None
# For example, the ihm dictionary often uses the
# 'atcode' type which is not defined in the dictionary
# itself (but presumably is in the base PDBx dict)
k.item_type = self.sysr.item_types.get(k.item_type)
if k.item_type is not None and k.enumeration:
k.enumeration.case_sensitive = k.item_type.case_sensitive
class _ItemLinkedHandler(Handler):
category = '_item_linked'
def __call__(self, child_name, parent_name):
self.sysr.dictionary.linked_items[child_name] = parent_name
def read(fh):
"""Read dictionary data from the mmCIF file handle `fh`.
:return: The dictionary data.
:rtype: :class:`Dictionary`
"""
r = ihm.format.CifReader(fh, {})
s = _DictionaryReader()
handlers = [_CategoryHandler(s), _ItemHandler(s),
_ItemEnumerationHandler(s),
_ItemTypeListHandler(s), _ItemTypeHandler(s),
_ItemLinkedHandler(s)]
r.category_handler = dict((h.category, h) for h in handlers)
r.read_file()
for h in handlers:
h.finalize()
return s.dictionary
python-ihm-2.7/ihm/dumper.py 0000664 0000000 0000000 00000562455 15035733372 0016132 0 ustar 00root root 0000000 0000000 """Utility classes to dump out information in mmCIF or BinaryCIF format"""
import re
import os
import numbers
import collections
import operator
import itertools
import warnings
import datetime
import ihm.format
import ihm.format_bcif
import ihm.model
import ihm.representation
import ihm.source
import ihm.multi_state_scheme
import ihm.flr
import ihm.dataset
from . import util
from . import location
from . import restraint
from . import geometry
def _is_subrange(rng1, rng2):
"""Return True iff rng1 is wholly inside rng2"""
# Nonpolymers should have an empty range
if rng1 == (None, None) or rng2 == (None, None):
return rng1 == rng2
else:
return rng1[0] >= rng2[0] and rng1[1] <= rng2[1]
class Dumper:
"""Base class for helpers to dump output to mmCIF or BinaryCIF.
See :func:`write`."""
# Set to False to disable dump-time sanity checks
_check = True
def __init__(self):
pass
def finalize(self, system):
"""Called for all dumpers prior to :meth:`dump`.
This can be used to assign numeric IDs to objects, check for
sanity, etc."""
pass
def dump(self, system, writer):
"""Use `writer` to write information about `system` to
mmCIF or BinaryCIF.
:param system: The :class:`ihm.System` object containing all
information about the system.
:type system: :class:`ihm.System`
:param writer: Utility class to write data to the output file.
:type writer: :class:`ihm.format.CifWriter` or
:class:`ihm.format_bcif.BinaryCifWriter`.
"""
pass
def _get_transform(rot_matrix, tr_vector):
"""Return a dict encoding a transform, suitable for passing to
loop.write()"""
if rot_matrix in (None, ihm.unknown):
rm = [[rot_matrix for _ in range(3)] for _ in range(3)]
else:
# mmCIF writer usually outputs floats to 3 decimal
# places, but we need more precision for rotation
# matrices
rm = [["%.6f" % e for e in rot_matrix[i]] for i in range(3)]
if tr_vector in (None, ihm.unknown):
tr_vector = [tr_vector for _ in range(3)]
return {'rot_matrix11': rm[0][0], 'rot_matrix21': rm[1][0],
'rot_matrix31': rm[2][0], 'rot_matrix12': rm[0][1],
'rot_matrix22': rm[1][1], 'rot_matrix32': rm[2][1],
'rot_matrix13': rm[0][2], 'rot_matrix23': rm[1][2],
'rot_matrix33': rm[2][2], 'tr_vector1': tr_vector[0],
'tr_vector2': tr_vector[1], 'tr_vector3': tr_vector[2]}
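# Illustrative example: an identity rotation plus a 10-unit translation
# along x. Rotation elements are formatted to 6 decimal places:
#   _get_transform([[1, 0, 0], [0, 1, 0], [0, 0, 1]], [10.0, 0.0, 0.0])
#   -> {'rot_matrix11': '1.000000', ..., 'tr_vector1': 10.0, ...}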
class _EntryDumper(Dumper):
def dump(self, system, writer):
# Write CIF header (so this dumper should always be first)
writer.start_block(re.subn('[^0-9a-zA-Z_-]', '', system.id)[0])
with writer.category("_entry") as lp:
lp.write(id=system.id)
class _CollectionDumper(Dumper):
def dump(self, system, writer):
with writer.loop("_ihm_entry_collection",
["id", "name", "details"]) as lp:
for c in system.collections:
lp.write(id=c.id, name=c.name, details=c.details)
class _AuditConformDumper(Dumper):
URL = ("https://raw.githubusercontent.com/" +
"ihmwg/IHMCIF/%s/dist/mmcif_ihm.dic")
def dump(self, system, writer):
with writer.category("_audit_conform") as lp:
# Update to match the version of the IHM dictionary we support:
lp.write(dict_name="mmcif_ihm.dic", dict_version="1.28",
dict_location=self.URL % "44ed2c3")
class _StructDumper(Dumper):
def dump(self, system, writer):
with writer.category("_struct") as lp:
mth = system.structure_determination_methodology
lp.write(title=system.title, entry_id=system.id,
pdbx_structure_determination_methodology=mth,
pdbx_model_details=system.model_details)
class _CommentDumper(Dumper):
def dump(self, system, writer):
for comment in system.comments:
writer.write_comment(comment)
class _SoftwareDumper(Dumper):
def finalize(self, system):
seen_software = {}
self._software_by_id = []
for s in system._all_software():
util._remove_id(s)
for s in system._all_software():
util._assign_id(s, seen_software, self._software_by_id)
def dump(self, system, writer):
# todo: specify these attributes in only one place (e.g. in the
# Software class)
with writer.loop("_software",
["pdbx_ordinal", "name", "classification",
"description", "version", "type", "location",
"citation_id"]) as lp:
for s in self._software_by_id:
lp.write(pdbx_ordinal=s._id, name=s.name,
classification=s.classification,
description=s.description, version=s.version,
type=s.type, location=s.location,
citation_id=s.citation._id if s.citation else None)
class _CitationDumper(Dumper):
def finalize(self, system):
primaries = []
non_primaries = []
for c in system._all_citations():
(primaries if c.is_primary else non_primaries).append(c)
# Put primary citations first in list
self._all_citations = primaries + non_primaries
for nc, c in enumerate(self._all_citations):
c._id = nc + 1
if primaries:
if len(primaries) > 1:
raise ValueError(
"Multiple Citations with is_primary=True; only one can "
"be primary: %s" % primaries)
else:
primaries[0]._id = 'primary'
def dump(self, system, writer):
self.dump_citations(self._all_citations, writer)
self.dump_authors(self._all_citations, writer)
def dump_citations(self, citations, writer):
with writer.loop("_citation",
["id", "title", "journal_abbrev", "journal_volume",
"page_first", "page_last", "year",
"pdbx_database_id_PubMed",
"pdbx_database_id_DOI"]) as lp:
for c in citations:
if isinstance(c.page_range, (tuple, list)):
page_first, page_last = c.page_range
else:
page_first = c.page_range
page_last = None
lp.write(id=c._id, title=c.title, journal_abbrev=c.journal,
journal_volume=c.volume, page_first=page_first,
page_last=page_last, year=c.year,
pdbx_database_id_PubMed=c.pmid,
pdbx_database_id_DOI=c.doi)
def dump_authors(self, citations, writer):
with writer.loop("_citation_author",
["citation_id", "name", "ordinal"]) as lp:
ordinal = itertools.count(1)
for c in citations:
for a in c.authors:
lp.write(citation_id=c._id, name=a, ordinal=next(ordinal))
class _AuditAuthorDumper(Dumper):
def _get_citation_authors(self, system):
# If system.authors is empty, get the set of all citation authors
# instead
seen_authors = set()
# Only look at explicitly-added citations (since these are likely to
# describe the modeling), not those that describe a method or a piece of
# software we used (system._all_citations())
for c in system.citations:
for a in c.authors:
if a not in seen_authors:
seen_authors.add(a)
yield a
def dump(self, system, writer):
authors = system.authors or self._get_citation_authors(system)
with writer.loop("_audit_author",
["name", "pdbx_ordinal"]) as lp:
for n, author in enumerate(authors):
lp.write(name=author, pdbx_ordinal=n + 1)
class _AuditRevisionDumper(Dumper):
def finalize(self, system):
for n, rev in enumerate(system.revisions):
rev._id = n + 1
def dump(self, system, writer):
self._dump_history(system, writer)
self._dump_details(system, writer)
self._dump_groups(system, writer)
self._dump_categories(system, writer)
self._dump_items(system, writer)
def _dump_history(self, system, writer):
with writer.loop("_pdbx_audit_revision_history",
["ordinal", "data_content_type", "major_revision",
"minor_revision", "revision_date"]) as lp:
for rev in system.revisions:
lp.write(ordinal=rev._id,
data_content_type=rev.data_content_type,
major_revision=rev.major, minor_revision=rev.minor,
revision_date=datetime.date.isoformat(rev.date)
if rev.date else rev.date)
def _dump_details(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_pdbx_audit_revision_details",
["ordinal", "revision_ordinal", "data_content_type",
"provider", "type", "description"]) as lp:
for rev in system.revisions:
for d in rev.details:
lp.write(ordinal=next(ordinal), revision_ordinal=rev._id,
data_content_type=rev.data_content_type,
provider=d.provider, type=d.type,
description=d.description)
def _dump_groups(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_pdbx_audit_revision_group",
["ordinal", "revision_ordinal", "data_content_type",
"group"]) as lp:
for rev in system.revisions:
for group in rev.groups:
lp.write(ordinal=next(ordinal), revision_ordinal=rev._id,
data_content_type=rev.data_content_type,
group=group)
def _dump_categories(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_pdbx_audit_revision_category",
["ordinal", "revision_ordinal", "data_content_type",
"category"]) as lp:
for rev in system.revisions:
for category in rev.categories:
lp.write(ordinal=next(ordinal), revision_ordinal=rev._id,
data_content_type=rev.data_content_type,
category=category)
def _dump_items(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_pdbx_audit_revision_item",
["ordinal", "revision_ordinal", "data_content_type",
"item"]) as lp:
for rev in system.revisions:
for item in rev.items:
lp.write(ordinal=next(ordinal), revision_ordinal=rev._id,
data_content_type=rev.data_content_type,
item=item)
class _DataUsageDumper(Dumper):
def dump(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_pdbx_data_usage",
["id", "type", "details", "url", "name"]) as lp:
for d in system.data_usage:
lp.write(id=next(ordinal), type=d.type,
details=d.details, url=d.url, name=d.name)
class _GrantDumper(Dumper):
def dump(self, system, writer):
with writer.loop("_pdbx_audit_support",
["funding_organization", "country", "grant_number",
"ordinal"]) as lp:
for n, grant in enumerate(system.grants):
lp.write(funding_organization=grant.funding_organization,
country=grant.country,
grant_number=grant.grant_number, ordinal=n + 1)
class _DatabaseDumper(Dumper):
def dump(self, system, writer):
with writer.loop("_database_2",
["database_id", "database_code",
"pdbx_database_accession", "pdbx_DOI"]) as lp:
for d in system.databases:
lp.write(database_id=d.id, database_code=d.code,
pdbx_DOI=d.doi,
pdbx_database_accession=d.accession)
class _DatabaseStatusDumper(Dumper):
def dump(self, system, writer):
with writer.category("_pdbx_database_status") as lp:
# Pass through all data items from a Python dict
lp.write(**system.database_status._map)
class _ChemCompDumper(Dumper):
def dump(self, system, writer):
comps = frozenset(itertools.chain(
(comp for e in system.entities for comp in e.sequence),
system._orphan_chem_comps))
with writer.loop("_chem_comp", ["id", "type", "name",
"formula", "formula_weight"]) as lp:
for comp in sorted(comps, key=operator.attrgetter('id')):
if comp.ccd or comp.descriptors:
raise ValueError(
"Non-default values for 'ccd' or 'descriptors' are "
"not supported by the IHM dictionary for %s" % comp)
lp.write(id=comp.id, type=comp.type, name=comp.name,
formula=comp.formula,
formula_weight=comp.formula_weight)
class _ChemDescriptorDumper(Dumper):
def finalize(self, system):
seen_desc = {}
# Assign IDs to all descriptors
self._descriptor_by_id = []
for d in system._all_chem_descriptors():
util._remove_id(d)
for d in system._all_chem_descriptors():
util._assign_id(d, seen_desc, self._descriptor_by_id)
def dump(self, system, writer):
with writer.loop(
"_ihm_chemical_component_descriptor",
["id", "auth_name", "chemical_name",
"common_name", "smiles", "smiles_canonical", "inchi",
"inchi_key"]) as lp:
# note that we don't write out chem_comp_id; this is no longer
# present in the dictionary
for d in self._descriptor_by_id:
lp.write(id=d._id, auth_name=d.auth_name,
chemical_name=d.chemical_name,
common_name=d.common_name, smiles=d.smiles,
smiles_canonical=d.smiles_canonical, inchi=d.inchi,
inchi_key=d.inchi_key)
class _EntityDumper(Dumper):
def finalize(self, system):
# Assign IDs and check for duplicates or empty entities
seen = {}
empty = []
for num, entity in enumerate(system.entities):
if self._check and entity in seen and len(entity.sequence) > 0:
raise ValueError("Duplicate entity %s found" % entity)
if len(entity.sequence) == 0:
empty.append(entity)
entity._id = num + 1
seen[entity] = None
if empty:
warnings.warn(
"At least one empty Entity (with no sequence) was found: %s"
% empty)
def dump(self, system, writer):
# Count all molecules (if any) for each entity
num_molecules = collections.defaultdict(lambda: 0)
for asym in system.asym_units:
num_molecules[asym.entity._id] += asym.number_of_molecules
with writer.loop("_entity",
["id", "type", "src_method", "pdbx_description",
"formula_weight", "pdbx_number_of_molecules",
"details"]) as lp:
for entity in system.entities:
lp.write(id=entity._id, type=entity.type,
src_method=entity.src_method,
pdbx_description=entity.description,
formula_weight=entity.formula_weight,
pdbx_number_of_molecules=num_molecules[entity._id],
details=entity.details)
def _assign_src_ids(system, srccls):
"""Assign IDs to all entity sources of type `srccls`."""
# Assign IDs
seen_src = {}
src_by_id = []
for e in system.entities:
if isinstance(e.source, srccls):
util._remove_id(e.source)
for e in system.entities:
if isinstance(e.source, srccls):
util._assign_id(e.source, seen_src, src_by_id)
class _EntitySrcGenDumper(Dumper):
def finalize(self, system):
_assign_src_ids(system, ihm.source.Manipulated)
def dump(self, system, writer):
with writer.loop(
"_entity_src_gen",
["entity_id", "pdbx_src_id", "pdbx_gene_src_ncbi_taxonomy_id",
"pdbx_gene_src_scientific_name",
"gene_src_common_name", "gene_src_strain",
"pdbx_host_org_ncbi_taxonomy_id",
"pdbx_host_org_scientific_name",
"host_org_common_name", "pdbx_host_org_strain"]) as lp:
for e in system.entities:
if isinstance(e.source, ihm.source.Manipulated):
self._dump_source(lp, e)
def _dump_source(self, lp, e):
s = e.source
# Skip output if all fields are blank
if s.gene is None and s.host is None:
return
lp.write(entity_id=e._id, pdbx_src_id=s._id,
pdbx_gene_src_ncbi_taxonomy_id=s.gene.ncbi_taxonomy_id
if s.gene else None,
pdbx_gene_src_scientific_name=s.gene.scientific_name
if s.gene else None,
gene_src_strain=s.gene.strain if s.gene else None,
gene_src_common_name=s.gene.common_name if s.gene else None,
pdbx_host_org_ncbi_taxonomy_id=s.host.ncbi_taxonomy_id
if s.host else None,
pdbx_host_org_scientific_name=s.host.scientific_name
if s.host else None,
host_org_common_name=s.host.common_name if s.host else None,
pdbx_host_org_strain=s.host.strain if s.host else None)
class _EntitySrcNatDumper(Dumper):
def finalize(self, system):
_assign_src_ids(system, ihm.source.Natural)
def dump(self, system, writer):
with writer.loop(
"_entity_src_nat",
["entity_id", "pdbx_src_id", "pdbx_ncbi_taxonomy_id",
"pdbx_organism_scientific", "common_name", "strain"]) as lp:
for e in system.entities:
s = e.source
if isinstance(s, ihm.source.Natural):
lp.write(entity_id=e._id, pdbx_src_id=s._id,
pdbx_ncbi_taxonomy_id=s.ncbi_taxonomy_id,
pdbx_organism_scientific=s.scientific_name,
common_name=s.common_name, strain=s.strain)
class _EntitySrcSynDumper(Dumper):
def finalize(self, system):
_assign_src_ids(system, ihm.source.Synthetic)
def dump(self, system, writer):
# Note that _pdbx_entity_src_syn.strain is not used in current PDB
# entries
with writer.loop(
"_pdbx_entity_src_syn",
["entity_id", "pdbx_src_id", "ncbi_taxonomy_id",
"organism_scientific", "organism_common_name"]) as lp:
for e in system.entities:
s = e.source
if isinstance(s, ihm.source.Synthetic):
lp.write(entity_id=e._id, pdbx_src_id=s._id,
ncbi_taxonomy_id=s.ncbi_taxonomy_id,
organism_scientific=s.scientific_name,
organism_common_name=s.common_name)
def _prettyprint_seq(seq, width):
"""Join the sequence of strings together and generate a set of
lines that don't exceed the provided width."""
current_width = 0
line = []
for s in seq:
if line and current_width + len(s) > width:
yield ''.join(line)
line = []
current_width = 0
line.append(s)
current_width += len(s)
if line:
yield ''.join(line)
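# Illustrative behavior of _prettyprint_seq (shown as comments so nothing
# runs at import time):
#     >>> list(_prettyprint_seq(['AB', 'CD', 'EF'], 4))
#     ['ABCD', 'EF']
# Individual elements are never split, so a single code longer than
# `width` is yielded on a line of its own.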
def _get_comp_id(entity, seq_id):
"""Get the component ID for a given seq_id, or ? if it is out of range"""
if 1 <= seq_id <= len(entity.sequence):
return entity.sequence[seq_id - 1].id
else:
return ihm.unknown
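# For example, for an entity whose sequence is ALA-GLY, _get_comp_id
# returns 'ALA' for seq_id 1 and ihm.unknown (written as '?') for
# seq_id 3.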
class _StructRefDumper(Dumper):
def finalize(self, system):
# List of (entity, ref) by ID
self._refs_by_id = []
seen_refs = {}
align_id = itertools.count(1)
for e in system.entities:
for r in e.references:
util._remove_id(r)
for e in system.entities:
            # Two refs are not considered duplicates if they relate to
            # different entities, so add the entity to the reference
            # signature
for r in e.references:
sig = (id(e), r._signature())
util._assign_id(r, seen_refs, self._refs_by_id, seen_obj=sig,
by_id_obj=(e, r))
for a in r._get_alignments():
a._id = next(align_id)
def _get_sequence(self, reference):
"""Get the sequence string"""
if reference.sequence in (None, ihm.unknown):
return reference.sequence
# We only want the subset of the sequence that overlaps with
# our entities
db_begin = min(a.db_begin for a in reference._get_alignments())
db_end = max(a.db_end for a in reference._get_alignments())
fullrefseq = list(util._get_codes(reference.sequence))
# Split into lines to get tidier CIF output
return "\n".join(_prettyprint_seq(
(code if len(code) == 1 else '(%s)' % code
for code in fullrefseq[db_begin - 1:db_end]), 70))
def _check_seq_dif(self, entity, ref, align):
"""Check all SeqDif objects for the Entity sequence. Return the mutated
sequence (to match the reference)."""
entseq = [comp.code_canonical for comp in entity.sequence]
for sd in align.seq_dif:
if sd.seq_id < 1 or sd.seq_id > len(entseq):
raise IndexError("SeqDif.seq_id for %s is %d, out of "
"range 1-%d"
% (entity, sd.seq_id, len(entseq)))
if (sd.monomer
and sd.monomer.code_canonical != entseq[sd.seq_id - 1]):
raise ValueError("SeqDif.monomer one-letter code (%s) does "
"not match that in %s (%s at position %d)"
% (sd.monomer.code_canonical, entity,
entseq[sd.seq_id - 1], sd.seq_id))
if sd.db_monomer:
entseq[sd.seq_id - 1] = sd.db_monomer.code_canonical
return entseq
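    # For example, a SeqDif with seq_id=5, monomer=ALA and db_monomer=GLY
    # asserts that the entity has A at position 5 where the reference has
    # G; _check_seq_dif returns the entity sequence with position 5 set
    # back to 'G' so that it can be compared against the reference.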
def _get_ranges(self, entity, fullrefseq, align):
"""Get the sequence ranges for an Entity and Reference"""
return ((align.entity_begin,
len(entity.sequence) if align.entity_end is None
else align.entity_end),
(align.db_begin,
len(fullrefseq) if align.db_end is None else align.db_end))
def _check_reference_sequence(self, entity, ref):
"""Make sure that the Entity and Reference sequences match"""
for align in ref._get_alignments():
self._check_alignment(entity, ref, align)
def _check_alignment(self, entity, ref, align):
"""Make sure that an alignment makes sense"""
if ref.sequence in (None, ihm.unknown):
# We just have to trust the range if the ref sequence is blank
return
# Our sanity-checking logic doesn't currently support insertions
# or deletions
if any(sd.details in ('insertion', 'deletion')
for sd in align.seq_dif):
return
entseq = self._check_seq_dif(entity, ref, align)
# Reference sequence may contain non-standard residues, so parse them
# out; e.g. "FLGHGGN(WP9)LHFVQLAS"
fullrefseq = list(util._get_codes(ref.sequence))
def check_rng(rng, seq, rngstr, obj):
if any(r < 1 or r > len(seq) for r in rng):
raise IndexError("Alignment.%s for %s is (%d-%d), "
"out of range 1-%d"
% (rngstr, obj, rng[0], rng[1], len(seq)))
entity_rng, db_rng = self._get_ranges(entity, fullrefseq, align)
check_rng(entity_rng, entseq, "entity_begin,entity_end", entity)
check_rng(db_rng, fullrefseq, "db_begin,db_end", ref)
matchlen = min(entity_rng[1] - entity_rng[0], db_rng[1] - db_rng[0])
entseq = entseq[entity_rng[0] - 1:entity_rng[0] + matchlen - 1]
refseq = fullrefseq[db_rng[0] - 1:db_rng[0] + matchlen - 1]
        # The entity sequence is canonical, so it likely won't match any
        # non-standard residue (anything of length > 1); skip checks for
        # those
def matchseq(a, b):
return a == b or len(a) > 1 or len(b) > 1
if (len(refseq) != len(entseq)
or not all(matchseq(a, b) for (a, b) in zip(refseq, entseq))):
raise ValueError(
"Reference sequence from %s does not match entity canonical"
" sequence (after mutations) for %s - you may need to "
"adjust Alignment.db_begin,db_end (%d-%d), "
"Alignment.entity_begin,entity_end (%d-%d), "
"or add to Alignment.seq_dif:\n"
"Reference: %s\nEntity: %s\n"
"Match: %s"
% (ref, entity, db_rng[0], db_rng[1],
entity_rng[0], entity_rng[1],
# Use "X" for any non-standard residue so the alignment
# lines up
''.join(x if len(x) == 1 else 'X' for x in refseq),
''.join(entseq),
''.join('*' if matchseq(a, b) else ' '
for (a, b) in zip(refseq, entseq))))
def dump(self, system, writer):
with writer.loop(
"_struct_ref",
["id", "entity_id", "db_name", "db_code", "pdbx_db_accession",
"pdbx_align_begin", "pdbx_seq_one_letter_code",
"details"]) as lp:
for e, r in self._refs_by_id:
if self._check:
self._check_reference_sequence(e, r)
db_begin = min(a.db_begin for a in r._get_alignments())
lp.write(id=r._id, entity_id=e._id, db_name=r.db_name,
db_code=r.db_code, pdbx_db_accession=r.accession,
pdbx_align_begin=db_begin, details=r.details,
pdbx_seq_one_letter_code=self._get_sequence(r))
self.dump_seq(system, writer)
self.dump_seq_dif(system, writer)
def dump_seq(self, system, writer):
def _all_alignments():
for e, r in self._refs_by_id:
for a in r._get_alignments():
yield e, r, a
with writer.loop(
"_struct_ref_seq",
["align_id", "ref_id", "seq_align_beg", "seq_align_end",
"db_align_beg", "db_align_end"]) as lp:
for e, r, a in _all_alignments():
fullrefseq = list(util._get_codes(r.sequence))
entity_rng, db_rng = self._get_ranges(e, fullrefseq, a)
matchlen = min(entity_rng[1] - entity_rng[0],
db_rng[1] - db_rng[0])
lp.write(align_id=a._id, ref_id=r._id,
seq_align_beg=entity_rng[0],
seq_align_end=entity_rng[0] + matchlen,
db_align_beg=db_rng[0],
db_align_end=db_rng[0] + matchlen)
def dump_seq_dif(self, system, writer):
ordinal = itertools.count(1)
with writer.loop(
"_struct_ref_seq_dif",
["pdbx_ordinal", "align_id", "seq_num", "db_mon_id", "mon_id",
"details"]) as lp:
for e, r in self._refs_by_id:
for a in r._get_alignments():
for sd in a.seq_dif:
lp.write(pdbx_ordinal=next(ordinal),
align_id=a._id, seq_num=sd.seq_id,
db_mon_id=sd.db_monomer.id
if sd.db_monomer else ihm.unknown,
mon_id=sd.monomer.id
if sd.monomer else ihm.unknown,
details=sd.details)
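# A minimal usage sketch for the struct_ref machinery above (illustrative
# only; assumes the public ihm.reference API):
#
#     ref = ihm.reference.UniProtSequence(
#         db_code='LSM2_YEAST', accession='Q9UK22',  # hypothetical IDs
#         sequence='MDT...')
#     ref.alignments.append(ihm.reference.Alignment(db_begin=1, db_end=95))
#     entity.references.append(ref)
#
# finalize() then gives each (entity, reference) pair and each alignment
# an ID, and dump() verifies the reference sequence against the entity's
# canonical sequence.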
class _EntityPolyDumper(Dumper):
def __init__(self):
super().__init__()
# Determine the type of the entire entity's sequence based on the
# type(s) of all chemical components it contains
self._seq_type_map = {
frozenset(('D-peptide linking',)): 'polypeptide(D)',
frozenset(('D-peptide linking',
'peptide linking')): 'polypeptide(D)',
frozenset(('RNA linking',)): 'polyribonucleotide',
frozenset(('DNA linking',)): 'polydeoxyribonucleotide',
frozenset(('DNA linking', 'RNA linking')):
'polydeoxyribonucleotide/polyribonucleotide hybrid'}
def _get_sequence(self, entity):
"""Get the sequence for an entity as a string"""
# Split into lines to get tidier CIF output
return "\n".join(_prettyprint_seq((comp.code if len(comp.code) == 1
else '(%s)' % comp.code
for comp in entity.sequence), 70))
def _get_canon(self, entity):
"""Get the canonical sequence for an entity as a string"""
# Split into lines to get tidier CIF output
seq = "\n".join(_prettyprint_seq(
(comp.code_canonical for comp in entity.sequence), 70))
return seq
def _get_seq_type(self, entity):
"""Get the sequence type for an entity"""
all_types = frozenset(comp.type for comp in entity.sequence)
# For a mix of L-peptides and D-peptides, current PDB entries always
# seem to use 'polypeptide(L)' so let's do that too:
if 'L-peptide linking' in all_types:
return 'polypeptide(L)'
else:
return self._seq_type_map.get(all_types, 'other')
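    # For example, an entity containing only 'RNA linking' components is
    # typed as 'polyribonucleotide', while any entity containing at least
    # one 'L-peptide linking' component is typed as 'polypeptide(L)'.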
def dump(self, system, writer):
# Get all asym units (if any) for each entity
strands = collections.defaultdict(list)
for asym in system.asym_units:
strands[asym.entity._id].append(asym.strand_id)
with writer.loop("_entity_poly",
["entity_id", "type", "nstd_linkage",
"nstd_monomer", "pdbx_strand_id",
"pdbx_seq_one_letter_code",
"pdbx_seq_one_letter_code_can"]) as lp:
for entity in system.entities:
if not entity.is_polymeric():
continue
nstd = any(isinstance(x, ihm.NonPolymerChemComp)
for x in entity.sequence)
sids = strands[entity._id]
lp.write(entity_id=entity._id, type=self._get_seq_type(entity),
nstd_linkage='no',
nstd_monomer='yes' if nstd else 'no',
pdbx_strand_id=",".join(sids) if sids else None,
pdbx_seq_one_letter_code=self._get_sequence(entity),
pdbx_seq_one_letter_code_can=self._get_canon(entity))
class _EntityNonPolyDumper(Dumper):
def dump(self, system, writer):
with writer.loop("_pdbx_entity_nonpoly",
["entity_id", "name", "comp_id"]) as lp:
for entity in system.entities:
if entity.is_polymeric() or entity.is_branched():
continue
lp.write(entity_id=entity._id, name=entity.description,
comp_id=entity.sequence[0].id)
class _EntityPolySeqDumper(Dumper):
def dump(self, system, writer):
with writer.loop("_entity_poly_seq",
["entity_id", "num", "mon_id", "hetero"]) as lp:
for entity in system.entities:
if not entity.is_polymeric():
continue
for num, comp in enumerate(entity.sequence):
lp.write(entity_id=entity._id, num=num + 1, mon_id=comp.id)
class _EntityPolySegmentDumper(Dumper):
def finalize(self, system):
seen_ranges = {}
self._ranges_by_id = []
# Need to assign ranges for all starting models too
for sm in system._all_starting_models():
rng = sm.asym_unit
util._remove_id(rng, attr='_range_id')
for rng in system._all_entity_ranges():
util._remove_id(rng, attr='_range_id')
for rng in itertools.chain(system._all_entity_ranges(),
(sm.asym_unit
for sm in system._all_starting_models())):
entity = rng.entity if hasattr(rng, 'entity') else rng
if entity.is_polymeric():
util._assign_id(rng, seen_ranges, self._ranges_by_id,
attr='_range_id',
# Two ranges are considered the same if they
# have the same entity ID and refer to
# the same residue range
seen_obj=(entity._id, rng.seq_id_range))
else:
rng._range_id = None
def dump(self, system, writer):
with writer.loop("_ihm_entity_poly_segment",
["id", "entity_id", "seq_id_begin", "seq_id_end",
"comp_id_begin", "comp_id_end"]) as lp:
for rng in self._ranges_by_id:
if hasattr(rng, 'entity'):
entity = rng.entity
if self._check:
util._check_residue_range(rng.seq_id_range, entity)
else:
entity = rng
lp.write(
id=rng._range_id, entity_id=entity._id,
seq_id_begin=rng.seq_id_range[0],
seq_id_end=rng.seq_id_range[1],
comp_id_begin=_get_comp_id(entity, rng.seq_id_range[0]),
comp_id_end=_get_comp_id(entity, rng.seq_id_range[1]))
class _EntityBranchListDumper(Dumper):
def dump(self, system, writer):
with writer.loop("_pdbx_entity_branch_list",
["entity_id", "num", "comp_id", "hetero"]) as lp:
for entity in system.entities:
if not entity.is_branched():
continue
for num, comp in enumerate(entity.sequence):
lp.write(entity_id=entity._id, num=num + 1,
comp_id=comp.id)
class _EntityBranchDumper(Dumper):
def dump(self, system, writer):
# todo: we currently only support branched oligosaccharides
with writer.loop("_pdbx_entity_branch",
["entity_id", "type"]) as lp:
for entity in system.entities:
if not entity.is_branched():
continue
lp.write(entity_id=entity._id, type="oligosaccharide")
class _PolySeqSchemeDumper(Dumper):
"""Output the _pdbx_poly_seq_scheme table.
This is needed because it is a parent category of atom_site."""
def dump(self, system, writer):
with writer.loop("_pdbx_poly_seq_scheme",
["asym_id", "entity_id", "seq_id", "mon_id",
"pdb_seq_num", "auth_seq_num", "pdb_mon_id",
"auth_mon_id", "pdb_strand_id",
"pdb_ins_code"]) as lp:
for asym in system.asym_units:
entity = asym.entity
if not entity.is_polymeric():
continue
for start, end, modeled in self._get_ranges(system, asym):
for num in range(start, end + 1):
comp = entity.sequence[num - 1]
auth_comp_id = comp.id
pdb_seq_num, auth_seq_num, ins = \
asym._get_pdb_auth_seq_id_ins_code(num)
if not modeled:
# If a residue wasn't modeled, PDB convention is
# to state ? for auth_seq_num, pdb_mon_id,
# auth_mon_id.
# See, e.g., https://files.rcsb.org/view/8QB4.cif
auth_comp_id = ihm.unknown
auth_seq_num = ihm.unknown
elif auth_seq_num is ihm.unknown:
# If we don't know the seq num, we can't know
# the component ID either
auth_comp_id = ihm.unknown
lp.write(asym_id=asym._id,
pdb_strand_id=asym.strand_id,
entity_id=entity._id, seq_id=num,
pdb_seq_num=pdb_seq_num,
auth_seq_num=auth_seq_num, mon_id=comp.id,
pdb_mon_id=auth_comp_id,
auth_mon_id=auth_comp_id, pdb_ins_code=ins)
def _get_ranges(self, system, asym):
"""Get a list of (seq_id_begin, seq_id_end, modeled) residue ranges
for the given asym. The list is guaranteed to be sorted and to cover
all residues in the asym. `modeled` is True if no Model has any
residue in that range in a NotModeledResidueRange."""
_all_modeled = []
num_models = 0
for group, model in system._all_models():
num_models += 1
# Handle Model-like objects with no not-modeled member (e.g.
# older versions of python-modelcif)
if hasattr(model, 'not_modeled_residue_ranges'):
ranges = model.not_modeled_residue_ranges
else:
ranges = []
# Get a sorted non-overlapping list of all not-modeled ranges
_all_not_modeled = util._combine_ranges(
(rr.seq_id_begin, rr.seq_id_end)
for rr in ranges if rr.asym_unit is asym)
# Invert to get a list of modeled ranges for this model
_all_modeled.extend(util._invert_ranges(_all_not_modeled,
len(asym.entity.sequence)))
# If no models, there are no "not modeled residues", so say everything
# was modeled
if num_models == 0:
_all_modeled = [(1, len(asym.entity.sequence))]
return util._pred_ranges(util._combine_ranges(_all_modeled),
len(asym.entity.sequence))
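# Illustrative example of _get_ranges above (assuming the util range
# helpers behave as they are used here): for a 10-residue asym where a
# model marks residues 3-5 as not modeled, the modeled ranges are (1,2)
# and (6,10), and _get_ranges yields (1, 2, True), (3, 5, False),
# (6, 10, True).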
class _NonPolySchemeDumper(Dumper):
"""Output the _pdbx_nonpoly_scheme table.
For now we assume we're using auth_seq_num==pdb_seq_num."""
def dump(self, system, writer):
with writer.loop("_pdbx_nonpoly_scheme",
["asym_id", "entity_id", "mon_id", "ndb_seq_num",
"pdb_seq_num", "auth_seq_num",
"auth_mon_id", "pdb_strand_id",
"pdb_ins_code"]) as lp:
for asym in system.asym_units:
entity = asym.entity
if entity.is_polymeric() or entity.is_branched():
continue
for num, comp in enumerate(asym.sequence):
pdb_seq_num, auth_seq_num, ins = \
asym._get_pdb_auth_seq_id_ins_code(num + 1)
# ndb_seq_num is described as the "NDB/RCSB residue
# number". We don't have one of those but real PDBs
# usually seem to just count sequentially from 1, so
# we'll do that too.
lp.write(asym_id=asym._id, pdb_strand_id=asym.strand_id,
entity_id=entity._id,
ndb_seq_num=num + 1,
pdb_seq_num=pdb_seq_num,
auth_seq_num=auth_seq_num,
mon_id=comp.id,
auth_mon_id=comp.id, pdb_ins_code=ins)
class _BranchSchemeDumper(Dumper):
def dump(self, system, writer):
with writer.loop("_pdbx_branch_scheme",
["asym_id", "entity_id", "mon_id", "num",
"pdb_seq_num", "pdb_ins_code", "auth_seq_num",
"auth_mon_id", "pdb_mon_id", "pdb_asym_id"]) as lp:
for asym in system.asym_units:
entity = asym.entity
if not entity.is_branched():
continue
for num, comp in enumerate(asym.sequence):
pdb_seq_num, auth_seq_num, ins = \
asym._get_pdb_auth_seq_id_ins_code(num + 1)
# Assume num counts sequentially from 1 (like seq_id)
lp.write(asym_id=asym._id, pdb_asym_id=asym.strand_id,
entity_id=entity._id,
num=num + 1,
pdb_seq_num=pdb_seq_num, pdb_ins_code=ins,
auth_seq_num=auth_seq_num,
mon_id=comp.id, auth_mon_id=comp.id,
pdb_mon_id=comp.id)
class _BranchDescriptorDumper(Dumper):
def dump(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_pdbx_entity_branch_descriptor",
["ordinal", "entity_id", "descriptor", "type",
"program", "program_version"]) as lp:
for entity in system.entities:
for d in entity.branch_descriptors:
lp.write(ordinal=next(ordinal), entity_id=entity._id,
descriptor=d.text, type=d.type, program=d.program,
program_version=d.program_version)
class _BranchLinkDumper(Dumper):
def dump(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_pdbx_entity_branch_link",
["link_id", "entity_id", "entity_branch_list_num_1",
"comp_id_1", "atom_id_1", "leaving_atom_id_1",
"entity_branch_list_num_2", "comp_id_2", "atom_id_2",
"leaving_atom_id_2", "value_order",
"details"]) as lp:
for entity in system.entities:
for lnk in entity.branch_links:
lp.write(
link_id=next(ordinal), entity_id=entity._id,
entity_branch_list_num_1=lnk.num1,
comp_id_1=entity.sequence[lnk.num1 - 1].id,
atom_id_1=lnk.atom_id1,
leaving_atom_id_1=lnk.leaving_atom_id1,
entity_branch_list_num_2=lnk.num2,
comp_id_2=entity.sequence[lnk.num2 - 1].id,
atom_id_2=lnk.atom_id2,
leaving_atom_id_2=lnk.leaving_atom_id2,
value_order=lnk.order, details=lnk.details)
class _AsymIDProvider:
"""Provide unique asym IDs"""
def __init__(self, seen_ids):
self.seen_ids = seen_ids
self.ids = util._AsymIDs()
self.index = -1
def get_next_id(self):
"""Get the next unique ID"""
self.index += 1
while self.ids[self.index] in self.seen_ids:
self.index += 1
# Note that we don't need to add our own IDs to seen_ids since
# they are already guaranteed to be unique
return self.ids[self.index]
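# Illustrative behavior, assuming util._AsymIDs yields IDs in the order
# 'A', 'B', ..., 'Z', 'AA', 'AB', ...: with seen_ids = {'B'}, successive
# get_next_id() calls return 'A', 'C', 'D', ..., so user-assigned IDs are
# never duplicated.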
class _StructAsymDumper(Dumper):
def finalize(self, system):
# Handle user-assigned IDs first
seen_asym_ids = set()
duplicates = set()
for asym in system.asym_units:
if asym.id is not None:
if asym.id in seen_asym_ids:
duplicates.add(asym.id)
asym._id = asym.id
seen_asym_ids.add(asym.id)
if duplicates:
raise ValueError("One or more duplicate asym (chain) IDs "
"detected - %s" % ", ".join(sorted(duplicates)))
ordinal = itertools.count(1)
# Assign remaining asym IDs
id_prov = _AsymIDProvider(seen_asym_ids)
for asym in system.asym_units:
if asym.id is None:
asym._id = id_prov.get_next_id()
asym._ordinal = next(ordinal)
def dump(self, system, writer):
with writer.loop("_struct_asym",
["id", "entity_id", "details"]) as lp:
for asym in system.asym_units:
lp.write(id=asym._id, entity_id=asym.entity._id,
details=asym.details)
class _AssemblyDumperBase(Dumper):
def finalize(self, system):
# Sort each assembly by entity id/asym id/range
def component_key(comp):
return (comp.entity._id, comp._ordinal, comp.seq_id_range)
for a in system._all_assemblies():
a.sort(key=component_key)
seen_assemblies = {}
        # Assign IDs to all assemblies; duplicate assemblies (same
        # signature) get the same ID
self._assembly_by_id = []
description_by_id = {}
all_assemblies = list(system._all_assemblies())
seen_assembly_ids = {}
for a in all_assemblies:
# Assembly isn't hashable but its signature is
sig = a._signature()
if sig not in seen_assemblies:
self._assembly_by_id.append(a)
seen_assemblies[sig] = a._id = len(self._assembly_by_id)
description_by_id[a._id] = []
else:
a._id = seen_assemblies[sig]
if a.description and id(a) not in seen_assembly_ids:
descs = description_by_id[a._id]
# Don't duplicate descriptions
if len(descs) == 0 or descs[-1] != a.description:
descs.append(a.description)
seen_assembly_ids[id(a)] = None
# If multiple assemblies map to the same ID, give them all the same
# composite description
for a_id, description in description_by_id.items():
description_by_id[a_id] = ' & '.join(description) \
if description else None
for a in all_assemblies:
a.description = description_by_id[a._id]
class _AssemblyDumper(_AssemblyDumperBase):
def dump(self, system, writer):
self.dump_summary(system, writer)
self.dump_details(system, writer)
def dump_summary(self, system, writer):
with writer.loop("_ihm_struct_assembly",
["id", "name", "description"]) as lp:
for a in self._assembly_by_id:
lp.write(id=a._id, name=a.name, description=a.description)
def dump_details(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_struct_assembly_details",
["id", "assembly_id", "parent_assembly_id",
"entity_description", "entity_id", "asym_id",
"entity_poly_segment_id"]) as lp:
for a in self._assembly_by_id:
for comp in a:
entity = comp.entity if hasattr(comp, 'entity') else comp
lp.write(
id=next(ordinal), assembly_id=a._id,
# if no hierarchy then assembly is self-parent
parent_assembly_id=a.parent._id if a.parent else a._id,
entity_description=entity.description,
entity_id=entity._id,
asym_id=comp._id if hasattr(comp, 'entity') else None,
entity_poly_segment_id=comp._range_id)
class _ExternalReferenceDumper(Dumper):
"""Output information on externally referenced files
(i.e. anything that refers to a Location that isn't
a DatabaseLocation)."""
class _LocalFiles:
reference_provider = None
reference_type = 'Supplementary Files'
reference = None
refers_to = 'Other'
url = None
details = None
def __init__(self, top_directory):
self.top_directory = top_directory
def _get_full_path(self, path):
return os.path.relpath(path, start=self.top_directory)
def finalize(self, system):
# Keep only locations that don't point into databases (these are
# handled elsewhere)
self._refs = [x for x in system._all_locations()
if not isinstance(x, location.DatabaseLocation)]
# Assign IDs to all locations and repos (including the None repo, which
# is for local files)
seen_refs = {}
seen_repos = {}
self._ref_by_id = []
self._repo_by_id = []
# Special dummy repo for repo=None (local files)
self._local_files = self._LocalFiles(os.getcwd())
for r in self._refs:
util._remove_id(r)
if r.repo:
util._remove_id(r.repo)
for r in system._orphan_repos:
util._remove_id(r)
for r in self._refs:
# Assign a unique ID to the reference
util._assign_id(r, seen_refs, self._ref_by_id)
# Assign a unique ID to the repository
util._assign_id(r.repo or self._local_files,
seen_repos, self._repo_by_id)
for r in system._orphan_repos:
util._assign_id(r, seen_repos, self._repo_by_id)
def dump(self, system, writer):
self.dump_repos(writer)
self.dump_refs(writer)
def dump_repos(self, writer):
with writer.loop("_ihm_external_reference_info",
["reference_id", "reference_provider",
"reference_type", "reference", "refers_to",
"associated_url", "details"]) as lp:
for repo in self._repo_by_id:
lp.write(reference_id=repo._id,
reference_provider=repo.reference_provider,
reference_type=repo.reference_type,
reference=repo.reference, refers_to=repo.refers_to,
associated_url=repo.url, details=repo.details)
def dump_refs(self, writer):
with writer.loop("_ihm_external_files",
["id", "reference_id", "file_path", "content_type",
"file_format", "file_size_bytes", "details"]) as lp:
for r in self._ref_by_id:
repo = r.repo or self._local_files
if r.path is None:
file_path = None
else:
file_path = self._posix_path(repo._get_full_path(r.path))
lp.write(id=r._id, reference_id=repo._id,
file_path=file_path, content_type=r.content_type,
file_format=r.file_format,
file_size_bytes=r.file_size, details=r.details)
# On Windows systems, convert native paths to POSIX-like (/-separated)
# paths
if os.sep == '/':
def _posix_path(self, path):
return path
else:
def _posix_path(self, path):
return path.replace(os.sep, '/')
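# For example, on Windows a local file at 'sub\dir\file.txt' (relative
# to the current directory) is output as 'sub/dir/file.txt'.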
class _DatasetDumper(Dumper):
def finalize(self, system):
def _all_transforms(dataset):
for p in dataset.parents:
if isinstance(p, ihm.dataset.TransformedDataset):
yield p.transform
seen_datasets = {}
seen_transforms = {}
# Assign IDs to all datasets and transforms
self._dataset_by_id = []
self._transform_by_id = []
for d in system._all_datasets():
for t in _all_transforms(d):
# Can't use default _id attribute here since a given transform
# may be used by both a dataset and a geometric object, and
# since they live in different tables they need different IDs
util._remove_id(t, attr='_dtid')
util._remove_id(d)
for t in system._orphan_dataset_transforms:
util._remove_id(t, attr='_dtid')
for d in system._all_datasets():
util._assign_id(d, seen_datasets, self._dataset_by_id)
for t in _all_transforms(d):
util._assign_id(t, seen_transforms, self._transform_by_id,
attr='_dtid')
for t in system._orphan_dataset_transforms:
util._assign_id(t, seen_transforms, self._transform_by_id,
attr='_dtid')
# Assign IDs to all groups and remove duplicates
seen_group_ids = {}
self._dataset_group_by_id = []
for g in system._all_dataset_groups():
ids = tuple(sorted(d._id for d in g))
if ids not in seen_group_ids:
self._dataset_group_by_id.append(g)
g._id = len(self._dataset_group_by_id)
seen_group_ids[ids] = g
else:
g._id = seen_group_ids[ids]._id
def dump(self, system, writer):
with writer.loop("_ihm_dataset_list",
["id", "data_type", "database_hosted",
"details"]) as lp:
for d in self._dataset_by_id:
lp.write(id=d._id, data_type=d.data_type, details=d.details,
database_hosted=any(isinstance(
loc, location.DatabaseLocation)
for loc in d._locations))
self.dump_groups(writer)
self.dump_other(writer)
self.dump_rel_dbs(writer)
self.dump_related(system, writer)
self.dump_related_transform(system, writer)
def dump_groups(self, writer):
self.dump_group_summary(writer)
self.dump_group_links(writer)
def dump_group_summary(self, writer):
with writer.loop("_ihm_dataset_group",
["id", "name", "application", "details"]) as lp:
for g in self._dataset_group_by_id:
lp.write(id=g._id, name=g.name, application=g.application,
details=g.details)
def dump_group_links(self, writer):
with writer.loop("_ihm_dataset_group_link",
["group_id", "dataset_list_id"]) as lp:
for g in self._dataset_group_by_id:
# Don't duplicate IDs, and output in sorted order
for dataset_id in sorted(set(d._id for d in g)):
lp.write(group_id=g._id, dataset_list_id=dataset_id)
def dump_other(self, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_dataset_external_reference",
["id", "dataset_list_id", "file_id"]) as lp:
for d in self._dataset_by_id:
for loc in d._locations:
if (loc is not None and
not isinstance(loc, location.DatabaseLocation)):
lp.write(id=next(ordinal), dataset_list_id=d._id,
file_id=loc._id)
def dump_rel_dbs(self, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_dataset_related_db_reference",
["id", "dataset_list_id", "db_name",
"accession_code", "version", "details"]) as lp:
for d in self._dataset_by_id:
for loc in d._locations:
if (loc is not None
and isinstance(loc, location.DatabaseLocation)):
lp.write(id=next(ordinal), dataset_list_id=d._id,
db_name=loc.db_name,
accession_code=loc.access_code,
version=loc.version,
details=loc.details)
def dump_related(self, system, writer):
with writer.loop("_ihm_related_datasets",
["dataset_list_id_derived",
"dataset_list_id_primary",
"transformation_id"]) as lp:
for derived in self._dataset_by_id:
ids = set()
for p in derived.parents:
if isinstance(p, ihm.dataset.TransformedDataset):
ids.add((p.dataset._id, p.transform._dtid))
else:
ids.add((p._id, None))
# Don't duplicate IDs, and sort by parent ID (cannot sort
# by transform ID because it might be None and we can't
# compare None with int)
for pid, tid in sorted(ids, key=operator.itemgetter(0)):
lp.write(dataset_list_id_derived=derived._id,
dataset_list_id_primary=pid,
transformation_id=tid)
def dump_related_transform(self, system, writer):
with writer.loop(
"_ihm_data_transformation",
["id",
"rot_matrix[1][1]", "rot_matrix[2][1]", "rot_matrix[3][1]",
"rot_matrix[1][2]", "rot_matrix[2][2]", "rot_matrix[3][2]",
"rot_matrix[1][3]", "rot_matrix[2][3]", "rot_matrix[3][3]",
"tr_vector[1]", "tr_vector[2]", "tr_vector[3]"]) as lp:
for t in self._transform_by_id:
if self._check:
util._check_transform(t)
lp.write(id=t._dtid,
**_get_transform(t.rot_matrix, t.tr_vector))
class _ModelRepresentationDumper(Dumper):
def finalize(self, system):
# Assign IDs to representations and segments
for nr, r in enumerate(system._all_representations()):
r._id = nr + 1
for ns, s in enumerate(r):
s._id = ns + 1
def dump(self, system, writer):
self.dump_summary(system, writer)
self.dump_details(system, writer)
def dump_summary(self, system, writer):
with writer.loop("_ihm_model_representation",
["id", "name", "details"]) as lp:
for r in system._all_representations():
lp.write(id=r._id, name=r.name, details=r.details)
def dump_details(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_model_representation_details",
["id", "representation_id",
"entity_id", "entity_description",
"entity_asym_id", "entity_poly_segment_id",
"model_object_primitive", "starting_model_id",
"model_mode", "model_granularity",
"model_object_count", "description"]) as lp:
for r in system._all_representations():
for segment in r:
entity = segment.asym_unit.entity
lp.write(
id=next(ordinal), representation_id=r._id,
entity_id=entity._id,
entity_description=entity.description,
entity_asym_id=segment.asym_unit._id,
entity_poly_segment_id=segment.asym_unit._range_id,
model_object_primitive=segment.primitive,
starting_model_id=segment.starting_model._id
if segment.starting_model else None,
model_mode='rigid' if segment.rigid else 'flexible',
model_granularity=segment.granularity,
model_object_count=segment.count,
description=segment.description)
class _StartingModelRangeChecker:
"""Check Atoms in StartingModels to make sure they match the Entities"""
def __init__(self, model, check):
self.model = model
self._check = check
def __call__(self, atom):
if not self._check:
return
# Check that atom seq_id is in range
e = atom.asym_unit.entity
if atom.seq_id > len(e.sequence) or atom.seq_id < 1:
raise IndexError(
"Starting model %d atom seq_id (%d) out of range (1-%d) for %s"
% (self.model._id, atom.seq_id, len(e.sequence), e))
class _StartingModelDumper(Dumper):
def finalize(self, system):
# Assign IDs to starting models
for nm, m in enumerate(system._all_starting_models()):
m._id = nm + 1
def dump(self, system, writer):
self.dump_details(system, writer)
self.dump_computational(system, writer)
self.dump_comparative(system, writer)
self.dump_coords(system, writer)
self.dump_seq_dif(system, writer)
def dump_details(self, system, writer):
# Map dataset types to starting model sources
source_map = {'Comparative model': 'comparative model',
'Integrative model': 'integrative model',
'Experimental model': 'experimental model',
'De Novo model': 'ab initio model',
'Other': 'other'}
with writer.loop(
"_ihm_starting_model_details",
["starting_model_id", "entity_id", "entity_description",
"asym_id", "entity_poly_segment_id",
"starting_model_source",
"starting_model_auth_asym_id",
"starting_model_sequence_offset",
"dataset_list_id", "description"]) as lp:
for sm in system._all_starting_models():
lp.write(
starting_model_id=sm._id,
entity_id=sm.asym_unit.entity._id,
entity_description=sm.asym_unit.entity.description,
asym_id=sm.asym_unit._id,
entity_poly_segment_id=sm.asym_unit._range_id,
starting_model_source=source_map[sm.dataset.data_type],
starting_model_auth_asym_id=sm.asym_id,
dataset_list_id=sm.dataset._id,
starting_model_sequence_offset=sm.offset,
description=sm.description)
def dump_computational(self, system, writer):
"""Dump details on computational models."""
with writer.loop(
"_ihm_starting_computational_models",
["starting_model_id", "software_id",
"script_file_id"]) as lp:
for sm in system._all_starting_models():
if sm.software or sm.script_file:
lp.write(starting_model_id=sm._id,
software_id=sm.software._id
if sm.software else None,
script_file_id=sm.script_file._id
if sm.script_file else None)
def dump_comparative(self, system, writer):
"""Dump details on comparative models."""
with writer.loop(
"_ihm_starting_comparative_models",
["id", "starting_model_id",
"starting_model_auth_asym_id",
"starting_model_seq_id_begin",
"starting_model_seq_id_end",
"template_auth_asym_id", "template_seq_id_begin",
"template_seq_id_end", "template_sequence_identity",
"template_sequence_identity_denominator",
"template_dataset_list_id",
"alignment_file_id"]) as lp:
ordinal = itertools.count(1)
for sm in system._all_starting_models():
for template in sm.templates:
self._dump_template(template, sm, lp, ordinal)
def _dump_template(self, template, sm, lp, ordinal):
off = sm.offset
denom = template.sequence_identity.denominator
if denom is not None and denom is not ihm.unknown:
denom = int(denom)
# Add offset only if seq_id_range isn't . or ?
seq_id_begin = template.seq_id_range[0]
if isinstance(template.seq_id_range[0], numbers.Integral):
seq_id_begin += off
seq_id_end = template.seq_id_range[1]
if isinstance(template.seq_id_range[1], numbers.Integral):
seq_id_end += off
lp.write(id=next(ordinal),
starting_model_id=sm._id,
starting_model_auth_asym_id=sm.asym_id,
starting_model_seq_id_begin=seq_id_begin,
starting_model_seq_id_end=seq_id_end,
template_auth_asym_id=template.asym_id,
template_seq_id_begin=template.template_seq_id_range[0],
template_seq_id_end=template.template_seq_id_range[1],
template_sequence_identity=template.sequence_identity.value,
template_sequence_identity_denominator=denom,
template_dataset_list_id=template.dataset._id
if template.dataset else None,
alignment_file_id=template.alignment_file._id
if template.alignment_file else None)
def dump_coords(self, system, writer):
"""Write out coordinate information"""
ordinal = itertools.count(1)
with writer.loop(
"_ihm_starting_model_coord",
["starting_model_id", "group_PDB", "id", "type_symbol",
"atom_id", "comp_id", "entity_id", "asym_id",
"seq_id", "Cartn_x", "Cartn_y", "Cartn_z", "B_iso_or_equiv",
"ordinal_id"]) as lp:
for model in system._all_starting_models():
rngcheck = _StartingModelRangeChecker(model, self._check)
for natom, atom in enumerate(model.get_atoms()):
rngcheck(atom)
lp.write(starting_model_id=model._id,
group_PDB='HETATM' if atom.het else 'ATOM',
id=natom + 1,
type_symbol=atom.type_symbol,
atom_id=atom.atom_id,
comp_id=_get_comp_id(atom.asym_unit.entity,
atom.seq_id),
asym_id=atom.asym_unit._id,
entity_id=atom.asym_unit.entity._id,
seq_id=atom.seq_id,
Cartn_x=atom.x, Cartn_y=atom.y, Cartn_z=atom.z,
B_iso_or_equiv=atom.biso,
ordinal_id=next(ordinal))
def dump_seq_dif(self, system, writer):
"""Write out sequence difference information"""
ordinal = itertools.count(1)
with writer.loop(
"_ihm_starting_model_seq_dif",
["id", "entity_id", "asym_id",
"seq_id", "comp_id", "starting_model_id",
"db_asym_id", "db_seq_id", "db_comp_id",
"details"]) as lp:
for model in system._all_starting_models():
for sd in model.get_seq_dif():
comp = model.asym_unit.entity.sequence[sd.seq_id - 1]
lp.write(
id=next(ordinal),
entity_id=model.asym_unit.entity._id,
asym_id=model.asym_unit._id,
seq_id=sd.seq_id, comp_id=comp.id,
db_asym_id=model.asym_id, db_seq_id=sd.db_seq_id,
db_comp_id=sd.db_comp_id, starting_model_id=model._id,
details=sd.details)
class _ProtocolDumper(Dumper):
def finalize(self, system):
# Assign IDs to protocols and steps
for np, p in enumerate(system._all_protocols()):
p._id = np + 1
for ns, s in enumerate(p.steps):
s._id = ns + 1
def dump(self, system, writer):
self.dump_summary(system, writer)
self.dump_details(system, writer)
def dump_summary(self, system, writer):
with writer.loop("_ihm_modeling_protocol",
["id", "protocol_name", "num_steps",
"details"]) as lp:
for p in system._all_protocols():
lp.write(id=p._id,
protocol_name=p.name, num_steps=len(p.steps),
details=p.details)
def dump_details(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_modeling_protocol_details",
["id", "protocol_id", "step_id",
"struct_assembly_id", "dataset_group_id",
"step_name", "step_method", "num_models_begin",
"num_models_end", "multi_scale_flag",
"multi_state_flag", "ordered_flag",
"ensemble_flag", "software_id", "script_file_id",
"description"]) as lp:
for p in system._all_protocols():
for s in p.steps:
if s.ensemble == 'default':
ensemble = len(system.ensembles) > 0
else:
ensemble = s.ensemble
lp.write(
id=next(ordinal), protocol_id=p._id,
step_id=s._id,
struct_assembly_id=s.assembly._id,
dataset_group_id=s.dataset_group._id
if s.dataset_group else None,
step_name=s.name, step_method=s.method,
num_models_begin=s.num_models_begin,
num_models_end=s.num_models_end,
multi_state_flag=s.multi_state,
ordered_flag=s.ordered,
multi_scale_flag=s.multi_scale,
ensemble_flag=ensemble,
software_id=s.software._id if s.software else None,
script_file_id=s.script_file._id
if s.script_file else None,
description=s.description)
class _PostProcessDumper(Dumper):
def finalize(self, system):
pp_id = itertools.count(1)
# Assign IDs to analyses and steps
# todo: handle case where one analysis is referred to from multiple
# protocols
for p in system._all_protocols():
for na, a in enumerate(p.analyses):
a._id = na + 1
for ns, s in enumerate(a.steps):
s._id = ns + 1
# Assign globally unique postproc id
s._post_proc_id = next(pp_id)
def dump(self, system, writer):
with writer.loop("_ihm_modeling_post_process",
["id", "protocol_id", "analysis_id", "step_id",
"type", "feature", "num_models_begin",
"num_models_end", "struct_assembly_id",
"dataset_group_id", "software_id",
"script_file_id", "details"]) as lp:
for p in system._all_protocols():
for a in p.analyses:
for s in a.steps:
lp.write(
id=s._post_proc_id, protocol_id=p._id,
analysis_id=a._id, step_id=s._id, type=s.type,
feature=s.feature,
num_models_begin=s.num_models_begin,
num_models_end=s.num_models_end,
struct_assembly_id=s.assembly._id if s.assembly
else None,
dataset_group_id=s.dataset_group._id
if s.dataset_group else None,
software_id=s.software._id if s.software
else None,
script_file_id=s.script_file._id
if s.script_file else None,
details=s.details)
class _RangeChecker:
"""Check Atom or Sphere objects to make sure they match the
Representation and Assembly"""
def __init__(self, model, check=True):
self.check = check
self._setup_representation(model)
self._setup_assembly(model)
self._seen_atoms = set()
def _setup_representation(self, model):
"""Make map from asym_id to representation segments for that ID"""
r = model.representation if model.representation else []
self.repr_asym_ids = {}
for segment in r:
asym_id = segment.asym_unit._id
if asym_id not in self.repr_asym_ids:
self.repr_asym_ids[asym_id] = []
self.repr_asym_ids[asym_id].append(segment)
self._last_repr_segment_matched = None
def _setup_assembly(self, model):
"""Make map from asym_id to assembly seq_id ranges for that ID"""
a = model.assembly if model.assembly else []
self.asmb_asym_ids = {}
for obj in a:
if hasattr(obj, 'entity'):
asym_id = obj._id
if asym_id not in self.asmb_asym_ids:
self.asmb_asym_ids[asym_id] = []
self.asmb_asym_ids[asym_id].append(obj.seq_id_range)
self._last_asmb_range_matched = None
self._last_asmb_asym_matched = None
def _type_check_atom(self, obj, segment):
"""Check an Atom object against a representation segment."""
# Atom objects can only match an AtomicSegment
return isinstance(segment, ihm.representation.AtomicSegment)
def _type_check_sphere(self, obj, segment):
"""Check a Sphere object against a representation segment."""
if isinstance(segment, ihm.representation.ResidueSegment):
# Only 1-residue Spheres are OK for by-residue segments
return obj.seq_id_range[0] == obj.seq_id_range[1]
elif isinstance(segment, ihm.representation.MultiResidueSegment):
# Sphere must cover the entire range for multi-residue segments
return (
obj.seq_id_range[0] == segment.asym_unit.seq_id_range[0]
and obj.seq_id_range[1] == segment.asym_unit.seq_id_range[1])
elif isinstance(segment, ihm.representation.FeatureSegment):
            # Sphere can cover any set of residues for by-feature
            # segments; the range itself is checked by the caller
return True
else:
# Spheres can never be used to represent a by-atom segment
return False
def __call__(self, obj):
"""Check the given Atom or Sphere object"""
if not self.check:
return
asym = obj.asym_unit
if isinstance(obj, ihm.model.Sphere):
type_check = self._type_check_sphere
seq_id_range = obj.seq_id_range
else:
type_check = self._type_check_atom
seq_id_range = (obj.seq_id, obj.seq_id)
# Allow seq_id to be either 1 or None for ligands
if obj.seq_id == 1 and asym.entity.type == 'non-polymer':
seq_id_range = (None, None)
self._check_duplicate_atom(obj)
self._check_assembly(obj, asym, seq_id_range)
self._check_representation(obj, asym, type_check, seq_id_range)
def _check_duplicate_atom(self, atom):
# e.g. multiple bulk water oxygen atoms can have "same" seq_id (None)
if atom.seq_id is None:
return
k = (atom.asym_unit._id, atom.atom_id, atom.seq_id, atom.alt_id)
if k in self._seen_atoms:
raise ValueError(
"Multiple atoms with same atom_id (%s), seq_id (%d) "
"and alt_id (%s) found in asym ID %s"
% (atom.atom_id, atom.seq_id, atom.alt_id, atom.asym_unit._id))
self._seen_atoms.add(k)
def _check_assembly(self, obj, asym, seq_id_range):
# Check last match first
last_rng = self._last_asmb_range_matched
if last_rng and asym._id == self._last_asmb_asym_matched \
and _is_subrange(seq_id_range, last_rng):
return
# Check asym_id
if asym._id not in self.asmb_asym_ids:
raise ValueError(
"%s refers to an asym ID (%s) that is not in this "
"model's assembly (which includes the following asym IDs: %s)"
% (obj, asym._id,
", ".join(sorted(a for a in self.asmb_asym_ids))))
# Check range
for rng in self.asmb_asym_ids[asym._id]:
if _is_subrange(seq_id_range, rng):
self._last_asmb_asym_matched = asym._id
self._last_asmb_range_matched = rng
return
def print_range(rng):
if rng == (None, None):
return "None"
else:
return "%d-%d" % rng
raise ValueError(
"%s seq_id range (%s) does not match any range "
"in the assembly for asym ID %s (ranges are %s)"
% (obj, print_range(seq_id_range), asym._id,
", ".join(print_range(x)
for x in self.asmb_asym_ids[asym._id])))
def _check_representation(self, obj, asym, type_check, seq_id_range):
# Check last match first
last_seg = self._last_repr_segment_matched
if last_seg and asym._id == last_seg.asym_unit._id \
and _is_subrange(seq_id_range, last_seg.asym_unit.seq_id_range) \
and type_check(obj, last_seg):
return
# Check asym_id
if asym._id not in self.repr_asym_ids:
raise ValueError(
"%s refers to an asym ID (%s) that is not in this "
"model's representation (which includes the following asym "
"IDs: %s)"
% (obj, asym._id,
", ".join(sorted(a for a in self.repr_asym_ids))))
# Check range
bad_type_segments = []
for segment in self.repr_asym_ids[asym._id]:
rng = segment.asym_unit.seq_id_range
if _is_subrange(seq_id_range, rng):
if type_check(obj, segment):
self._last_repr_segment_matched = segment
return
else:
bad_type_segments.append(segment)
if bad_type_segments:
raise ValueError(
"%s does not match the type of any representation "
"segment in the seq_id_range (%d-%d) for asym ID %s. "
"Representation segments are: %s"
% (obj, seq_id_range[0], seq_id_range[1], asym._id,
", ".join(str(s) for s in bad_type_segments)))
else:
raise ValueError(
"%s seq_id range (%d-%d) does not match any range "
"in the representation for asym ID %s (representation "
"ranges are %s)"
% (obj, seq_id_range[0], seq_id_range[1], asym._id,
", ".join("%d-%d" % x.asym_unit.seq_id_range
for x in self.repr_asym_ids[asym._id])))
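# Note that _RangeChecker caches the most recent assembly range and
# representation segment that matched; since coordinates are typically
# output in residue order, most atoms or spheres hit this fast path
# instead of rescanning every range.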
class _AssemblyChecker:
"""Check that all Assembly asyms are in a Model"""
def __init__(self):
# Map from Assembly id to set of Asym ids
self._asmb_asyms = {}
# Map from Assembly id to Assembly object
self._asmb_from_id = {}
# Map from Assembly id to set of all represented Asym ids (in models)
self._asmb_model_asyms = {}
def add_model_asyms(self, model, seen_asym_ids):
"""Add a set of asym IDs seen in atoms or spheres in the model"""
asmb = model.assembly
# If this is the first time we've seen this assembly, get its
# declared set of asym IDs
if id(asmb) not in self._asmb_asyms:
self._asmb_from_id[id(asmb)] = asmb
asyms = frozenset(x._id for x in asmb if hasattr(x, 'entity'))
self._asmb_asyms[id(asmb)] = asyms
# Add asym IDs from model
if id(asmb) not in self._asmb_model_asyms:
self._asmb_model_asyms[id(asmb)] = set()
self._asmb_model_asyms[id(asmb)] |= seen_asym_ids
def check(self):
"""Make sure each Assembly only references asym IDs that are
represented by atoms or spheres in at least one Model, or
raise ValueError."""
def get_extra_asyms():
for asmb_id, asyms in self._asmb_asyms.items():
extra = asyms - self._asmb_model_asyms[asmb_id]
if extra:
asmb = self._asmb_from_id[asmb_id]
asmb_id = ("ID %s" % asmb._id
if hasattr(asmb, '_id') else asmb)
yield asmb_id, ", ".join(sorted(extra))
err = "; ".join("%s, asym IDs %s" % extra
for extra in get_extra_asyms())
if err:
raise ValueError(
"The following Assemblies reference asym IDs that don't "
"have coordinates in any Model: " + err)
class _ModelDumperBase(Dumper):
def finalize(self, system):
# Remove any existing ID
for g in system._all_model_groups(only_in_states=False):
if hasattr(g, '_id'):
del g._id
for m in g:
if hasattr(m, '_id'):
del m._id
model_id = itertools.count(1)
# Assign IDs to models and groups in states
for ng, g in enumerate(system._all_model_groups()):
g._id = ng + 1
for m in g:
if not hasattr(m, '_id'):
m._id = next(model_id)
# Check for any groups not referenced by states
for g in system._all_model_groups(only_in_states=False):
if not hasattr(g, '_id'):
raise ValueError("%s is referenced only by an Ensemble or "
"OrderedProcess. ModelGroups should be "
"stored in State objects." % g)
def dump_atom_type(self, seen_types, system, writer):
"""Output the atom_type table with a list of elements used
in atom_site. This table is needed by atom_site. Note that we
output it *after* atom_site (otherwise we would need to iterate
through all atoms in the system twice)."""
# Also check all assemblies, after dumping all atoms/spheres
if self._check:
self._assembly_checker.check()
elements = [x for x in sorted(seen_types.keys()) if x is not None]
with writer.loop("_atom_type", ["symbol"]) as lp:
for element in elements:
lp.write(symbol=element)
def __get_assembly_checker(self):
if not hasattr(self, '_asmb_check'):
self._asmb_check = _AssemblyChecker()
return self._asmb_check
_assembly_checker = property(__get_assembly_checker)
def dump_atoms(self, system, writer, add_ihm=True):
seen_types = {}
ordinal = itertools.count(1)
it = ["group_PDB", "id", "type_symbol", "label_atom_id",
"label_alt_id", "label_comp_id", "label_seq_id", "auth_seq_id",
"pdbx_PDB_ins_code", "label_asym_id", "Cartn_x", "Cartn_y",
"Cartn_z", "occupancy", "label_entity_id", "auth_asym_id",
"auth_comp_id", "B_iso_or_equiv", "pdbx_PDB_model_num"]
if add_ihm:
it.append("ihm_model_id")
with writer.loop("_atom_site", it) as lp:
for group, model in system._all_models():
seen_asym_ids = set()
rngcheck = _RangeChecker(model, self._check)
for atom in model.get_atoms():
rngcheck(atom)
seen_asym_ids.add(atom.asym_unit._id)
seq_id = 1 if atom.seq_id is None else atom.seq_id
label_seq_id = atom.seq_id
if not atom.asym_unit.entity.is_polymeric():
label_seq_id = None
comp = atom.asym_unit.sequence[seq_id - 1]
seen_types[atom.type_symbol] = None
auth_seq_id, ins = \
atom.asym_unit._get_auth_seq_id_ins_code(seq_id)
lp.write(id=next(ordinal),
type_symbol=atom.type_symbol,
group_PDB='HETATM' if atom.het else 'ATOM',
label_atom_id=atom.atom_id,
label_alt_id=atom.alt_id,
label_comp_id=comp.id,
label_asym_id=atom.asym_unit._id,
label_entity_id=atom.asym_unit.entity._id,
label_seq_id=label_seq_id,
auth_seq_id=auth_seq_id, auth_comp_id=comp.id,
pdbx_PDB_ins_code=ins or ihm.unknown,
auth_asym_id=atom.asym_unit.strand_id,
Cartn_x=atom.x, Cartn_y=atom.y, Cartn_z=atom.z,
B_iso_or_equiv=atom.biso,
occupancy=atom.occupancy,
pdbx_PDB_model_num=model._id,
ihm_model_id=model._id)
self._assembly_checker.add_model_asyms(model, seen_asym_ids)
return seen_types
class _ModelDumper(_ModelDumperBase):
def dump(self, system, writer):
self.dump_model_list(system, writer)
self.dump_model_groups(system, writer)
seen_types = self.dump_atoms(system, writer)
self.dump_spheres(system, writer)
self.dump_atom_type(seen_types, system, writer)
def dump_model_groups(self, system, writer):
self.dump_model_group_summary(system, writer)
self.dump_model_group_link(system, writer)
def dump_model_list(self, system, writer):
with writer.loop("_ihm_model_list",
["model_id", "model_name", "assembly_id",
"protocol_id", "representation_id"]) as lp:
for group, model in system._all_models():
lp.write(model_id=model._id,
model_name=model.name,
assembly_id=model.assembly._id,
protocol_id=model.protocol._id
if model.protocol else None,
representation_id=model.representation._id)
def dump_model_group_summary(self, system, writer):
with writer.loop("_ihm_model_group", ["id", "name", "details"]) as lp:
for group in system._all_model_groups():
lp.write(id=group._id, name=group.name, details=group.details)
def dump_model_group_link(self, system, writer):
with writer.loop("_ihm_model_group_link",
["group_id", "model_id"]) as lp:
for group in system._all_model_groups():
for model_id in sorted(set(model._id for model in group)):
lp.write(model_id=model_id, group_id=group._id)
def dump_spheres(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_sphere_obj_site",
["id", "entity_id", "seq_id_begin",
"seq_id_end", "asym_id", "Cartn_x",
"Cartn_y", "Cartn_z", "object_radius", "rmsf",
"model_id"]) as lp:
for group, model in system._all_models():
rngcheck = _RangeChecker(model, self._check)
seen_asym_ids = set()
for sphere in model.get_spheres():
rngcheck(sphere)
seen_asym_ids.add(sphere.asym_unit._id)
lp.write(id=next(ordinal),
entity_id=sphere.asym_unit.entity._id,
seq_id_begin=sphere.seq_id_range[0],
seq_id_end=sphere.seq_id_range[1],
asym_id=sphere.asym_unit._id,
Cartn_x=sphere.x, Cartn_y=sphere.y,
Cartn_z=sphere.z, object_radius=sphere.radius,
rmsf=sphere.rmsf, model_id=model._id)
self._assembly_checker.add_model_asyms(model, seen_asym_ids)
class _ModelRepresentativeDumper(Dumper):
def dump(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_model_representative",
["id", "model_group_id", "model_id",
"selection_criteria"]) as lp:
for group in system._all_model_groups():
for rep in group.representatives:
# This assumes that each representative is also a
# member of the group, so we don't need to assign an ID.
lp.write(id=next(ordinal), model_group_id=group._id,
model_id=rep.model._id,
selection_criteria=rep.selection_criteria)
class _NotModeledResidueRangeDumper(Dumper):
def dump(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_residues_not_modeled",
["id", "model_id", "entity_description",
"entity_id", "asym_id", "seq_id_begin", "seq_id_end",
"comp_id_begin", "comp_id_end", "reason"]) as lp:
for group, model in system._all_models():
for rr in model.not_modeled_residue_ranges:
e = rr.asym_unit.entity
if self._check:
util._check_residue_range(
(rr.seq_id_begin, rr.seq_id_end), e)
lp.write(id=next(ordinal), model_id=model._id,
entity_description=e.description,
entity_id=e._id,
asym_id=rr.asym_unit._id,
seq_id_begin=rr.seq_id_begin,
seq_id_end=rr.seq_id_end,
comp_id_begin=_get_comp_id(e, rr.seq_id_begin),
comp_id_end=_get_comp_id(e, rr.seq_id_end),
reason=rr.reason)
class _EnsembleDumper(Dumper):
def finalize(self, system):
# Assign IDs
for ne, e in enumerate(system.ensembles):
e._id = ne + 1
def dump(self, system, writer):
self.dump_info(system, writer)
self.dump_subsamples(system, writer)
def dump_info(self, system, writer):
with writer.loop("_ihm_ensemble_info",
["ensemble_id", "ensemble_name", "post_process_id",
"model_group_id", "ensemble_clustering_method",
"ensemble_clustering_feature",
"num_ensemble_models",
"num_ensemble_models_deposited",
"ensemble_precision_value",
"ensemble_file_id", "details",
"model_group_superimposed_flag",
"sub_sample_flag", "sub_sampling_type"]) as lp:
for e in system.ensembles:
if e.subsamples:
sstype = e.subsamples[0].sub_sampling_type
else:
sstype = None
lp.write(ensemble_id=e._id, ensemble_name=e.name,
post_process_id=e.post_process._id if e.post_process
else None,
model_group_id=e.model_group._id
if e.model_group is not None else None,
ensemble_clustering_method=e.clustering_method,
ensemble_clustering_feature=e.clustering_feature,
num_ensemble_models=e.num_models,
num_ensemble_models_deposited=e.num_models_deposited,
ensemble_precision_value=e.precision,
ensemble_file_id=e.file._id if e.file else None,
details=e.details,
model_group_superimposed_flag=e.superimposed,
sub_sample_flag=len(e.subsamples) > 0,
sub_sampling_type=sstype)
def dump_subsamples(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_ensemble_sub_sample",
["id", "name", "ensemble_id", "num_models",
"num_models_deposited", "model_group_id",
"file_id"]) as lp:
for e in system.ensembles:
for s in e.subsamples:
lp.write(id=next(ordinal), name=s.name, ensemble_id=e._id,
num_models=s.num_models,
num_models_deposited=s.num_models_deposited,
model_group_id=s.model_group._id
if s.model_group else None,
file_id=s.file._id if s.file else None)
if type(s) != type(e.subsamples[0]): # noqa: E721
raise TypeError(
"Subsamples are not all of the same type "
"(%s vs %s) for ensemble %s"
% (s, e.subsamples[0], e))
class _DensityDumper(Dumper):
def finalize(self, system):
# Assign globally unique IDs
did = itertools.count(1)
for e in system.ensembles:
for d in e.densities:
d._id = next(did)
def dump(self, system, writer):
with writer.loop("_ihm_localization_density_files",
["id", "file_id", "ensemble_id", "entity_id",
"asym_id", "entity_poly_segment_id"]) as lp:
for ensemble in system.ensembles:
for density in ensemble.densities:
lp.write(
id=density._id, file_id=density.file._id,
ensemble_id=ensemble._id,
entity_id=density.asym_unit.entity._id,
asym_id=density.asym_unit._id,
entity_poly_segment_id=density.asym_unit._range_id)
class _MultiStateDumper(Dumper):
def finalize(self, system):
state_id = itertools.count(1)
# Assign IDs
for ng, g in enumerate(system.state_groups):
g._id = ng + 1
for state in g:
state._id = next(state_id)
def dump(self, system, writer):
# Nothing to do for single state modeling
if len(system.state_groups) == 1 and len(system.state_groups[0]) <= 1:
return
self.dump_summary(system, writer)
self.dump_model_groups(system, writer)
def dump_summary(self, system, writer):
with writer.loop("_ihm_multi_state_modeling",
["state_id", "state_group_id",
"population_fraction", "state_type", "state_name",
"experiment_type", "details"]) as lp:
for state_group in system.state_groups:
for state in state_group:
lp.write(state_id=state._id,
state_group_id=state_group._id,
population_fraction=state.population_fraction,
state_type=state.type,
state_name=state.name,
experiment_type=state.experiment_type,
details=state.details)
def dump_model_groups(self, system, writer):
with writer.loop("_ihm_multi_state_model_group_link",
["state_id", "model_group_id"]) as lp:
for state_group in system.state_groups:
for state in state_group:
for model_group in state:
lp.write(state_id=state._id,
model_group_id=model_group._id)
class _OrderedDumper(Dumper):
def finalize(self, system):
for nproc, proc in enumerate(system.ordered_processes):
proc._id = nproc + 1
edge_id = itertools.count(1)
for nstep, step in enumerate(proc.steps):
step._id = nstep + 1
for edge in step:
edge._id = next(edge_id)
def dump(self, system, writer):
with writer.loop("_ihm_ordered_model",
["process_id", "process_description", "ordered_by",
"step_id", "step_description",
"edge_id", "edge_description",
"model_group_id_begin", "model_group_id_end"]) as lp:
for proc in system.ordered_processes:
for step in proc.steps:
for edge in step:
lp.write(process_id=proc._id,
process_description=proc.description,
ordered_by=proc.ordered_by, step_id=step._id,
step_description=step.description,
edge_id=edge._id,
edge_description=edge.description,
model_group_id_begin=edge.group_begin._id,
model_group_id_end=edge.group_end._id)
class _GeometricObjectDumper(Dumper):
def finalize(self, system):
seen_objects = {}
seen_centers = {}
seen_transformations = {}
self._centers_by_id = []
self._transformations_by_id = []
self._objects_by_id = []
for o in system._all_geometric_objects():
util._remove_id(o)
if hasattr(o, 'center'):
util._remove_id(o.center)
if hasattr(o, 'transformation') and o.transformation:
util._remove_id(o.transformation)
for t in system._orphan_geometric_transforms:
util._remove_id(t)
for c in system._orphan_centers:
util._remove_id(c)
for o in system._all_geometric_objects():
util._assign_id(o, seen_objects, self._objects_by_id)
if hasattr(o, 'center'):
util._assign_id(o.center, seen_centers, self._centers_by_id)
if hasattr(o, 'transformation') and o.transformation:
util._assign_id(o.transformation, seen_transformations,
self._transformations_by_id)
for t in system._orphan_geometric_transforms:
util._assign_id(t, seen_transformations,
self._transformations_by_id)
for c in system._orphan_centers:
util._assign_id(c, seen_centers, self._centers_by_id)
def dump(self, system, writer):
self.dump_centers(writer)
self.dump_transformations(writer)
self.dump_generic(writer)
self.dump_sphere(writer)
self.dump_torus(writer)
self.dump_half_torus(writer)
self.dump_axis(writer)
self.dump_plane(writer)
def dump_centers(self, writer):
with writer.loop("_ihm_geometric_object_center",
["id", "xcoord", "ycoord", "zcoord"]) as lp:
for c in self._centers_by_id:
lp.write(id=c._id, xcoord=c.x, ycoord=c.y, zcoord=c.z)
def dump_transformations(self, writer):
with writer.loop(
"_ihm_geometric_object_transformation",
["id",
"rot_matrix[1][1]", "rot_matrix[2][1]", "rot_matrix[3][1]",
"rot_matrix[1][2]", "rot_matrix[2][2]", "rot_matrix[3][2]",
"rot_matrix[1][3]", "rot_matrix[2][3]", "rot_matrix[3][3]",
"tr_vector[1]", "tr_vector[2]", "tr_vector[3]"]) as lp:
for t in self._transformations_by_id:
if self._check:
util._check_transform(t)
lp.write(id=t._id, **_get_transform(t.rot_matrix, t.tr_vector))
def dump_generic(self, writer):
with writer.loop("_ihm_geometric_object_list",
["object_id", "object_type", "object_name",
"object_description"]) as lp:
for o in self._objects_by_id:
lp.write(object_id=o._id, object_type=o.type,
object_name=o.name, object_description=o.description)
def dump_sphere(self, writer):
with writer.loop("_ihm_geometric_object_sphere",
["object_id", "center_id", "transformation_id",
"radius_r"]) as lp:
for o in self._objects_by_id:
if not isinstance(o, geometry.Sphere):
continue
lp.write(object_id=o._id, center_id=o.center._id,
transformation_id=o.transformation._id
if o.transformation else None,
radius_r=o.radius)
def dump_torus(self, writer):
with writer.loop("_ihm_geometric_object_torus",
["object_id", "center_id", "transformation_id",
"major_radius_R", "minor_radius_r"]) as lp:
for o in self._objects_by_id:
if not isinstance(o, (geometry.Torus, geometry.HalfTorus)):
continue
lp.write(object_id=o._id, center_id=o.center._id,
transformation_id=o.transformation._id
if o.transformation else None,
major_radius_R=o.major_radius,
minor_radius_r=o.minor_radius)
def dump_half_torus(self, writer):
section_map = {True: 'inner half', False: 'outer half'}
with writer.loop("_ihm_geometric_object_half_torus",
["object_id", "thickness_th", "section"]) as lp:
for o in self._objects_by_id:
if not isinstance(o, geometry.HalfTorus):
continue
lp.write(object_id=o._id, thickness_th=o.thickness,
section=section_map.get(o.inner, 'other'))
def dump_axis(self, writer):
with writer.loop("_ihm_geometric_object_axis",
["object_id", "axis_type",
"transformation_id"]) as lp:
for o in self._objects_by_id:
if not isinstance(o, geometry.Axis):
continue
lp.write(object_id=o._id, axis_type=o.axis_type,
transformation_id=o.transformation._id
if o.transformation else None)
def dump_plane(self, writer):
with writer.loop("_ihm_geometric_object_plane",
["object_id", "plane_type",
"transformation_id"]) as lp:
for o in self._objects_by_id:
if not isinstance(o, geometry.Plane):
continue
lp.write(object_id=o._id, plane_type=o.plane_type,
transformation_id=o.transformation._id
if o.transformation else None)
class _FeatureDumper(Dumper):
def finalize(self, system):
seen_features = {}
self._features_by_id = []
for f in system._all_features():
util._remove_id(f)
for f in system._all_features():
util._assign_id(f, seen_features, self._features_by_id,
seen_obj=f._signature()
if hasattr(f, '_signature') else f)
def dump(self, system, writer):
self.dump_list(writer)
self.dump_poly_residue(writer)
self.dump_poly_atom(writer)
self.dump_non_poly(writer)
self.dump_pseudo_site(writer)
def dump_list(self, writer):
with writer.loop("_ihm_feature_list",
["feature_id", "feature_type", "entity_type",
"details"]) as lp:
for f in self._features_by_id:
if self._check and f.type is ihm.unknown:
raise ValueError("Invalid null feature %s" % f)
lp.write(feature_id=f._id, feature_type=f.type,
entity_type=f._get_entity_type(),
details=f.details)
def dump_poly_residue(self, writer):
def _get_entity(x):
return x if isinstance(x, ihm.Entity) else x.entity
def _get_asym_id(x):
return (x._id if isinstance(x, (ihm.AsymUnit, ihm.AsymUnitRange))
else None)
ordinal = itertools.count(1)
with writer.loop("_ihm_poly_residue_feature",
["ordinal_id", "feature_id", "entity_id", "asym_id",
"seq_id_begin", "comp_id_begin", "seq_id_end",
"comp_id_end"]) as lp:
for f in self._features_by_id:
if not isinstance(f, restraint.ResidueFeature):
continue
if not f.ranges:
raise ValueError("%s selects no residues" % f)
for r in f.ranges:
entity = _get_entity(r)
seq = entity.sequence
lp.write(ordinal_id=next(ordinal), feature_id=f._id,
entity_id=entity._id, asym_id=_get_asym_id(r),
seq_id_begin=r.seq_id_range[0],
comp_id_begin=seq[r.seq_id_range[0] - 1].id,
seq_id_end=r.seq_id_range[1],
comp_id_end=seq[r.seq_id_range[1] - 1].id)
def dump_poly_atom(self, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_poly_atom_feature",
["ordinal_id", "feature_id", "entity_id", "asym_id",
"seq_id", "comp_id", "atom_id"]) as lp:
for f in self._features_by_id:
if not isinstance(f, restraint.AtomFeature):
continue
for a in f.atoms:
r = a.residue
entity = r.entity if r.entity else r.asym.entity
if entity.is_polymeric():
seq = entity.sequence
lp.write(ordinal_id=next(ordinal), feature_id=f._id,
entity_id=entity._id,
asym_id=r.asym._id if r.asym else None,
seq_id=r.seq_id, comp_id=seq[r.seq_id - 1].id,
atom_id=a.id)
def dump_non_poly(self, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_non_poly_feature",
["ordinal_id", "feature_id", "entity_id", "asym_id",
"comp_id", "atom_id"]) as lp:
for f in self._features_by_id:
if isinstance(f, restraint.AtomFeature):
for a in f.atoms:
r = a.residue
entity = r.entity if r.entity else r.asym.entity
if not entity.is_polymeric():
seq = entity.sequence
lp.write(ordinal_id=next(ordinal),
feature_id=f._id,
entity_id=entity._id,
asym_id=r.asym._id if r.asym else None,
comp_id=seq[r.seq_id - 1].id,
atom_id=a.id)
elif isinstance(f, restraint.NonPolyFeature):
_ = f._get_entity_type() # trigger check for poly/nonpoly
for a in f.objs:
entity = a if isinstance(a, ihm.Entity) else a.entity
asym_id = (a._id if isinstance(a, ihm.AsymUnit)
else None)
seq = entity.sequence
lp.write(ordinal_id=next(ordinal), feature_id=f._id,
entity_id=entity._id,
asym_id=asym_id, comp_id=seq[0].id,
atom_id=None)
def dump_pseudo_site(self, writer):
with writer.loop("_ihm_pseudo_site_feature",
["feature_id", "pseudo_site_id"]) as lp:
for f in self._features_by_id:
if not isinstance(f, restraint.PseudoSiteFeature):
continue
lp.write(feature_id=f._id, pseudo_site_id=f.site._id)
class _PseudoSiteDumper(Dumper):
def finalize(self, system):
seen_sites = {}
self._sites_by_id = []
for f in system._all_pseudo_sites():
util._remove_id(f)
for f in system._all_pseudo_sites():
util._assign_id(f, seen_sites, self._sites_by_id,
seen_obj=f._signature())
def dump(self, system, writer):
with writer.loop("_ihm_pseudo_site",
["id", "Cartn_x", "Cartn_y",
"Cartn_z", "radius", "description"]) as lp:
for s in self._sites_by_id:
lp.write(id=s._id, Cartn_x=s.x, Cartn_y=s.y,
Cartn_z=s.z, radius=s.radius,
description=s.description)
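# Note that deduplication above uses PseudoSite._signature(), not object
# identity, so two sites with identical coordinates, radius and
# description collapse to a single ID. A minimal sketch:
#
#     import ihm.restraint
#     s1 = ihm.restraint.PseudoSite(10.0, 20.0, 30.0)
#     s2 = ihm.restraint.PseudoSite(10.0, 20.0, 30.0)
#     # After finalize, s1._id == s2._id and only one row is written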
class _CrossLinkDumper(Dumper):
def _all_restraints(self, system):
return [r for r in system._all_restraints()
if isinstance(r, restraint.CrossLinkRestraint)]
def finalize(self, system):
self.finalize_experimental(system)
self.finalize_modeling(system)
def finalize_experimental(self, system):
seen_cross_links = {}
seen_group_ids = {}
xl_id = itertools.count(1)
self._ex_xls_by_id = []
for r in self._all_restraints(system):
for g in r.experimental_cross_links:
for xl in g:
# Assign identical cross-links the same ID and group ID
sig = (xl.residue1.entity, xl.residue1.seq_id,
xl.residue2.entity, xl.residue2.seq_id,
r.linker)
if sig in seen_cross_links:
xl._id, xl._group_id = seen_cross_links[sig]
else:
if id(g) not in seen_group_ids:
seen_group_ids[id(g)] = len(seen_group_ids) + 1
xl._group_id = seen_group_ids[id(g)]
xl._id = next(xl_id)
self._ex_xls_by_id.append((r, xl))
seen_cross_links[sig] = xl._id, xl._group_id
def finalize_modeling(self, system):
seen_cross_links = {}
xl_id = itertools.count(1)
self._xls_by_id = []
for r in self._all_restraints(system):
for xl in r.cross_links:
# Assign identical cross-links the same ID
ex_xl = xl.experimental_cross_link
sig = (xl.asym1._id, ex_xl.residue1.seq_id, xl.atom1,
xl.asym2._id, ex_xl.residue2.seq_id, xl.atom2,
r.linker)
if sig in seen_cross_links:
xl._id = seen_cross_links[sig]
else:
xl._id = next(xl_id)
self._xls_by_id.append((r, xl))
seen_cross_links[sig] = xl._id
def dump(self, system, writer):
self.dump_list(system, writer)
pseudo_xls = self.dump_restraint(system, writer)
self.dump_pseudo_sites(system, writer, pseudo_xls)
self.dump_result(system, writer)
self.dump_result_parameters(system, writer)
def dump_list(self, system, writer):
with writer.loop("_ihm_cross_link_list",
["id", "group_id", "entity_description_1",
"entity_id_1", "seq_id_1", "comp_id_1",
"entity_description_2",
"entity_id_2", "seq_id_2", "comp_id_2",
"linker_chem_comp_descriptor_id", "linker_type",
"dataset_list_id", "details"]) as lp:
for r, xl in self._ex_xls_by_id:
entity1 = xl.residue1.entity
entity2 = xl.residue2.entity
if self._check:
util._check_residue(xl.residue1)
util._check_residue(xl.residue2)
lp.write(id=xl._id, group_id=xl._group_id,
entity_description_1=entity1.description,
entity_id_1=entity1._id,
seq_id_1=xl.residue1.seq_id,
comp_id_1=_get_comp_id(entity1, xl.residue1.seq_id),
entity_description_2=entity2.description,
entity_id_2=entity2._id,
seq_id_2=xl.residue2.seq_id,
comp_id_2=_get_comp_id(entity2, xl.residue2.seq_id),
linker_chem_comp_descriptor_id=r.linker._id,
linker_type=r.linker.auth_name,
dataset_list_id=r.dataset._id,
details=xl.details)
def dump_restraint(self, system, writer):
pseudo_xls = []
with writer.loop("_ihm_cross_link_restraint",
["id", "group_id", "entity_id_1", "asym_id_1",
"seq_id_1", "comp_id_1",
"entity_id_2", "asym_id_2", "seq_id_2", "comp_id_2",
"atom_id_1", "atom_id_2",
"restraint_type", "conditional_crosslink_flag",
"model_granularity", "distance_threshold",
"psi", "sigma_1", "sigma_2",
"pseudo_site_flag"]) as lp:
condmap = {True: 'ALL', False: 'ANY', None: None}
for r, xl in self._xls_by_id:
ex_xl = xl.experimental_cross_link
entity1 = ex_xl.residue1.entity
entity2 = ex_xl.residue2.entity
pseudo = False
for np, ps in enumerate((xl.pseudo1, xl.pseudo2)):
if ps:
pseudo = True
for p in ps:
pseudo_xls.append((p, np, xl))
lp.write(id=xl._id, group_id=ex_xl._id,
entity_id_1=entity1._id, asym_id_1=xl.asym1._id,
seq_id_1=ex_xl.residue1.seq_id,
comp_id_1=_get_comp_id(entity1,
ex_xl.residue1.seq_id),
entity_id_2=entity2._id, asym_id_2=xl.asym2._id,
seq_id_2=ex_xl.residue2.seq_id,
comp_id_2=_get_comp_id(entity2,
ex_xl.residue2.seq_id),
atom_id_1=xl.atom1, atom_id_2=xl.atom2,
restraint_type=xl.distance.restraint_type,
conditional_crosslink_flag=condmap[xl.restrain_all],
model_granularity=xl.granularity,
distance_threshold=xl.distance.distance,
psi=xl.psi, sigma_1=xl.sigma1, sigma_2=xl.sigma2,
pseudo_site_flag=pseudo)
return pseudo_xls
def dump_pseudo_sites(self, system, writer, pseudo_xls):
with writer.loop("_ihm_cross_link_pseudo_site",
["id", "restraint_id", "cross_link_partner",
"pseudo_site_id", "model_id"]) as lp:
ordinal = itertools.count(1)
for p, partner, rsr in pseudo_xls:
lp.write(id=next(ordinal), restraint_id=rsr._id,
cross_link_partner=partner + 1,
pseudo_site_id=p.site._id,
model_id=p.model._id if p.model else None)
def dump_result(self, system, writer):
with writer.loop("_ihm_cross_link_result",
["id", "restraint_id", "ensemble_id",
"model_group_id", "num_models", "distance_threshold",
"median_distance", "details"]) as lp:
ordinal = itertools.count(1)
for r in self._all_restraints(system):
for xl in r.cross_links:
# all fits ordered by ID
for g, fit in sorted(
(it for it in xl.fits.items()
if not isinstance(it[0], ihm.model.Model)),
key=lambda i: i[0]._id):
if isinstance(g, ihm.model.Ensemble):
ens_id = g._id
if g.model_group is None:
mg_id = None
else:
mg_id = g.model_group._id
else:
mg_id = g._id
ens_id = None
lp.write(id=next(ordinal), restraint_id=xl._id,
model_group_id=mg_id, ensemble_id=ens_id,
num_models=fit.num_models,
distance_threshold=xl.distance.distance,
median_distance=fit.median_distance,
details=fit.details)
def dump_result_parameters(self, system, writer):
with writer.loop("_ihm_cross_link_result_parameters",
["id", "restraint_id", "model_id",
"psi", "sigma_1", "sigma_2"]) as lp:
ordinal = itertools.count(1)
for r in self._all_restraints(system):
for xl in r.cross_links:
# all fits ordered by model ID
for model, fit in sorted(
(it for it in xl.fits.items()
if isinstance(it[0], ihm.model.Model)),
key=lambda i: i[0]._id):
lp.write(id=next(ordinal), restraint_id=xl._id,
model_id=model._id, psi=fit.psi,
sigma_1=fit.sigma1, sigma_2=fit.sigma2)
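# Sketch of the deduplication in finalize_experimental() above (e1 and
# e2 are assumed ihm.Entity objects, under a restraint whose linker is
# an ihm.ChemDescriptor): two ExperimentalCrossLink objects between the
# same residues with the same linker collapse to one ID and group ID,
# so duplicated input rows do not inflate _ihm_cross_link_list:
#
#     import ihm.restraint
#     xl1 = ihm.restraint.ExperimentalCrossLink(e1.residue(5),
#                                               e2.residue(9))
#     xl2 = ihm.restraint.ExperimentalCrossLink(e1.residue(5),
#                                               e2.residue(9))
#     # After finalize, xl1._id == xl2._id and
#     # xl1._group_id == xl2._group_id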
class _GeometricRestraintDumper(Dumper):
def _all_restraints(self, system):
return [r for r in system._all_restraints()
if isinstance(r, restraint.GeometricRestraint)]
def finalize(self, system):
for nr, r in enumerate(self._all_restraints(system)):
r._id = nr + 1
def dump(self, system, writer):
condmap = {True: 'ALL', False: 'ANY', None: None}
with writer.loop("_ihm_geometric_object_distance_restraint",
["id", "object_id", "feature_id",
"object_characteristic", "restraint_type",
"harmonic_force_constant",
"distance_lower_limit", "distance_upper_limit",
"group_conditionality", "dataset_list_id"]) as lp:
for r in self._all_restraints(system):
lp.write(id=r._id, object_id=r.geometric_object._id,
feature_id=r.feature._id,
object_characteristic=r.object_characteristic,
restraint_type=r.distance.restraint_type,
distance_lower_limit=r.distance.distance_lower_limit,
distance_upper_limit=r.distance.distance_upper_limit,
harmonic_force_constant=r.harmonic_force_constant,
group_conditionality=condmap[r.restrain_all],
dataset_list_id=r.dataset._id if r.dataset else None)
def _finalize_restraints_and_groups(system, restraint_class):
"""Assign IDs to all restraints of the given class, and also assign IDs
to groups of these restraints."""
def _all_restraints():
return [r for r in system._all_restraints()
if isinstance(r, restraint_class)]
def _all_restraint_groups():
return [rg for rg in system.restraint_groups
if all(isinstance(r, restraint_class)
for r in rg) and len(rg) > 0]
restraints_by_id = []
seen_restraints = {}
for r in _all_restraints():
util._remove_id(r)
for r in _all_restraints():
util._assign_id(r, seen_restraints, restraints_by_id)
group_for_id = {}
for nrg, rg in enumerate(_all_restraint_groups()):
rg._id = nrg + 1
for r in rg:
if r._id in group_for_id:
raise ValueError("%s cannot be in more than one group" % r)
group_for_id[r._id] = rg._id
return restraints_by_id, group_for_id
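# For example (a hedged sketch; `dataset`, `feat1`, `feat2` and `dist`
# are assumed pre-built objects): two grouped restraints share one
# group_id in the output, while placing the same restraint object in
# two groups raises the ValueError above.
#
#     import ihm.restraint
#     r1 = ihm.restraint.DerivedDistanceRestraint(dataset, feat1, feat2,
#                                                 dist)
#     r2 = ihm.restraint.DerivedDistanceRestraint(dataset, feat1, feat2,
#                                                 dist)
#     system.restraint_groups.append(
#         ihm.restraint.RestraintGroup([r1, r2]))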
class _DerivedDistanceRestraintDumper(Dumper):
def finalize(self, system):
(self._restraints_by_id,
self._group_for_id) = _finalize_restraints_and_groups(
system, restraint.DerivedDistanceRestraint)
def dump(self, system, writer):
condmap = {True: 'ALL', False: 'ANY', None: None}
with writer.loop("_ihm_derived_distance_restraint",
["id", "group_id", "feature_id_1", "feature_id_2",
"restraint_type", "distance_lower_limit",
"distance_upper_limit", "probability", "mic_value",
"group_conditionality", "dataset_list_id"]) as lp:
for r in self._restraints_by_id:
lp.write(id=r._id, feature_id_1=r.feature1._id,
group_id=self._group_for_id.get(r._id, None),
feature_id_2=r.feature2._id,
restraint_type=r.distance.restraint_type,
distance_lower_limit=r.distance.distance_lower_limit,
distance_upper_limit=r.distance.distance_upper_limit,
probability=r.probability, mic_value=r.mic_value,
group_conditionality=condmap[r.restrain_all],
dataset_list_id=r.dataset._id if r.dataset else None)
class _HDXRestraintDumper(Dumper):
def _all_restraints(self, system):
return [r for r in system._all_restraints()
if isinstance(r, restraint.HDXRestraint)]
def finalize(self, system):
for nr, r in enumerate(self._all_restraints(system)):
r._id = nr + 1
def dump(self, system, writer):
with writer.loop("_ihm_hdx_restraint",
["id", "feature_id", "protection_factor",
"dataset_list_id", "details"]) as lp:
for r in self._all_restraints(system):
lp.write(id=r._id, feature_id=r.feature._id,
protection_factor=r.protection_factor,
details=r.details,
dataset_list_id=r.dataset._id if r.dataset else None)
class _PredictedContactRestraintDumper(Dumper):
def finalize(self, system):
(self._restraints_by_id,
self._group_for_id) = _finalize_restraints_and_groups(
system, restraint.PredictedContactRestraint)
def dump(self, system, writer):
with writer.loop("_ihm_predicted_contact_restraint",
["id", "group_id", "entity_id_1", "asym_id_1",
"comp_id_1", "seq_id_1", "rep_atom_1", "entity_id_2",
"asym_id_2", "comp_id_2", "seq_id_2", "rep_atom_2",
"restraint_type", "distance_lower_limit",
"distance_upper_limit", "probability",
"model_granularity", "dataset_list_id",
"software_id"]) as lp:
for r in self._restraints_by_id:
e = r.resatom1.asym.entity
comp1 = e.sequence[r.resatom1.seq_id - 1].id
e = r.resatom2.asym.entity
comp2 = e.sequence[r.resatom2.seq_id - 1].id
atom1 = atom2 = None
if isinstance(r.resatom1, ihm.Atom):
atom1 = r.resatom1.id
if isinstance(r.resatom2, ihm.Atom):
atom2 = r.resatom2.id
lp.write(id=r._id,
group_id=self._group_for_id.get(r._id, None),
entity_id_1=r.resatom1.asym.entity._id,
asym_id_1=r.resatom1.asym._id,
comp_id_1=comp1, seq_id_1=r.resatom1.seq_id,
rep_atom_1=atom1,
entity_id_2=r.resatom2.asym.entity._id,
asym_id_2=r.resatom2.asym._id,
comp_id_2=comp2, seq_id_2=r.resatom2.seq_id,
rep_atom_2=atom2,
restraint_type=r.distance.restraint_type,
distance_lower_limit=r.distance.distance_lower_limit,
distance_upper_limit=r.distance.distance_upper_limit,
probability=r.probability,
model_granularity="by-residue" if r.by_residue
else 'by-feature',
dataset_list_id=r.dataset._id if r.dataset else None,
software_id=r.software._id if r.software else None)
class _EM3DDumper(Dumper):
def _all_restraints(self, system):
return [r for r in system._all_restraints()
if isinstance(r, restraint.EM3DRestraint)]
def finalize(self, system):
for nr, r in enumerate(self._all_restraints(system)):
r._id = nr + 1
def dump(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_3dem_restraint",
["id", "dataset_list_id", "fitting_method",
"fitting_method_citation_id",
"struct_assembly_id", "map_segment_flag",
"number_of_gaussians", "model_id",
"cross_correlation_coefficient", "details"]) as lp:
for r in self._all_restraints(system):
if r.fitting_method_citation:
citation_id = r.fitting_method_citation._id
else:
citation_id = None
# all fits ordered by model ID
for model, fit in sorted(r.fits.items(),
key=lambda i: i[0]._id):
ccc = fit.cross_correlation_coefficient
lp.write(id=next(ordinal),
dataset_list_id=r.dataset._id,
fitting_method=r.fitting_method,
fitting_method_citation_id=citation_id,
struct_assembly_id=r.assembly._id,
map_segment_flag=r.segment,
number_of_gaussians=r.number_of_gaussians,
model_id=model._id,
cross_correlation_coefficient=ccc,
details=r.details)
class _EM2DDumper(Dumper):
def _all_restraints(self, system):
return [r for r in system._all_restraints()
if isinstance(r, restraint.EM2DRestraint)]
def finalize(self, system):
for nr, r in enumerate(self._all_restraints(system)):
r._id = nr + 1
def dump(self, system, writer):
self.dump_restraint(system, writer)
self.dump_fitting(system, writer)
def dump_restraint(self, system, writer):
with writer.loop("_ihm_2dem_class_average_restraint",
["id", "dataset_list_id", "number_raw_micrographs",
"pixel_size_width", "pixel_size_height",
"image_resolution", "image_segment_flag",
"number_of_projections", "struct_assembly_id",
"details"]) as lp:
for r in self._all_restraints(system):
lp.write(id=r._id, dataset_list_id=r.dataset._id,
number_raw_micrographs=r.number_raw_micrographs,
pixel_size_width=r.pixel_size_width,
pixel_size_height=r.pixel_size_height,
image_resolution=r.image_resolution,
number_of_projections=r.number_of_projections,
struct_assembly_id=r.assembly._id,
image_segment_flag=r.segment,
details=r.details)
def dump_fitting(self, system, writer):
ordinal = itertools.count(1)
with writer.loop(
"_ihm_2dem_class_average_fitting",
["id", "restraint_id", "model_id",
"cross_correlation_coefficient", "rot_matrix[1][1]",
"rot_matrix[2][1]", "rot_matrix[3][1]", "rot_matrix[1][2]",
"rot_matrix[2][2]", "rot_matrix[3][2]", "rot_matrix[1][3]",
"rot_matrix[2][3]", "rot_matrix[3][3]", "tr_vector[1]",
"tr_vector[2]", "tr_vector[3]"]) as lp:
for r in self._all_restraints(system):
# all fits ordered by model ID
for model, fit in sorted(r.fits.items(),
key=lambda i: i[0]._id):
ccc = fit.cross_correlation_coefficient
if fit.tr_vector is None:
t = [None] * 3
else:
t = fit.tr_vector
if fit.rot_matrix is None:
rm = [[None] * 3] * 3
else:
# mmCIF writer usually outputs floats to 3 decimal
# places, but we need more precision for rotation
# matrices
rm = [["%.6f" % e for e in fit.rot_matrix[i]]
for i in range(3)]
lp.write(id=next(ordinal), restraint_id=r._id,
model_id=model._id,
cross_correlation_coefficient=ccc,
rot_matrix11=rm[0][0], rot_matrix21=rm[1][0],
rot_matrix31=rm[2][0], rot_matrix12=rm[0][1],
rot_matrix22=rm[1][1], rot_matrix32=rm[2][1],
rot_matrix13=rm[0][2], rot_matrix23=rm[1][2],
rot_matrix33=rm[2][2], tr_vector1=t[0],
tr_vector2=t[1], tr_vector3=t[2])
class _SASDumper(Dumper):
def _all_restraints(self, system):
return [r for r in system._all_restraints()
if isinstance(r, restraint.SASRestraint)]
def finalize(self, system):
for nr, r in enumerate(self._all_restraints(system)):
r._id = nr + 1
def dump(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ihm_sas_restraint",
["id", "dataset_list_id", "model_id",
"struct_assembly_id", "profile_segment_flag",
"fitting_atom_type", "fitting_method",
"fitting_state", "radius_of_gyration",
"chi_value", "details"]) as lp:
for r in self._all_restraints(system):
# all fits ordered by model ID
for model, fit in sorted(r.fits.items(),
key=lambda i: i[0]._id):
lp.write(id=next(ordinal),
dataset_list_id=r.dataset._id,
fitting_method=r.fitting_method,
fitting_atom_type=r.fitting_atom_type,
fitting_state='Multiple' if r.multi_state
else 'Single',
profile_segment_flag=r.segment,
radius_of_gyration=r.radius_of_gyration,
struct_assembly_id=r.assembly._id,
model_id=model._id,
chi_value=fit.chi_value,
details=r.details)
def _assign_all_ids(all_objs_func):
"""Given a function that returns a list of all objects, assign IDs and
return a list of objects sorted by ID"""
objs_by_id = []
obj_id = itertools.count(1)
for f in all_objs_func():
util._remove_id(f)
for f in all_objs_func():
if not hasattr(f, '_id'):
f._id = next(obj_id)
objs_by_id.append(f)
return objs_by_id
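# Deduplication here is by object identity: if all_objs_func yields the
# same object twice (e.g. one inst_setting shared by two experiments),
# it keeps a single ID. A toy illustration with stand-in objects:
#
#     class _Toy:
#         pass
#     a, b = _Toy(), _Toy()
#     by_id = _assign_all_ids(lambda: [a, b, a])
#     # by_id == [a, b]; a._id == 1, b._id == 2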
class _MultiStateSchemeDumper(Dumper):
def finalize(self, system):
# Assign IDs
s_id = itertools.count(1)
for s in system.multi_state_schemes:
if not hasattr(s, '_id'):
s._id = next(s_id)
def dump(self, system, writer):
with writer.loop('_ihm_multi_state_scheme',
['id', 'name', 'details']) as lp:
# Skip any scheme that was added to the system more than once
seen_multi_state_schemes = []
for s in system.multi_state_schemes:
if s not in seen_multi_state_schemes:
seen_multi_state_schemes.append(s)
lp.write(id=s._id,
name=s.name,
details=s.details)
class _MultiStateSchemeConnectivityDumper(Dumper):
def finalize(self, system):
# Assign IDs
c_id = itertools.count(1)
for c in system._all_multi_state_scheme_connectivities():
if not hasattr(c, '_id'):
c._id = next(c_id)
def dump(self, system, writer):
with writer.loop('_ihm_multi_state_scheme_connectivity',
['id', 'scheme_id', 'begin_state_id', 'end_state_id',
'dataset_group_id', 'details']) as lp:
for mss in system.multi_state_schemes:
for c in mss.get_connectivities():
end_state_id = c.end_state._id if \
c.end_state is not None else None
dataset_group_id = c.dataset_group._id if \
c.dataset_group else None
lp.write(id=c._id, scheme_id=mss._id,
begin_state_id=c.begin_state._id,
end_state_id=end_state_id,
dataset_group_id=dataset_group_id,
details=c.details)
class _RelaxationTimeDumper(Dumper):
def finalize(self, system):
# Assign IDs
r_id = itertools.count(1)
for r in system._all_relaxation_times():
if not hasattr(r, '_id'):
r._id = next(r_id)
def dump(self, system, writer):
self.dump_ihm_relaxation_time(system, writer)
self.dump_ihm_relaxation_time_multi_state_scheme(system, writer)
def dump_ihm_relaxation_time(self, system, writer):
with writer.loop('_ihm_relaxation_time',
['id', 'value', 'unit', 'amplitude',
'dataset_group_id', 'external_file_id',
'details']) as lp:
# Write every relaxation time, however it was assigned
for r in system._all_relaxation_times():
dataset_group_id = r.dataset_group._id if \
r.dataset_group else None
external_file_id = r.external_file._id if \
r.external_file else None
lp.write(
id=r._id,
value=r.value,
unit=r.unit,
amplitude=r.amplitude,
dataset_group_id=dataset_group_id,
external_file_id=external_file_id,
details=r.details)
def dump_ihm_relaxation_time_multi_state_scheme(self, system, writer):
with writer.loop('_ihm_relaxation_time_multi_state_scheme',
['id', 'relaxation_time_id', 'scheme_id',
'scheme_connectivity_id', 'details']) as lp:
# Relaxation times assigned directly to multi-state schemes
ordinal = itertools.count(1)
# Keep track of relaxation times already written, to avoid
# duplicates when handling the flr_data below
seen_relaxation_times = []
for mss in system.multi_state_schemes:
for r in mss.get_relaxation_times():
if r not in seen_relaxation_times:
seen_relaxation_times.append(r)
lp.write(id=next(ordinal),
relaxation_time_id=r._id,
scheme_id=mss._id,
scheme_connectivity_id=None,
details=None)
# Relaxation times assigned to multi-state scheme connectivities
for mss in system.multi_state_schemes:
for mssc in mss.get_connectivities():
if mssc.relaxation_time is not None:
if mssc.relaxation_time not in seen_relaxation_times:
seen_relaxation_times.append(mssc.relaxation_time)
lp.write(
id=next(ordinal),
relaxation_time_id=mssc.relaxation_time._id,
scheme_id=mss._id,
scheme_connectivity_id=mssc._id,
details=None)
# Handle relaxation times that were never assigned to a multi-state
# scheme but occur only within the context of flr_data
for f in system.flr_data:
for c in f.relaxation_time_fret_analysis_connections:
r = c.relaxation_time
if r not in seen_relaxation_times:
seen_relaxation_times.append(r)
lp.write(id=next(ordinal),
relaxation_time_id=r._id,
scheme_id=None,
scheme_connectivity_id=None,
details=None)
class _KineticRateDumper(Dumper):
def finalize(self, system):
# Assign IDs
k_id = itertools.count(1)
for k in system._all_kinetic_rates():
if not hasattr(k, '_id'):
k._id = next(k_id)
def dump(self, system, writer):
with writer.loop('_ihm_kinetic_rate',
['id', 'transition_rate_constant',
'equilibrium_constant',
'equilibrium_constant_determination_method',
'equilibrium_constant_unit', 'details',
'scheme_connectivity_id',
'dataset_group_id', 'external_file_id']) as lp:
ordinal = itertools.count(1)
seen_kinetic_rates = []
for mssc in system._all_multi_state_scheme_connectivities():
if mssc.kinetic_rate is not None:
k = mssc.kinetic_rate
seen_kinetic_rates.append(k)
trconst = k.transition_rate_constant
eqconst = k.equilibrium_constant
dataset_group_id = k.dataset_group._id if \
k.dataset_group else None
external_file_id = k.external_file._id if \
k.external_file else None
lp.write(
id=next(ordinal),
transition_rate_constant=trconst,
equilibrium_constant=None if
eqconst is None else eqconst.value,
equilibrium_constant_determination_method=None
if eqconst is None else eqconst.method,
equilibrium_constant_unit=None if
eqconst is None else eqconst.unit,
details=k.details,
scheme_connectivity_id=mssc._id,
dataset_group_id=dataset_group_id,
external_file_id=external_file_id)
for f in system.flr_data:
for c in f.kinetic_rate_fret_analysis_connections:
k = c.kinetic_rate
if k not in seen_kinetic_rates:
seen_kinetic_rates.append(k)
trconst = k.transition_rate_constant
eqconst = k.equilibrium_constant
dataset_group_id = k.dataset_group._id if \
k.dataset_group else None
external_file_id = k.external_file._id if \
k.external_file else None
lp.write(
id=next(ordinal),
transition_rate_constant=trconst,
equilibrium_constant=None if eqconst is None
else eqconst.value,
equilibrium_constant_determination_method=None
if eqconst is None else eqconst.method,
equilibrium_constant_unit=None if eqconst is None
else eqconst.unit,
details=k.details,
scheme_connectivity_id=None,
dataset_group_id=dataset_group_id,
external_file_id=external_file_id)
class _FLRExperimentDumper(Dumper):
def finalize(self, system):
def all_experiments():
return itertools.chain.from_iterable(f._all_experiments()
for f in system.flr_data)
self._experiments_by_id = _assign_all_ids(all_experiments)
def dump(self, system, writer):
with writer.loop(
'_flr_experiment',
['ordinal_id', 'id', 'instrument_id', 'inst_setting_id',
'exp_condition_id', 'sample_id', 'details']) as lp:
ordinal = itertools.count(1)
for x in self._experiments_by_id:
for i in range(len(x.sample_list)):
lp.write(ordinal_id=next(ordinal), id=x._id,
instrument_id=x.instrument_list[i]._id,
inst_setting_id=x.inst_setting_list[i]._id,
exp_condition_id=x.exp_condition_list[i]._id,
sample_id=x.sample_list[i]._id,
details=x.details_list[i])
class _FLRInstSettingDumper(Dumper):
def finalize(self, system):
def all_inst_settings():
return itertools.chain.from_iterable(f._all_inst_settings()
for f in system.flr_data)
self._inst_settings_by_id = _assign_all_ids(all_inst_settings)
def dump(self, system, writer):
with writer.loop('_flr_inst_setting', ['id', 'details']) as lp:
for x in self._inst_settings_by_id:
lp.write(id=x._id, details=x.details)
class _FLRExpConditionDumper(Dumper):
def finalize(self, system):
def all_exp_conditions():
return itertools.chain.from_iterable(f._all_exp_conditions()
for f in system.flr_data)
self._exp_conditions_by_id = _assign_all_ids(all_exp_conditions)
def dump(self, system, writer):
with writer.loop('_flr_exp_condition', ['id', 'details']) as lp:
for x in self._exp_conditions_by_id:
lp.write(id=x._id, details=x.details)
class _FLRInstrumentDumper(Dumper):
def finalize(self, system):
def all_instruments():
return itertools.chain.from_iterable(f._all_instruments()
for f in system.flr_data)
self._instruments_by_id = _assign_all_ids(all_instruments)
def dump(self, system, writer):
with writer.loop('_flr_instrument', ['id', 'details']) as lp:
for x in self._instruments_by_id:
lp.write(id=x._id, details=x.details)
class _FLREntityAssemblyDumper(Dumper):
def finalize(self, system):
def all_entity_assemblies():
return itertools.chain.from_iterable(
(s.entity_assembly for s in f._all_samples())
for f in system.flr_data)
self._entity_assemblies_by_id = _assign_all_ids(all_entity_assemblies)
def dump(self, system, writer):
with writer.loop(
'_flr_entity_assembly',
['ordinal_id', 'assembly_id', 'entity_id', 'num_copies',
'entity_description']) as lp:
ordinal = itertools.count(1)
for x in self._entity_assemblies_by_id:
for i in range(len(x.entity_list)):
lp.write(ordinal_id=next(ordinal),
assembly_id=x._id,
entity_id=x.entity_list[i]._id,
num_copies=x.num_copies_list[i],
entity_description=x.entity_list[i].description)
class _FLRSampleConditionDumper(Dumper):
def finalize(self, system):
def all_sample_conditions():
return itertools.chain.from_iterable(
(s.condition for s in f._all_samples())
for f in system.flr_data)
self._sample_conditions_by_id = _assign_all_ids(all_sample_conditions)
def dump(self, system, writer):
with writer.loop('_flr_sample_condition', ['id', 'details']) as lp:
for x in self._sample_conditions_by_id:
lp.write(id=x._id, details=x.details)
class _FLRSampleDumper(Dumper):
def finalize(self, system):
def all_samples():
return itertools.chain.from_iterable(f._all_samples()
for f in system.flr_data)
self._samples_by_id = _assign_all_ids(all_samples)
def dump(self, system, writer):
with writer.loop('_flr_sample',
['id', 'entity_assembly_id', 'num_of_probes',
'sample_condition_id', 'sample_description',
'sample_details', 'solvent_phase']) as lp:
for x in self._samples_by_id:
lp.write(id=x._id, entity_assembly_id=x.entity_assembly._id,
num_of_probes=x.num_of_probes,
sample_condition_id=x.condition._id,
sample_description=x.description,
sample_details=x.details,
solvent_phase=x.solvent_phase)
class _FLRProbeDumper(Dumper):
def finalize(self, system):
def all_probes():
return itertools.chain.from_iterable(f._all_probes()
for f in system.flr_data)
self._probes_by_id = _assign_all_ids(all_probes)
def dump(self, system, writer):
self.dump_probe_list(system, writer)
self.dump_probe_descriptor(system, writer)
def dump_probe_list(self, system, writer):
with writer.loop('_flr_probe_list',
['probe_id', 'chromophore_name',
'reactive_probe_flag',
'reactive_probe_name', 'probe_origin',
'probe_link_type']) as lp:
for x in self._probes_by_id:
entry = x.probe_list_entry
lp.write(probe_id=x._id,
chromophore_name=entry.chromophore_name,
reactive_probe_flag=entry.reactive_probe_flag,
reactive_probe_name=entry.reactive_probe_name,
probe_origin=entry.probe_origin,
probe_link_type=entry.probe_link_type)
def dump_probe_descriptor(self, system, writer):
with writer.loop('_flr_probe_descriptor',
['probe_id', 'reactive_probe_chem_descriptor_id',
'chromophore_chem_descriptor_id',
'chromophore_center_atom']) as lp:
for x in self._probes_by_id:
reactive = x.probe_descriptor.reactive_probe_chem_descriptor
chrom = x.probe_descriptor.chromophore_chem_descriptor
reactive_id = None if reactive is None else reactive._id
chrom_id = None if chrom is None else chrom._id
center = x.probe_descriptor.chromophore_center_atom
lp.write(probe_id=x._id,
reactive_probe_chem_descriptor_id=reactive_id,
chromophore_chem_descriptor_id=chrom_id,
chromophore_center_atom=center)
class _FLRSampleProbeDetailsDumper(Dumper):
def finalize(self, system):
def all_sample_probe_details():
return itertools.chain.from_iterable(f._all_sample_probe_details()
for f in system.flr_data)
self._sample_probe_details_by_id = _assign_all_ids(
all_sample_probe_details)
def dump(self, system, writer):
with writer.loop('_flr_sample_probe_details',
['sample_probe_id', 'sample_id', 'probe_id',
'fluorophore_type', 'description',
'poly_probe_position_id']) as lp:
for x in self._sample_probe_details_by_id:
lp.write(sample_probe_id=x._id,
sample_id=x.sample._id,
probe_id=x.probe._id,
fluorophore_type=x.fluorophore_type,
description=x.description,
poly_probe_position_id=x.poly_probe_position._id)
class _FLRPolyProbePositionDumper(Dumper):
def finalize(self, system):
def all_poly_probe_positions():
return itertools.chain.from_iterable(f._all_poly_probe_positions()
for f in system.flr_data)
self._positions_by_id = _assign_all_ids(all_poly_probe_positions)
def dump(self, system, writer):
self.dump_position(system, writer)
self.dump_position_mutated(system, writer)
self.dump_position_modified(system, writer)
def dump_position(self, system, writer):
with writer.loop('_flr_poly_probe_position',
['id', 'entity_id', 'entity_description',
'asym_id',
'seq_id', 'comp_id', 'atom_id',
'mutation_flag', 'modification_flag',
'auth_name']) as lp:
for x in self._positions_by_id:
atom = None
if isinstance(x.resatom, ihm.Atom):
atom = x.resatom.id
if x.resatom.asym is None:
comp = x.resatom.entity.sequence[x.resatom.seq_id - 1].id
a_id = None
e_id = x.resatom.entity._id
e_desc = x.resatom.entity.description
else:
e = x.resatom.asym.entity
comp = e.sequence[x.resatom.seq_id - 1].id
a_id = x.resatom.asym._id
e_id = x.resatom.asym.entity._id
e_desc = x.resatom.asym.entity.description
lp.write(id=x._id, entity_id=e_id,
entity_description=e_desc,
asym_id=a_id,
seq_id=x.resatom.seq_id,
comp_id=comp, atom_id=atom,
mutation_flag=x.mutation_flag,
modification_flag=x.modification_flag,
auth_name=x.auth_name)
def dump_position_mutated(self, system, writer):
with writer.loop('_flr_poly_probe_position_mutated',
['id', 'chem_comp_id', 'atom_id']) as lp:
for x in self._positions_by_id:
if x.mutation_flag is True:
atom = None
if isinstance(x.resatom, ihm.Atom):
atom = x.resatom.id
lp.write(id=x._id,
chem_comp_id=x.mutated_chem_comp_id.id,
atom_id=atom)
def dump_position_modified(self, system, writer):
with writer.loop('_flr_poly_probe_position_modified',
['id', 'chem_descriptor_id', 'atom_id']) as lp:
for x in self._positions_by_id:
if x.modification_flag is True:
atom = None
if isinstance(x.resatom, ihm.Atom):
atom = x.resatom.id
lp.write(id=x._id,
chem_descriptor_id=x.modified_chem_descriptor._id,
atom_id=atom)
class _FLRConjugateDumper(Dumper):
def finalize(self, system):
def all_conjugates():
return itertools.chain.from_iterable(f.poly_probe_conjugates
for f in system.flr_data)
self._conjugates_by_id = _assign_all_ids(all_conjugates)
def dump(self, system, writer):
with writer.loop('_flr_poly_probe_conjugate',
['id', 'sample_probe_id', 'chem_descriptor_id',
'ambiguous_stoichiometry_flag',
'probe_stoichiometry']) as lp:
for x in self._conjugates_by_id:
lp.write(
id=x._id,
sample_probe_id=x.sample_probe._id,
chem_descriptor_id=x.chem_descriptor._id,
ambiguous_stoichiometry_flag=x.ambiguous_stoichiometry,
probe_stoichiometry=x.probe_stoichiometry)
class _FLRForsterRadiusDumper(Dumper):
def finalize(self, system):
def all_forster_radii():
return itertools.chain.from_iterable(f._all_forster_radii()
for f in system.flr_data)
self._radii_by_id = _assign_all_ids(all_forster_radii)
def dump(self, system, writer):
with writer.loop('_flr_fret_forster_radius',
['id', 'donor_probe_id', 'acceptor_probe_id',
'forster_radius', 'reduced_forster_radius']) as lp:
for x in self._radii_by_id:
lp.write(id=x._id, donor_probe_id=x.donor_probe._id,
acceptor_probe_id=x.acceptor_probe._id,
forster_radius=x.forster_radius,
reduced_forster_radius=x.reduced_forster_radius)
class _FLRCalibrationParametersDumper(Dumper):
def finalize(self, system):
def all_calibration_parameters():
return itertools.chain.from_iterable(
f._all_calibration_parameters() for f in system.flr_data)
self._parameters_by_id = _assign_all_ids(all_calibration_parameters)
def dump(self, system, writer):
with writer.loop('_flr_fret_calibration_parameters',
['id', 'phi_acceptor', 'alpha', 'alpha_sd',
'gG_gR_ratio', 'beta', 'gamma', 'delta',
'a_b']) as lp:
for x in self._parameters_by_id:
lp.write(id=x._id, phi_acceptor=x.phi_acceptor,
alpha=x.alpha, alpha_sd=x.alpha_sd,
gG_gR_ratio=x.gg_gr_ratio, beta=x.beta,
gamma=x.gamma, delta=x.delta, a_b=x.a_b)
class _FLRLifetimeFitModelDumper(Dumper):
def finalize(self, system):
def all_lifetime_fit_models():
return itertools.chain.from_iterable(f._all_lifetime_fit_models()
for f in system.flr_data)
self._lifetime_fit_models_by_id = \
_assign_all_ids(all_lifetime_fit_models)
def dump(self, system, writer):
with writer.loop('_flr_lifetime_fit_model',
['id', 'name', 'description',
'external_file_id', 'citation_id']) as lp:
for x in self._lifetime_fit_models_by_id:
lp.write(id=x._id, name=x.name,
description=x.description,
external_file_id=None if x.external_file is None
else x.external_file._id,
citation_id=None if x.citation is None
else x.citation._id)
class _FLRRefMeasurementDumper(Dumper):
def finalize(self, system):
def all_ref_measurement_groups():
return itertools.chain.from_iterable(
f._all_ref_measurement_groups() for f in system.flr_data)
self._ref_measurement_groups_by_id = _assign_all_ids(
all_ref_measurement_groups)
def _all_ref_measurements():
return itertools.chain.from_iterable(f._all_ref_measurements()
for f in system.flr_data)
self._ref_measurements_by_id = _assign_all_ids(_all_ref_measurements)
def _all_ref_measurement_lifetimes():
return itertools.chain.from_iterable(
f._all_ref_measurement_lifetimes() for f in system.flr_data)
self._ref_measurement_lifetimes_by_id = _assign_all_ids(
_all_ref_measurement_lifetimes)
def dump(self, system, writer):
self.dump_ref_measurement_group(system, writer)
self.dump_ref_measurement_group_link(system, writer)
self.dump_ref_measurement(system, writer)
self.dump_ref_measurement_lifetimes(system, writer)
def dump_ref_measurement_group(self, system, writer):
with writer.loop('_flr_reference_measurement_group',
['id', 'num_measurements', 'details']) as lp:
for x in self._ref_measurement_groups_by_id:
lp.write(id=x._id,
num_measurements=len(x.ref_measurement_list),
details=x.details)
def dump_ref_measurement_group_link(self, system, writer):
with writer.loop('_flr_reference_measurement_group_link',
['group_id', 'reference_measurement_id']) as lp:
for x in self._ref_measurement_groups_by_id:
for m in x.ref_measurement_list:
lp.write(group_id=x._id,
reference_measurement_id=m._id)
def dump_ref_measurement(self, system, writer):
with writer.loop('_flr_reference_measurement',
['id', 'reference_sample_probe_id',
'num_species', 'details']) as lp:
for x in self._ref_measurements_by_id:
lp.write(id=x._id,
reference_sample_probe_id=x.ref_sample_probe._id,
num_species=len(x.list_of_lifetimes),
details=x.details)
def dump_ref_measurement_lifetimes(self, system, writer):
with writer.loop('_flr_reference_measurement_lifetime',
['ordinal_id', 'reference_measurement_id',
'species_name', 'species_fraction',
'lifetime']) as lp:
ordinal = itertools.count(1)
for x in self._ref_measurements_by_id:
for m in x.list_of_lifetimes:
lp.write(ordinal_id=next(ordinal),
reference_measurement_id=x._id,
species_name=m.species_name,
species_fraction=m.species_fraction,
lifetime=m.lifetime)
class _FLRAnalysisDumper(Dumper):
def finalize(self, system):
def all_analyses():
return itertools.chain.from_iterable(f._all_analyses()
for f in system.flr_data)
self._analyses_by_id = _assign_all_ids(all_analyses)
def dump(self, system, writer):
self.dump_fret_analysis_general(system, writer)
self.dump_fret_analysis_intensity(system, writer)
self.dump_fret_analysis_lifetime(system, writer)
def dump_fret_analysis_general(self, system, writer):
with writer.loop('_flr_fret_analysis',
['id', 'experiment_id', 'type',
'sample_probe_id_1', 'sample_probe_id_2',
'forster_radius_id', 'dataset_list_id',
'external_file_id', 'software_id']) as lp:
for x in self._analyses_by_id:
lp.write(id=x._id,
experiment_id=x.experiment._id,
type=x.type,
sample_probe_id_1=x.sample_probe_1._id,
sample_probe_id_2=x.sample_probe_2._id,
forster_radius_id=x.forster_radius._id,
dataset_list_id=x.dataset._id,
external_file_id=None if x.external_file is None
else x.external_file._id,
software_id=None if x.software is None
else x.software._id)
def dump_fret_analysis_intensity(self, system, writer):
with writer.loop('_flr_fret_analysis_intensity',
['ordinal_id', 'analysis_id',
'calibration_parameters_id', 'donor_only_fraction',
'chi_square_reduced', 'method_name',
'details']) as lp:
ordinal = itertools.count(1)
for x in self._analyses_by_id:
# if it is an intensity-based analysis.
if 'intensity' in x.type:
p = x.calibration_parameters
lp.write(
ordinal_id=next(ordinal),
analysis_id=x._id,
calibration_parameters_id=None if p is None else p._id,
donor_only_fraction=x.donor_only_fraction,
chi_square_reduced=x.chi_square_reduced,
method_name=x.method_name,
details=x.details)
def dump_fret_analysis_lifetime(self, system, writer):
with writer.loop('_flr_fret_analysis_lifetime',
['ordinal_id', 'analysis_id',
'reference_measurement_group_id',
'lifetime_fit_model_id',
'donor_only_fraction', 'chi_square_reduced',
'method_name', 'details']) as lp:
ordinal = itertools.count(1)
for x in self._analyses_by_id:
# if it is a lifetime-based analysis
if 'lifetime' in x.type:
mgid = x.ref_measurement_group._id
lp.write(
ordinal_id=next(ordinal),
analysis_id=x._id,
reference_measurement_group_id=mgid,
lifetime_fit_model_id=x.lifetime_fit_model._id,
donor_only_fraction=x.donor_only_fraction,
chi_square_reduced=x.chi_square_reduced,
method_name=x.method_name,
details=x.details)
class _FLRPeakAssignmentDumper(Dumper):
def finalize(self, system):
def all_peak_assignments():
return itertools.chain.from_iterable(f._all_peak_assignments()
for f in system.flr_data)
self._peak_assignments_by_id = _assign_all_ids(all_peak_assignments)
def dump(self, system, writer):
with writer.loop('_flr_peak_assignment',
['id', 'method_name', 'details']) as lp:
for x in self._peak_assignments_by_id:
lp.write(id=x._id, method_name=x.method_name,
details=x.details)
class _FLRDistanceRestraintDumper(Dumper):
def finalize(self, system):
def all_restraint_groups():
return itertools.chain.from_iterable(f.distance_restraint_groups
for f in system.flr_data)
self._restraint_groups_by_id = _assign_all_ids(all_restraint_groups)
def _all_restraints():
return itertools.chain.from_iterable(
rg.distance_restraint_list
for rg in self._restraint_groups_by_id)
for i, r in enumerate(_all_restraints()):
r._id = i + 1
def dump(self, system, writer):
with writer.loop('_flr_fret_distance_restraint',
['ordinal_id', 'id', 'group_id', 'sample_probe_id_1',
'sample_probe_id_2', 'state_id', 'analysis_id',
'distance', 'distance_error_plus',
'distance_error_minus', 'distance_type',
'population_fraction', 'peak_assignment_id']) as lp:
ordinal = itertools.count(1)
for rg in self._restraint_groups_by_id:
for r in rg.distance_restraint_list:
lp.write(ordinal_id=next(ordinal), id=r._id,
group_id=rg._id,
sample_probe_id_1=r.sample_probe_1._id,
sample_probe_id_2=r.sample_probe_2._id,
state_id=None if r.state is None else r.state._id,
analysis_id=r.analysis._id, distance=r.distance,
distance_error_plus=r.distance_error_plus,
distance_error_minus=r.distance_error_minus,
distance_type=r.distance_type,
population_fraction=r.population_fraction,
peak_assignment_id=r.peak_assignment._id)
class _FLRModelQualityDumper(Dumper):
def finalize(self, system):
def all_model_qualities():
return itertools.chain.from_iterable(f.fret_model_qualities
for f in system.flr_data)
self._model_qualities_by_id = _assign_all_ids(all_model_qualities)
def dump(self, system, writer):
with writer.loop('_flr_fret_model_quality',
['model_id', 'chi_square_reduced', 'dataset_group_id',
'method', 'details']) as lp:
for x in self._model_qualities_by_id:
lp.write(model_id=x.model._id,
chi_square_reduced=x.chi_square_reduced,
dataset_group_id=x.dataset_group._id,
method=x.method, details=x.details)
class _FLRModelDistanceDumper(Dumper):
def finalize(self, system):
def all_model_distances():
return itertools.chain.from_iterable(f.fret_model_distances
for f in system.flr_data)
self._model_distances_by_id = _assign_all_ids(all_model_distances)
def dump(self, system, writer):
with writer.loop('_flr_fret_model_distance',
['id', 'restraint_id', 'model_id', 'distance',
'distance_deviation']) as lp:
for x in self._model_distances_by_id:
lp.write(id=x._id, restraint_id=x.restraint._id,
model_id=x.model._id, distance=x.distance,
distance_deviation=x.distance_deviation)
class _FLRFPSModelingDumper(Dumper):
def finalize(self, system):
def all_fps_modeling():
return itertools.chain.from_iterable(f._all_fps_modeling()
for f in system.flr_data)
self._fps_modeling_by_id = _assign_all_ids(all_fps_modeling)
def all_fps_global_parameters():
return itertools.chain.from_iterable(f._all_fps_global_parameters()
for f in system.flr_data)
self._fps_parameters_by_id = _assign_all_ids(all_fps_global_parameters)
def dump(self, system, writer):
self.dump_fps_modeling(system, writer)
self.dump_fps_global_parameters(system, writer)
def dump_fps_modeling(self, system, writer):
with writer.loop('_flr_FPS_modeling',
['id', 'ihm_modeling_protocol_ordinal_id',
'restraint_group_id', 'global_parameter_id',
'probe_modeling_method', 'details']) as lp:
for x in self._fps_modeling_by_id:
lp.write(id=x._id,
ihm_modeling_protocol_ordinal_id=x.protocol._id,
restraint_group_id=x.restraint_group._id,
global_parameter_id=x.global_parameter._id,
probe_modeling_method=x.probe_modeling_method,
details=x.details)
def dump_fps_global_parameters(self, system, writer):
with writer.loop('_flr_FPS_global_parameter',
['id', 'forster_radius_value',
'conversion_function_polynom_order', 'repetition',
'AV_grid_rel', 'AV_min_grid_A', 'AV_allowed_sphere',
'AV_search_nodes', 'AV_E_samples_k',
'sim_viscosity_adjustment', 'sim_dt_adjustment',
'sim_max_iter_k', 'sim_max_force',
'sim_clash_tolerance_A', 'sim_reciprocal_kT',
'sim_clash_potential', 'convergence_E',
'convergence_K', 'convergence_F',
'convergence_T']) as lp:
for x in self._fps_parameters_by_id:
polynom_order = x.conversion_function_polynom_order
lp.write(id=x._id,
forster_radius_value=x.forster_radius,
conversion_function_polynom_order=polynom_order,
repetition=x.repetition,
AV_grid_rel=x.av_grid_rel,
AV_min_grid_A=x.av_min_grid_a,
AV_allowed_sphere=x.av_allowed_sphere,
AV_search_nodes=x.av_search_nodes,
AV_E_samples_k=x.av_e_samples_k,
sim_viscosity_adjustment=x.sim_viscosity_adjustment,
sim_dt_adjustment=x.sim_dt_adjustment,
sim_max_iter_k=x.sim_max_iter_k,
sim_max_force=x.sim_max_force,
sim_clash_tolerance_A=x.sim_clash_tolerance_a,
sim_reciprocal_kT=x.sim_reciprocal_kt,
sim_clash_potential=x.sim_clash_potential,
convergence_E=x.convergence_e,
convergence_K=x.convergence_k,
convergence_F=x.convergence_f,
convergence_T=x.convergence_t)
class _FLRFPSAVModelingDumper(Dumper):
def finalize(self, system):
def all_fps_av_modeling():
return itertools.chain.from_iterable(f._all_fps_av_modeling()
for f in system.flr_data)
self._fps_av_modeling_by_id = _assign_all_ids(all_fps_av_modeling)
def all_fps_av_parameter():
return itertools.chain.from_iterable(f._all_fps_av_parameter()
for f in system.flr_data)
self._fps_av_parameter_by_id = _assign_all_ids(all_fps_av_parameter)
def dump(self, system, writer):
self.dump_parameter(system, writer)
self.dump_modeling(system, writer)
def dump_parameter(self, system, writer):
with writer.loop('_flr_FPS_AV_parameter',
['id', 'num_linker_atoms', 'linker_length',
'linker_width', 'probe_radius_1', 'probe_radius_2',
'probe_radius_3']) as lp:
for x in self._fps_av_parameter_by_id:
lp.write(id=x._id,
num_linker_atoms=x.num_linker_atoms,
linker_length=x.linker_length,
linker_width=x.linker_width,
probe_radius_1=x.probe_radius_1,
probe_radius_2=x.probe_radius_2,
probe_radius_3=x.probe_radius_3)
def dump_modeling(self, system, writer):
with writer.loop('_flr_FPS_AV_modeling',
['id', 'sample_probe_id', 'FPS_modeling_id',
'parameter_id']) as lp:
for x in self._fps_av_modeling_by_id:
lp.write(id=x._id,
sample_probe_id=x.sample_probe._id,
FPS_modeling_id=x.fps_modeling._id,
parameter_id=x.parameter._id)
class _FLRFPSMPPModelingDumper(Dumper):
def finalize(self, system):
def all_fps_mpp_modeling():
return itertools.chain.from_iterable(f._all_fps_mpp_modeling()
for f in system.flr_data)
self._fps_mpp_modeling_by_id = _assign_all_ids(all_fps_mpp_modeling)
def all_fps_mean_probe_position():
return itertools.chain.from_iterable(
f._all_fps_mean_probe_position() for f in system.flr_data)
self._fps_mpp_by_id = _assign_all_ids(all_fps_mean_probe_position)
def all_atom_position_group():
return itertools.chain.from_iterable(
f._all_fps_atom_position_group() for f in system.flr_data)
self._atom_group_by_id = _assign_all_ids(all_atom_position_group)
def _all_atom_positions():
return itertools.chain.from_iterable(
ag.mpp_atom_position_list for ag in self._atom_group_by_id)
for i, a in enumerate(_all_atom_positions()):
a._id = i + 1
def dump(self, system, writer):
self.dump_mean_probe_position(system, writer)
self.dump_mpp_atom_position(system, writer)
self.dump_mpp_modeling(system, writer)
def dump_mean_probe_position(self, system, writer):
with writer.loop('_flr_FPS_mean_probe_position',
['id', 'sample_probe_id', 'mpp_xcoord', 'mpp_ycoord',
'mpp_zcoord']) as lp:
for x in self._fps_mpp_by_id:
lp.write(id=x._id, sample_probe_id=x.sample_probe._id,
mpp_xcoord=x.x, mpp_ycoord=x.y, mpp_zcoord=x.z)
def dump_mpp_atom_position(self, system, writer):
with writer.loop('_flr_FPS_MPP_atom_position',
['id', 'entity_id', 'seq_id', 'comp_id', 'atom_id',
'asym_id', 'xcoord', 'ycoord', 'zcoord',
'group_id']) as lp:
for group in self._atom_group_by_id:
for x in group.mpp_atom_position_list:
comp = x.atom.asym.entity.sequence[x.atom.seq_id - 1].id
lp.write(id=x._id, entity_id=x.atom.asym.entity._id,
seq_id=x.atom.seq_id, comp_id=comp,
atom_id=x.atom.id, asym_id=x.atom.asym._id,
xcoord=x.x, ycoord=x.y, zcoord=x.z,
group_id=group._id)
def dump_mpp_modeling(self, system, writer):
ordinal = itertools.count(1)
with writer.loop('_flr_FPS_MPP_modeling',
['ordinal_id', 'FPS_modeling_id', 'mpp_id',
'mpp_atom_position_group_id']) as lp:
for x in self._fps_mpp_modeling_by_id:
lp.write(
ordinal_id=next(ordinal),
FPS_modeling_id=x.fps_modeling._id, mpp_id=x.mpp._id,
mpp_atom_position_group_id=x.mpp_atom_position_group._id)
class _FLRKineticRateFretAnalysisConnectionDumper(Dumper):
def finalize(self, system):
# Assign IDs
c_id = itertools.count(1)
if system.flr_data:
for f in system.flr_data:
for c in f.kinetic_rate_fret_analysis_connections:
if not hasattr(c, '_id'):
c._id = next(c_id)
def dump(self, system, writer):
with writer.loop('_flr_kinetic_rate_analysis',
['id', 'fret_analysis_id',
'kinetic_rate_id', 'details']) as lp:
if system.flr_data:
for f in system.flr_data:
for c in f.kinetic_rate_fret_analysis_connections:
lp.write(id=c._id,
fret_analysis_id=c.fret_analysis._id,
kinetic_rate_id=c.kinetic_rate._id,
details=c.details)
class _FLRRelaxationTimeFretAnalysisConnectionDumper(Dumper):
def finalize(self, system):
# Assign IDs
c_id = itertools.count(1)
if system.flr_data:
for f in system.flr_data:
for c in f.relaxation_time_fret_analysis_connections:
if not hasattr(c, '_id'):
c._id = next(c_id)
def dump(self, system, writer):
with writer.loop('_flr_relaxation_time_analysis',
['id', 'fret_analysis_id',
'relaxation_time_id', 'details']) as lp:
if system.flr_data:
for f in system.flr_data:
for c in f.relaxation_time_fret_analysis_connections:
lp.write(id=c._id,
fret_analysis_id=c.fret_analysis._id,
relaxation_time_id=c.relaxation_time._id,
details=c.details)
_flr_dumpers = [_FLRExperimentDumper, _FLRInstSettingDumper,
_FLRExpConditionDumper, _FLRInstrumentDumper,
_FLREntityAssemblyDumper, _FLRSampleConditionDumper,
_FLRSampleDumper, _FLRProbeDumper,
_FLRSampleProbeDetailsDumper, _FLRPolyProbePositionDumper,
_FLRConjugateDumper, _FLRForsterRadiusDumper,
_FLRCalibrationParametersDumper, _FLRLifetimeFitModelDumper,
_FLRRefMeasurementDumper, _FLRAnalysisDumper,
_FLRPeakAssignmentDumper, _FLRDistanceRestraintDumper,
_FLRModelQualityDumper, _FLRModelDistanceDumper,
_FLRFPSModelingDumper, _FLRFPSAVModelingDumper,
_FLRFPSMPPModelingDumper,
_FLRKineticRateFretAnalysisConnectionDumper,
_FLRRelaxationTimeFretAnalysisConnectionDumper]
class _NullLoopCategoryWriter:
"""A do-nothing replacement for format._CifLoopWriter
or format._CifCategoryWriter"""
def write(self, *args, **keys):
pass
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
pass
class _IgnoreWriter:
"""Utility class which normally just passes through to the default
``base_writer``, but ignores selected categories."""
def __init__(self, base_writer, ignores):
self._base_writer = base_writer
# Allow for categories with or without leading underscore
self._ignore_category = frozenset('_' + c.lstrip('_').lower()
for c in ignores)
def category(self, category):
if category in self._ignore_category:
return _NullLoopCategoryWriter()
else:
return self._base_writer.category(category)
def loop(self, category, keys):
if category in self._ignore_category:
return _NullLoopCategoryWriter()
else:
return self._base_writer.loop(category, keys)
# Pass through other methods to base_writer
def flush(self):
return self._base_writer.flush()
def end_block(self):
return self._base_writer.end_block()
def start_block(self, name):
return self._base_writer.start_block(name)
def write_comment(self, comment):
return self._base_writer.write_comment(comment)
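# A short sketch of the wrapper in use (fh is an assumed open text file
# handle). Entries in `ignores` are normalized to lower case with a
# leading underscore, so 'AUDIT_CONFORM' and '_audit_conform' are
# equivalent; lookups assume the lower-case category names the dumpers
# themselves use:
#
#     base = ihm.format.CifWriter(fh)
#     w = _IgnoreWriter(base, ['AUDIT_CONFORM'])
#     with w.category('_audit_conform') as c:
#         c.write(dict_name='x')   # discarded, never reaches fh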
class Variant:
"""Utility class to select the type of file to output by :func:`write`."""
def get_dumpers(self):
"""Get the :class:`Dumper` objects to use to write output.
:return: a list of :class:`Dumper` objects.
"""
pass
def get_system_writer(self, system, writer_class, writer):
"""Get a writer tailored to the given system.
By default, this just returns the ``writer`` unchanged."""
return writer
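# A custom Variant is the hook for changing the set of output tables
# wholesale. A hedged sketch (MyDumper is a hypothetical Dumper
# subclass) extending the standard IHM set; to merely add extra
# categories, the `dumpers` argument to write() is usually simpler:
#
#     class MyVariant(IHMVariant):
#         def get_dumpers(self):
#             return super().get_dumpers() + [MyDumper()]
#
#     ihm.dumper.write(fh, systems, variant=MyVariant)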
class IHMVariant(Variant):
"""Used to select typical PDBx/IHM file output. See :func:`write`."""
_dumpers = [
_EntryDumper, # must be first
_CollectionDumper, _StructDumper, _CommentDumper, _AuditConformDumper,
_DatabaseDumper, _DatabaseStatusDumper, _CitationDumper,
_SoftwareDumper, _AuditAuthorDumper, _AuditRevisionDumper,
_DataUsageDumper, _GrantDumper, _ChemCompDumper,
_ChemDescriptorDumper, _EntityDumper, _EntitySrcGenDumper,
_EntitySrcNatDumper, _EntitySrcSynDumper, _StructRefDumper,
_EntityPolyDumper, _EntityNonPolyDumper, _EntityPolySeqDumper,
_EntityPolySegmentDumper, _EntityBranchListDumper, _EntityBranchDumper,
_StructAsymDumper, _PolySeqSchemeDumper,
_NonPolySchemeDumper, _BranchSchemeDumper, _BranchDescriptorDumper,
_BranchLinkDumper, _AssemblyDumper, _ExternalReferenceDumper,
_DatasetDumper, _ModelRepresentationDumper, _StartingModelDumper,
_ProtocolDumper, _PostProcessDumper, _PseudoSiteDumper,
_GeometricObjectDumper, _FeatureDumper, _CrossLinkDumper,
_GeometricRestraintDumper, _DerivedDistanceRestraintDumper,
_HDXRestraintDumper,
_PredictedContactRestraintDumper, _EM3DDumper, _EM2DDumper, _SASDumper,
_ModelDumper, _ModelRepresentativeDumper,
_NotModeledResidueRangeDumper,
_EnsembleDumper, _DensityDumper, _MultiStateDumper,
_OrderedDumper,
_MultiStateSchemeDumper, _MultiStateSchemeConnectivityDumper,
_RelaxationTimeDumper, _KineticRateDumper]
def get_dumpers(self):
return [d() for d in self._dumpers + _flr_dumpers]
class IgnoreVariant(IHMVariant):
"""Exclude selected CIF categories from output.
This generates the same PDBx/IHM output as :class:`IHMVariant`,
but explicitly listed CIF categories are discarded, for example::
ihm.dumper.write(fh, systems,
variant=IgnoreVariant(['_audit_conform']))
       This is intended for advanced users who have a working knowledge
of the PDBx and IHM CIF dictionaries.
:param sequence ignores: A list or tuple of CIF categories to exclude.
"""
def __init__(self, ignores):
self._ignores = ignores
def get_system_writer(self, system, writer_class, writer):
return _IgnoreWriter(writer, self._ignores)
def set_line_wrap(line_wrap):
"""Set whether output lines are wrapped at 80 characters.
By default the mmCIF writer tries to avoid writing lines longer than
80 characters, for compatibility with traditional PDB. When
disabled, each row in a "loop" construct will be written on a
single line.
This setting has no effect on binary formats (BinaryCIF).
:param bool line_wrap: whether to wrap lines at 80 characters.
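       For example, to write each loop row on a single line (a usage
       sketch; ``systems`` is assumed to be a list of
       :class:`ihm.System` objects)::
           ihm.dumper.set_line_wrap(False)
           with open('output.cif', 'w', encoding='utf-8') as fh:
               ihm.dumper.write(fh, systems)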
"""
ihm.format.CifWriter._set_line_wrap(line_wrap)
def write(fh, systems, format='mmCIF', dumpers=[], variant=IHMVariant,
check=True):
"""Write out all `systems` to the file handle `fh`.
Files can be written in either the text-based mmCIF format or the
BinaryCIF format. The BinaryCIF writer needs the msgpack Python
module to function.
The file handle should be opened in binary mode for BinaryCIF files.
For mmCIF, text mode should be used, usually with UTF-8 encoding, e.g.::
with open('output.cif', 'w', encoding='utf-8') as fh:
ihm.dumper.write(fh, systems)
with open('output.bcif', 'wb') as fh:
ihm.dumper.write(fh, systems, format='BCIF')
If generating files for a tool that is sensitive to non-ASCII data,
a more restrictive encoding such as ASCII or ISO-8859-1 could also
be used (although note that this may lose some information such as
accented characters)::
with open('output.cif', 'w', encoding='ascii',
errors='replace') as fh:
ihm.dumper.write(fh, systems)
:param file fh: The file handle to write to.
:param list systems: The list of :class:`ihm.System` objects to write.
:param str format: The format of the file. This can be 'mmCIF' (the
default) for the (text-based) mmCIF format or 'BCIF' for
BinaryCIF.
:param list dumpers: A list of :class:`Dumper` classes (not objects).
These can be used to add extra categories to the file.
:param variant: A class or object that selects the type of file to
output. This primarily controls the set of tables that are
written to the file. In most cases the default
:class:`IHMVariant` should be used.
:type variant: :class:`Variant`
:param bool check: If True (the default), check the output objects
for self-consistency. If this is set to False, disabling some of
these checks, the output files may not correctly validate against
the mmCIF dictionaries. (Note that some checks are always
performed, as the library cannot function correctly without
these.)
"""
if isinstance(variant, type):
variant = variant()
dumpers = variant.get_dumpers() + [d() for d in dumpers]
writer_map = {'mmCIF': ihm.format.CifWriter,
'BCIF': ihm.format_bcif.BinaryCifWriter}
writer = writer_map[format](fh)
for system in systems:
w = variant.get_system_writer(system, writer_map[format], writer)
system._before_write()
for d in dumpers:
d._check = check
d.finalize(system)
system._check_after_write()
for d in dumpers:
d.dump(system, w)
w.end_block() # start_block is called by EntryDumper
writer.flush()
python-ihm-2.7/ihm/flr.py 0000664 0000000 0000000 00000137351 15035733372 0015412 0 ustar 00root root 0000000 0000000 # coding=utf-8
"""Classes to handle fluorescence data.
The classes roughly correspond to categories in the
`FLR dictionary `_.
See the top level :class:`FLRData` class for more information.
"""
class Probe:
"""Defines a fluorescent probe.
This class is not in the FLR dictionary, but it collects all the
information connected by the probe_ids.
:param probe_list_entry: A probe list object.
:type probe_list_entry: :class:`ProbeList`
:param probe_descriptor: A probe descriptor.
:type probe_descriptor: :class:`ProbeDescriptor`
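       For example (a sketch; ``rd`` and ``cd`` are assumed to be
       existing :class:`ihm.ChemDescriptor` objects, and the dye names
       are purely illustrative)::
           pl = ihm.flr.ProbeList(chromophore_name='Alexa488',
                                  reactive_probe_flag=True,
                                  reactive_probe_name='Alexa488 maleimide')
           pd = ihm.flr.ProbeDescriptor(reactive_probe_chem_descriptor=rd,
                                        chromophore_chem_descriptor=cd)
           probe = ihm.flr.Probe(probe_list_entry=pl, probe_descriptor=pd)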
"""
def __init__(self, probe_list_entry=None, probe_descriptor=None):
self.probe_list_entry = probe_list_entry
self.probe_descriptor = probe_descriptor
def __eq__(self, other):
return self.__dict__ == other.__dict__
class ProbeDescriptor:
"""Collects the chemical descriptors for a fluorescent probe.
This includes the chemical descriptor of the reactive probe and
the chromophore.
:param reactive_probe_chem_descriptor: The chemical descriptor for
the reactive probe.
:type reactive_probe_chem_descriptor: :class:`ihm.ChemDescriptor`
:param chromophore_chem_descriptor: The chemical descriptor of the
chromophore.
:type chromophore_chem_descriptor: :class:`ihm.ChemDescriptor`
:param chromophore_center_atom: The atom describing the center
of the chromophore.
"""
def __init__(self, reactive_probe_chem_descriptor,
chromophore_chem_descriptor, chromophore_center_atom=None):
self.reactive_probe_chem_descriptor = reactive_probe_chem_descriptor
self.chromophore_chem_descriptor = chromophore_chem_descriptor
self.chromophore_center_atom = chromophore_center_atom
def __eq__(self, other):
return self.__dict__ == other.__dict__
class ProbeList:
"""Store the chromophore name, whether there is a reactive probe
available, the origin of the probe and the type of linkage of the probe.
:param str chromophore_name: The name of the chromophore.
:param bool reactive_probe_flag: Flag to indicate whether a reactive
probe is given.
:param str reactive_probe_name: The name of the reactive probe.
:param str probe_origin: The origin of the probe (intrinsic
or extrinsic).
:param str probe_link_type: The type of linkage for the probe (covalent
or ligand).
"""
def __init__(self, chromophore_name, reactive_probe_flag=False,
reactive_probe_name=None, probe_origin=None,
probe_link_type=None):
self.chromophore_name = chromophore_name
self.reactive_probe_flag = reactive_probe_flag
self.reactive_probe_name = reactive_probe_name
self.probe_origin = probe_origin
self.probe_link_type = probe_link_type
def __eq__(self, other):
return self.__dict__ == other.__dict__
class SampleProbeDetails:
"""Connects a probe to a sample.
:param sample: The sample.
:type sample: :class:`Sample`
:param probe: A probe that is attached to the sample.
:type probe: :class:`Probe`
:param str fluorophore_type: The type of the fluorophore (donor,
acceptor, or unspecified).
       :param poly_probe_position: The position on the polymer that
              the dye is attached to.
       :type poly_probe_position: :class:`PolyProbePosition`
       :param str description: A description of the sample-probe connection.
"""
def __init__(self, sample, probe, fluorophore_type='unspecified',
poly_probe_position=None, description=None):
self.sample = sample
self.probe = probe
self.fluorophore_type = fluorophore_type
self.description = description
self.poly_probe_position = poly_probe_position
def __eq__(self, other):
return self.__dict__ == other.__dict__
class PolyProbeConjugate:
"""Describes the conjugate of polymer residue and probe (including
possible linker)
:param sample_probe: The :class:`SampleProbeDetails` object to
which the conjugate is related.
:type sample_probe: :class:`SampleProbeDetails`
:param chem_descriptor: The chemical descriptor of the conjugate
of polymer residue and probe.
:type chem_descriptor: :class:`ihm.ChemDescriptor`
:param bool ambiguous_stoichiometry: Flag whether the labeling
is ambiguous.
:param float probe_stoichiometry: The stoichiometry of the
ambiguous labeling.
"""
def __init__(self, sample_probe, chem_descriptor,
ambiguous_stoichiometry=False, probe_stoichiometry=None):
self.sample_probe = sample_probe
self.chem_descriptor = chem_descriptor
self.ambiguous_stoichiometry = ambiguous_stoichiometry
self.probe_stoichiometry = probe_stoichiometry
def __eq__(self, other):
return self.__dict__ == other.__dict__
class PolyProbePosition:
"""Describes a position on the polymer used for attaching the probe.
This class combines Poly_probe_position, Poly_probe_position_modified,
and Poly_probe_position_mutated from the FLR dictionary.
:param resatom: The residue or atom that the probe is attached to.
:type resatom: :class:`ihm.Residue` or :class:`ihm.Atom`
:param bool mutation_flag: Flag whether the residue was mutated
(e.g. a Cys mutation).
:param bool modification_flag: Flag whether the residue was modified
(e.g. replacement of a residue with a labeled residue in
case of nucleic acids).
:param str auth_name: An author-given name for the position.
:param mutated_chem_comp_id: The chemical component ID of the
mutated residue.
       :type mutated_chem_comp_id: :class:`ihm.ChemComp`
:param modified_chem_descriptor: The chemical descriptor of the
modified residue.
:type modified_chem_descriptor: :class:`ihm.ChemDescriptor`
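       For example, for a position that was mutated to a cysteine for
       labeling (a sketch; ``residue`` is assumed to be an existing
       :class:`ihm.Residue`)::
           pos = ihm.flr.PolyProbePosition(
               resatom=residue, mutation_flag=True,
               mutated_chem_comp_id=ihm.LPeptideAlphabet()['C'])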
"""
def __init__(self, resatom, mutation_flag=False, modification_flag=False,
auth_name=None, mutated_chem_comp_id=None,
modified_chem_descriptor=None):
self.resatom = resatom
self.mutation_flag = mutation_flag
self.modification_flag = modification_flag
self.auth_name = auth_name
if self.mutation_flag:
self.mutated_chem_comp_id = mutated_chem_comp_id
if self.modification_flag:
self.modified_chem_descriptor = modified_chem_descriptor
def __eq__(self, other):
return self.__dict__ == other.__dict__
class Sample:
"""Sample corresponds to a measurement.
:param entity_assembly: The assembly of the entities that was measured.
:type entity_assembly: :class:`EntityAssembly`
:param int num_of_probes: The number of probes in the sample.
:param condition: The sample conditions for the Sample.
:type condition: :class:`SampleCondition`
:param str description: A description of the sample.
:param str details: Details about the sample.
:param solvent_phase: The solvent phase of the sample (liquid,
vitrified, or other).
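       For example (a sketch; ``assembly`` is assumed to be an existing
       :class:`EntityAssembly`, and the condition text is illustrative)::
           sample = ihm.flr.Sample(
               entity_assembly=assembly, num_of_probes=2,
               condition=ihm.flr.SampleCondition(details='PBS buffer, pH 7.4'),
               solvent_phase='liquid')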
"""
def __init__(self, entity_assembly, num_of_probes, condition,
description=None, details=None, solvent_phase=None):
self.entity_assembly = entity_assembly
self.num_of_probes = num_of_probes
self.condition = condition
self.description = description
self.details = details
self.solvent_phase = solvent_phase
def __eq__(self, other):
return self.__dict__ == other.__dict__
class EntityAssembly:
"""The assembly of the entities that are in the system.
:param entity: The entity to add.
:type entity: :class:`ihm.Entity`
:param num_copies: The number of copies for the entity in the assembly.
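       For example (a sketch; ``entity1`` and ``entity2`` are assumed
       to be existing :class:`ihm.Entity` objects)::
           a = ihm.flr.EntityAssembly()
           a.add_entity(entity=entity1, num_copies=2)
           a.add_entity(entity=entity2, num_copies=1)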
"""
def __init__(self, entity=None, num_copies=0):
self.entity_list = []
self.num_copies_list = []
if entity is not None and num_copies != 0:
self.add_entity(entity, num_copies)
def add_entity(self, entity, num_copies):
if num_copies < 0:
raise ValueError("Number of copies for Entity must be "
"larger than zero.")
self.entity_list.append(entity)
self.num_copies_list.append(num_copies)
def __eq__(self, other):
return self.__dict__ == other.__dict__
class SampleCondition:
"""Description of the sample conditions.
*Currently this is only text, but will be extended in the future.*
:param str details: Description of the sample conditions.
"""
def __init__(self, details=None):
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class Experiment:
"""The Experiment collects combinations of instrument, experimental
settings and sample.
:param instrument: The instrument.
:type instrument: :class:`Instrument`
:param inst_setting: The instrument setting.
:type inst_setting: :class:`InstSetting`
:param exp_condition: The experimental conditions.
:type exp_condition: :class:`ExpCondition`
:param sample: The sample.
:type sample: :class:`Sample`
:param details: Details on the experiment.
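       For example (a sketch; the arguments are assumed to be existing
       :class:`Instrument`, :class:`InstSetting`, :class:`ExpCondition`
       and :class:`Sample` objects)::
           exp = ihm.flr.Experiment()
           exp.add_entry(instrument=instrument, inst_setting=inst_setting,
                         exp_condition=exp_condition, sample=sample)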
"""
def __init__(self, instrument=None, inst_setting=None, exp_condition=None,
sample=None, details=None):
"""The Experiment object can either be initiated with empty lists,
or with an entry for each of them. In this way, an experiment
object is created and filled with one entry.
"""
self.instrument_list = []
self.inst_setting_list = []
self.exp_condition_list = []
self.sample_list = []
self.details_list = []
if (instrument is not None and inst_setting is not None
and exp_condition is not None and sample is not None):
self.add_entry(instrument=instrument, inst_setting=inst_setting,
exp_condition=exp_condition,
sample=sample, details=details)
def add_entry(self, instrument, inst_setting, exp_condition, sample,
details=None):
"""Entries to the experiment object can also be added one by one.
"""
self.instrument_list.append(instrument)
self.inst_setting_list.append(inst_setting)
self.exp_condition_list.append(exp_condition)
self.sample_list.append(sample)
self.details_list.append(details)
def get_entry_by_index(self, index):
"""Returns the combination of :class:`Instrument`,
:class:`InstSetting`, :class:`ExpCondition`, :class:`Sample`,
and details for a given index.
"""
return (self.instrument_list[index],
self.inst_setting_list[index],
self.exp_condition_list[index],
self.sample_list[index],
self.details_list[index])
def __eq__(self, other):
return ((self.instrument_list == other.instrument_list)
and (self.inst_setting_list == other.inst_setting_list)
and (self.exp_condition_list == other.exp_condition_list)
and (self.sample_list == other.sample_list)
and (self.details_list == other.details_list))
def contains(self, instrument, inst_setting, exp_condition, sample):
"""Checks whether a combination of :class:`Instrument`,
:class:`InstSetting`, :class:`ExpCondition`,
:class:`Sample` is already included in the experiment object.
"""
# TODO: possibly extend this by the details_list?
for i in range(len(self.instrument_list)):
if ((instrument == self.instrument_list[i])
and (inst_setting == self.inst_setting_list[i])
and (exp_condition == self.exp_condition_list[i])
and (sample == self.sample_list[i])):
return True
return False
class Instrument:
"""Description of the Instrument used for the measurements.
*Currently this is only text, but will be extended in the future.*
:param details: Description of the instrument used for the measurements.
"""
def __init__(self, details=None):
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class InstSetting:
"""Description of the instrument settings.
*Currently this is only text, but will be extended in the future.*
:param str details: Description of the instrument settings used for
the measurement (e.g. laser power or size of observation
volume in case of confocal measurements).
"""
def __init__(self, details=None):
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class ExpCondition:
"""Description of the experimental conditions.
       *Currently this is only text, but will be extended in the future.*
:param str details: Description of the experimental conditions (e.g.
the temperature at which the experiment was carried out).
"""
def __init__(self, details=None):
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FRETAnalysis:
"""An analysis of FRET data that was performed.
:param experiment: The Experiment object for this FRET analysis.
:type experiment: :class:`Experiment`
:param sample_probe_1: The combination of sample and probe for the
first probe.
:type sample_probe_1: :class:`SampleProbeDetails`
:param sample_probe_2: The combination of sample and probe for the
second probe.
:type sample_probe_2: :class:`SampleProbeDetails`
:param forster_radius: The Förster radius object for this FRET analysis.
:type forster_radius: :class:`FRETForsterRadius`.
:param str type: The type of the FRET analysis (intensity-based
or lifetime-based).
:param calibration_parameters: The calibration parameters used for
this analysis (only in case of intensity-based analyses).
:type calibration_parameters: :class:`FRETCalibrationParameters`
:param lifetime_fit_model: The fit model used in case of
lifetime-based analyses.
:type lifetime_fit_model: :class:`LifetimeFitModel`
:param ref_measurement_group: The group of reference measurements
in case of lifetime-based analyses.
:type ref_measurement_group: :class:`RefMeasurementGroup`
:param str method_name: The method used for the analysis.
:param float chi_square_reduced: The chi-square reduced as a quality
measure for the fit.
:param float donor_only_fraction: The donor-only fraction.
:param dataset: The dataset used.
:type dataset: :class:`ihm.dataset.Dataset`
:param file: The external file that contains (results of)
the analysis.
:type file: :class:`ihm.location.OutputFileLocation`
:param software: The software used for the analysis.
:type software: :class:`ihm.Software`
"""
def __init__(self, experiment, sample_probe_1, sample_probe_2,
forster_radius, type, calibration_parameters=None,
lifetime_fit_model=None,
ref_measurement_group=None,
method_name=None, details=None,
chi_square_reduced=None, donor_only_fraction=None,
dataset=None, file=None, software=None):
if type not in ['lifetime-based', 'intensity-based', None]:
raise ValueError(
'FRETAnalysis.type can be \'lifetime-based\' or '
'\'intensity-based\'. The value is %s' % type)
self.experiment = experiment
self.sample_probe_1 = sample_probe_1
self.sample_probe_2 = sample_probe_2
self.forster_radius = forster_radius
self.type = type
self.calibration_parameters = calibration_parameters
self.lifetime_fit_model = lifetime_fit_model
self.ref_measurement_group = ref_measurement_group
self.method_name = method_name
self.details = details
self.chi_square_reduced = chi_square_reduced
self.donor_only_fraction = donor_only_fraction
self.dataset = dataset
self.external_file = file
self.software = software
def __eq__(self, other):
return self.__dict__ == other.__dict__
class LifetimeFitModel:
"""A lifetime-fit model used for lifetime-based analysis.
:param str name: The name of the fit model.
:param str description: A description of the fit model.
:param file: An external file that contains additional
information on the fit model.
:type file: :class:`ihm.location.OutputFileLocation`
:param citation: A citation for the fit model.
:type citation: :class:`ihm.Citation`
"""
def __init__(self, name, description, file=None, citation=None):
self.name = name
self.description = description
self.external_file = file
self.citation = citation
def __eq__(self, other):
return self.__dict__ == other.__dict__
class RefMeasurementGroup:
"""A Group containing reference measurements for lifetime-based analysis.
:param str details: Details on the Group of reference measurements.
"""
def __init__(self, details=None):
self.details = details
self.ref_measurement_list = []
self.num_measurements = len(self.ref_measurement_list)
def add_ref_measurement(self, ref_measurement):
"""Add a lifetime reference measurement to a ref_measurement_group."""
self.ref_measurement_list.append(ref_measurement)
self.num_measurements = len(self.ref_measurement_list)
def get_info(self):
return self.ref_measurement_list
def __eq__(self, other):
return self.__dict__ == other.__dict__
class RefMeasurement:
"""A reference measurement for lifetime-based analysis.
:param ref_sample_probe: The combination of sample and probe used
for the reference measurement.
:type ref_sample_probe: :class:`SampleProbeDetails`
:param str details: Details on the measurement.
:param list_of_lifetimes: A list of the results from the reference
measurement.
:type list_of_lifetimes: List of :class:`RefMeasurementLifetime`
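       For example (a sketch; ``rsp`` is assumed to be an existing
       :class:`SampleProbeDetails` object, and the lifetime values are
       illustrative)::
           rm = ihm.flr.RefMeasurement(ref_sample_probe=rsp)
           rm.add_lifetime(ihm.flr.RefMeasurementLifetime(
               species_fraction=0.8, lifetime=4.0))
           group = ihm.flr.RefMeasurementGroup()
           group.add_ref_measurement(rm)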
"""
def __init__(self, ref_sample_probe, details=None, list_of_lifetimes=None):
self.ref_sample_probe = ref_sample_probe
self.details = details
self.list_of_lifetimes = \
list_of_lifetimes if list_of_lifetimes is not None else []
self.num_species = len(self.list_of_lifetimes)
def add_lifetime(self, lifetime):
"""Add a lifetime to the list_of_lifetimes."""
self.list_of_lifetimes.append(lifetime)
self.num_species = len(self.list_of_lifetimes)
def __eq__(self, other):
return self.__dict__ == other.__dict__
class RefMeasurementLifetime:
"""Lifetime for a species in a reference measurement.
:param float species_fraction: The species-fraction for the
respective lifetime.
:param float lifetime: The lifetime (in ns).
:param str species_name: A name for the species.
"""
def __init__(self, species_fraction, lifetime, species_name=None):
self.species_fraction = species_fraction
self.lifetime = lifetime
self.species_name = species_name
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FRETDistanceRestraintGroup:
"""A collection of FRET distance restraints that are used together.
"""
def __init__(self):
self.distance_restraint_list = []
def add_distance_restraint(self, distance_restraint):
"""Add a distance restraint to a distance_restraint_group"""
self.distance_restraint_list.append(distance_restraint)
def get_info(self):
return self.distance_restraint_list
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FRETDistanceRestraint:
"""A distance restraint from FRET.
:param sample_probe_1: The combination of sample and probe for
the first probe.
:type sample_probe_1: :class:`SampleProbeDetails`
:param sample_probe_2: The combination of sample and probe for
the second probe.
:type sample_probe_2: :class:`SampleProbeDetails`
:param analysis: The FRET analysis from which the distance
restraint originated.
:type analysis: :class:`FRETAnalysis`
:param float distance: The distance of the restraint.
:param float distance_error_plus: The (absolute, e.g. in Angstrom) error
in the upper direction, such that
``upper boundary = distance + distance_error_plus``.
:param float distance_error_minus: The (absolute, e.g. in Angstrom)
error in the lower direction, such that
``lower boundary = distance + distance_error_minus``.
       :param str distance_type: The type of distance (<R_DA>, <R_DA>_E,
              or R_mp).
       :param state: The state the distance restraint is connected to.
              Important for multi-state models.
:type state: :class:`ihm.model.State`
:param float population_fraction: The population fraction of the state
in case of multi-state models.
:param peak_assignment: The method how a peak was assigned.
:type peak_assignment: :class:`PeakAssignment`
"""
def __init__(self, sample_probe_1, sample_probe_2, analysis, distance,
distance_error_plus=0., distance_error_minus=0.,
distance_type=None, state=None, population_fraction=0.,
peak_assignment=None):
self.sample_probe_1 = sample_probe_1
self.sample_probe_2 = sample_probe_2
self.state = state
self.analysis = analysis
self.distance = distance
self.distance_error_plus = distance_error_plus
self.distance_error_minus = distance_error_minus
self.distance_type = distance_type
self.population_fraction = population_fraction
self.peak_assignment = peak_assignment
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FRETForsterRadius:
"""The FRET Förster radius between two probes.
:param donor_probe: The donor probe.
:type donor_probe: :class:`Probe`
:param acceptor_probe: The acceptor probe.
:type acceptor_probe: :class:`Probe`
:param float forster_radius: The Förster radius between the two probes.
:param float reduced_forster_radius: The reduced Förster radius between
the two probes.
"""
def __init__(self, donor_probe, acceptor_probe, forster_radius,
reduced_forster_radius=None):
self.donor_probe = donor_probe
self.acceptor_probe = acceptor_probe
self.forster_radius = forster_radius
self.reduced_forster_radius = reduced_forster_radius
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FRETCalibrationParameters:
"""The calibration parameter from the FRET measurements.
For the definitions of the parameters see
Hellenkamp et al. Nat. Methods 2018.
:param float phi_acceptor: The quantum yield of the acceptor.
:param float alpha: The alpha parameter.
:param float alpha_sd: The standard deviation of the alpha parameter.
:param float gg_gr_ratio: The ratio of the green and red detection
efficiencies.
:param float beta: The beta parameter.
:param float gamma: The gamma parameter.
:param float delta: The delta parameter.
:param float a_b: The fraction of bright molecules.
"""
def __init__(self, phi_acceptor=None, alpha=None, alpha_sd=None,
gg_gr_ratio=None, beta=None, gamma=None, delta=None,
a_b=None):
self.phi_acceptor = phi_acceptor
self.alpha = alpha
self.alpha_sd = alpha_sd
self.gg_gr_ratio = gg_gr_ratio
self.beta = beta
self.gamma = gamma
self.delta = delta
self.a_b = a_b
def __eq__(self, other):
return self.__dict__ == other.__dict__
class PeakAssignment:
"""The method of peak assignment in case of multiple peaks,
e.g. by population.
:param str method_name: The method used for peak assignment.
:param str details: The details of the peak assignment procedure.
"""
def __init__(self, method_name, details=None):
self.method_name = method_name
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FRETModelQuality:
"""The quality measure for a Model based on FRET data.
:param model: The model being described.
:type model: :class:`ihm.model.Model`
       :param chi_square_reduced: The quality of the model in terms of
              the reduced chi-square, based on the distance restraints
              used for the modeling.
:param dataset_group: The group of datasets that was used for the
quality estimation.
:type dataset_group: :class:`ihm.dataset.DatasetGroup`
:param method: The method used for judging the model quality.
:param str details: Details on the model quality.
"""
def __init__(self, model, chi_square_reduced, dataset_group,
method, details=None):
self.model = model
self.chi_square_reduced = chi_square_reduced
self.dataset_group = dataset_group
self.method = method
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FRETModelDistance:
"""The distance in a model for a certain distance restraint.
:param restraint: The Distance restraint.
:type restraint: :class:`FRETDistanceRestraint`
:param model: The model the distance applies to.
:type model: :class:`ihm.model.Model`
:param distance: The distance obtained for the distance restraint
in the current model.
:param distance_deviation: The deviation of the distance in the
model compared to the value of the distance restraint.
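       If ``distance_deviation`` is not given, it is calculated on
       construction as ``restraint.distance - distance``. For example
       (a sketch; ``r`` and ``m`` are assumed to be existing
       :class:`FRETDistanceRestraint` and :class:`ihm.model.Model`
       objects, with ``r.distance`` equal to 45.0)::
           d = ihm.flr.FRETModelDistance(restraint=r, model=m, distance=40.0)
           # d.distance_deviation is now 5.0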
"""
def __init__(self, restraint, model, distance,
distance_deviation=None):
self.restraint = restraint
self.model = model
self.distance = distance
self.distance_deviation = distance_deviation
if self.distance_deviation is None and self.restraint is not None:
self.calculate_deviation()
def calculate_deviation(self):
if self.distance_deviation is None and self.restraint is not None:
self.distance_deviation = \
float(self.restraint.distance) - float(self.distance)
def update_deviation(self):
if self.restraint is not None:
self.distance_deviation = \
float(self.restraint.distance) - float(self.distance)
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSModeling:
"""Collect the modeling parameters for different steps of FPS,
e.g. Docking, Refinement, or Error estimation.
:param protocol: The modeling protocol to which the FPS modeling
step belongs.
:type protocol: :class:`ihm.protocol.Protocol`
:param restraint_group: The restraint group used for the modeling.
:type restraint_group: :class:`FRETDistanceRestraintGroup`
:param global_parameter: The global FPS parameters used.
:type global_parameter: :class:`FPSGlobalParameters`
:param str probe_modeling_method: either "AV" or "MPP".
:param str details: Details on the FPS modeling.
"""
def __init__(self, protocol, restraint_group,
global_parameter, probe_modeling_method, details=None):
self.protocol = protocol
self.restraint_group = restraint_group
self.global_parameter = global_parameter
self.probe_modeling_method = probe_modeling_method
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSGlobalParameters:
"""The global parameters in the FPS program.
*For a description of the parameters, see also the FPS manual.*
:param float forster_radius: The Förster radius used in the FPS program.
       :param int conversion_function_polynom_order: Order of the
              polynomial for the conversion function between Rmp and E.
:param int repetition: The number of repetitions.
:param float av_grid_rel: The AV grid spacing relative to the smallest
dye or linker dimension.
:param float av_min_grid_a: The minimal AV grid spacing in Angstrom.
:param float av_allowed_sphere: The allowed sphere radius.
:param int av_search_nodes: Number of neighboring positions to be
scanned for clashes.
:param float av_e_samples_k: The number of samples for calculation
of E (in thousand).
       :param float sim_viscosity_adjustment: Damping rate during docking
and refinement.
:param float sim_dt_adjustment: Time step during simulation.
:param float sim_max_iter_k: Maximal number of iterations (in thousand).
:param float sim_max_force: Maximal force.
:param float sim_clash_tolerance_a: Clash tolerance in Angstrom.
       :param float sim_reciprocal_kt: The reciprocal kT.
:param str sim_clash_potential: The clash potential.
:param float convergence_e: Convergence criterion E.
:param float convergence_k: Convergence criterion K.
:param float convergence_f: Convergence criterion F.
:param float convergence_t: Convergence criterion T.
:param str optimized_distances: Which distances are optimized?
"""
def __init__(self, forster_radius, conversion_function_polynom_order,
repetition, av_grid_rel, av_min_grid_a, av_allowed_sphere,
av_search_nodes, av_e_samples_k, sim_viscosity_adjustment,
sim_dt_adjustment, sim_max_iter_k, sim_max_force,
sim_clash_tolerance_a, sim_reciprocal_kt, sim_clash_potential,
convergence_e, convergence_k, convergence_f, convergence_t,
optimized_distances='All'):
self.forster_radius = forster_radius
self.conversion_function_polynom_order \
= conversion_function_polynom_order
self.repetition = repetition
self.av_grid_rel = av_grid_rel
self.av_min_grid_a = av_min_grid_a
self.av_allowed_sphere = av_allowed_sphere
self.av_search_nodes = av_search_nodes
self.av_e_samples_k = av_e_samples_k
self.sim_viscosity_adjustment = sim_viscosity_adjustment
self.sim_dt_adjustment = sim_dt_adjustment
self.sim_max_iter_k = sim_max_iter_k
self.sim_max_force = sim_max_force
self.sim_clash_tolerance_a = sim_clash_tolerance_a
self.sim_reciprocal_kt = sim_reciprocal_kt
self.sim_clash_potential = sim_clash_potential
self.convergence_e = convergence_e
self.convergence_k = convergence_k
self.convergence_f = convergence_f
self.convergence_t = convergence_t
self.optimized_distances = optimized_distances
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSAVModeling:
"""FPS modeling using AV.
This object connects the FPS_modeling step, the sample_probe and
the respective AV parameters.
       :param fps_modeling: The FPS modeling object.
       :type fps_modeling: :class:`FPSModeling`
       :param sample_probe: The sample probe.
       :type sample_probe: :class:`SampleProbeDetails`
:param parameter: The FPS AV parameters used.
:type parameter: :class:`FPSAVParameter`
"""
def __init__(self, fps_modeling, sample_probe, parameter):
# fps_modeling is the object containing information on the
# ihm modeling protocol, the restraint group and the global
# FPS parameters
self.fps_modeling = fps_modeling
self.sample_probe = sample_probe
self.parameter = parameter
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSAVParameter:
"""The AV parameters used for the modeling using FPS.
:param int num_linker_atoms: The number of atoms in the linker.
:param float linker_length: The length of the linker in Angstrom.
:param float linker_width: The width of the linker in Angstrom.
:param float probe_radius_1: The first radius of the probe.
:param float probe_radius_2: If AV3 is used, the second radius
of the probe.
:param float probe_radius_3: If AV3 is used, the third radius
of the probe.
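       For example, for an AV3 calculation using all three probe radii
       (a sketch; the numeric values are purely illustrative, not
       recommended defaults)::
           param = ihm.flr.FPSAVParameter(
               num_linker_atoms=15, linker_length=20.0, linker_width=4.5,
               probe_radius_1=5.0, probe_radius_2=4.5, probe_radius_3=1.5)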
"""
def __init__(self, num_linker_atoms, linker_length, linker_width,
probe_radius_1, probe_radius_2=None, probe_radius_3=None):
self.num_linker_atoms = num_linker_atoms
self.linker_length = linker_length
self.linker_width = linker_width
self.probe_radius_1 = probe_radius_1
self.probe_radius_2 = probe_radius_2
self.probe_radius_3 = probe_radius_3
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSMPPModeling:
"""Maps the FPSModeling object to a mean probe position and connects it
to the reference coordinate system.
:param fps_modeling: The FPS modeling object.
:type fps_modeling: :class:`FPSModeling`
       :param mpp: The mean probe position.
       :type mpp: :class:`FPSMeanProbePosition`
       :param mpp_atom_position_group: The group of atom positions used
              to define the reference coordinate system.
       :type mpp_atom_position_group: :class:`FPSMPPAtomPositionGroup`
"""
def __init__(self, fps_modeling, mpp, mpp_atom_position_group):
# fps_modeling is the object containing information on the
# ihm modeling protocol, the restraint group and the global
# FPS parameters
self.fps_modeling = fps_modeling
self.mpp = mpp
self.mpp_atom_position_group = mpp_atom_position_group
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSMeanProbePosition:
"""The mean probe position of an AV, which can be used instead of an AV.
*It is usually not recommended to use this. Use AVs instead.*
The coordinates are with respect to a reference coordinate system
defined by :class:`FPSMPPAtomPositionGroup`.
:param sample_probe: The Sample probe.
:type sample_probe: :class:`SampleProbeDetails`
:param float x: The x-coordinate of the mean probe position.
:param float y: The y-coordinate of the mean probe position.
:param float z: The z-coordinate of the mean probe position.
"""
def __init__(self, sample_probe, x, y, z):
self.sample_probe = sample_probe
self.x, self.y, self.z = x, y, z
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSMPPAtomPositionGroup:
"""A group of atom positions used to define the coordinate system
of a mean probe position.
*Not part of the FLR dictionary.*
"""
def __init__(self):
self.mpp_atom_position_list = []
def add_atom_position(self, atom_position):
self.mpp_atom_position_list.append(atom_position)
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FPSMPPAtomPosition:
"""An atom used to describe the coordinate system for a mean probe position
:param atom: The atom being described.
:type atom: :class:`ihm.Atom`
:param float x: The x-coordinate of the atom.
:param float y: The y-coordinate of the atom.
:param float z: The z-coordinate of the atom.
"""
# atoms describing the coordinate system for a mean probe position
def __init__(self, atom, x, y, z):
self.atom, self.x, self.y, self.z = atom, x, y, z
def __eq__(self, other):
return self.__dict__ == other.__dict__
class KineticRateFretAnalysisConnection:
"""Connects a kinetic rate with a FRET analysis.
       :param fret_analysis: The FRETAnalysis object assigned to
              a kinetic rate.
       :type fret_analysis: :class:`FRETAnalysis`
       :param kinetic_rate: The kinetic rate.
       :type kinetic_rate: :class:`ihm.multi_state_scheme.KineticRate`
       :param str details: Details about the connection between the
              FRETAnalysis object and the KineticRate object.
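       For example (a sketch; ``analysis``, ``rate`` and ``flr_data``
       are assumed to be existing :class:`FRETAnalysis`,
       :class:`ihm.multi_state_scheme.KineticRate` and :class:`FLRData`
       objects)::
           c = ihm.flr.KineticRateFretAnalysisConnection(
               fret_analysis=analysis, kinetic_rate=rate)
           flr_data.kinetic_rate_fret_analysis_connections.append(c)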
"""
def __init__(self, fret_analysis, kinetic_rate, details=None):
self.fret_analysis = fret_analysis
self.kinetic_rate = kinetic_rate
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class RelaxationTimeFretAnalysisConnection:
"""Connects a relaxation time with a FRET analysis.
       :param fret_analysis: The FRETAnalysis object assigned to
              a relaxation time.
       :type fret_analysis: :class:`FRETAnalysis`
       :param relaxation_time: The relaxation time.
       :type relaxation_time: :class:`ihm.multi_state_scheme.RelaxationTime`
       :param str details: Details about the connection between the
              FRETAnalysis object and the RelaxationTime object.
"""
def __init__(self, fret_analysis, relaxation_time, details=None):
self.fret_analysis = fret_analysis
self.relaxation_time = relaxation_time
self.details = details
def __eq__(self, other):
return self.__dict__ == other.__dict__
class FLRData:
"""A collection of the fluorescence data to be added to the system.
Instances of this class are generally added to
:attr:`~ihm.System.flr_data`.
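       For example (a sketch; ``system`` is assumed to be an existing
       :class:`ihm.System` and ``rg`` an existing
       :class:`FRETDistanceRestraintGroup`)::
           flr_data = ihm.flr.FLRData()
           flr_data.distance_restraint_groups.append(rg)
           system.flr_data.append(flr_data)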
"""
def __init__(self):
#: All groups of FRET distance restraints.
#: See :class:`FRETDistanceRestraintGroup`.
self.distance_restraint_groups = []
#: All conjugates of polymer residue and probe.
#: See :class:`PolyProbeConjugate`.
self.poly_probe_conjugates = []
#: All quality measures for models based on FRET data.
#: See :class:`FRETModelQuality`.
self.fret_model_qualities = []
#: All distances in models for distance restraints.
#: See :class:`FRETModelDistance`.
self.fret_model_distances = []
#: All modeling objects.
#: See :class:`FPSAVModeling` and :class:`FPSMPPModeling`.
self.fps_modeling = []
        #: All connections between FRETAnalysis and KineticRate objects.
        #: See :class:`KineticRateFretAnalysisConnection`.
        self.kinetic_rate_fret_analysis_connections = []
        #: All connections between FRETAnalysis and RelaxationTime objects.
        #: See :class:`RelaxationTimeFretAnalysisConnection`.
        self.relaxation_time_fret_analysis_connections = []
# The following dictionaries are so far only used when reading data
self._collection_flr_experiment = {}
self._collection_flr_inst_setting = {}
self._collection_flr_exp_condition = {}
self._collection_flr_instrument = {}
self._collection_flr_entity_assembly = {}
self._collection_flr_sample_condition = {}
self._collection_flr_sample = {}
self._collection_flr_sample_probe_details = {}
self._collection_flr_probe = {}
self._collection_flr_poly_probe_position = {}
self._collection_flr_poly_probe_position_modified = {}
self._collection_flr_poly_probe_position_mutated = {}
self._collection_flr_poly_probe_conjugate = {}
self._collection_flr_fret_forster_radius = {}
self._collection_flr_fret_calibration_parameters = {}
self._collection_flr_fret_analysis = {}
self._collection_flr_lifetime_fit_model = {}
self._collection_flr_ref_measurement_group = {}
self._collection_flr_ref_measurement = {}
self._collection_flr_ref_measurement_lifetime = {}
self._collection_flr_peak_assignment = {}
self._collection_flr_fret_distance_restraint = {}
self._collection_flr_fret_distance_restraint_group = {}
self._collection_flr_fret_model_quality = {}
self._collection_flr_fret_model_distance = {}
self._collection_flr_fps_global_parameters = {}
self._collection_flr_fps_modeling = {}
self._collection_flr_fps_av_parameter = {}
self._collection_flr_fps_av_modeling = {}
self._collection_flr_fps_mean_probe_position = {}
self._collection_flr_fps_mpp_atom_position = {}
self._collection_flr_fps_mpp_modeling = {}
self._collection_flr_kinetic_rate_fret_analysis_connection = {}
self._collection_flr_relaxation_time_fret_analysis_connection = {}
def _all_distance_restraints(self):
"""Yield all FRETDistanceRestraint objects"""
for rg in self.distance_restraint_groups:
for r in rg.distance_restraint_list:
yield r
def _all_analyses(self):
"""Yield all FRETAnalysis objects"""
for r in self._all_distance_restraints():
yield r.analysis
# Get the analyses from the kinetic rate and
# relaxation time connections
for c in self.kinetic_rate_fret_analysis_connections:
yield c.fret_analysis
for c in self.relaxation_time_fret_analysis_connections:
yield c.fret_analysis
def _all_peak_assignments(self):
"""Yield all PeakAssignment objects"""
for r in self._all_distance_restraints():
yield r.peak_assignment
def _all_experiments(self):
"""Yield all Experiment objects"""
for a in self._all_analyses():
yield a.experiment
def _all_forster_radii(self):
"""Yield all FRETForsterRadius objects"""
for a in self._all_analyses():
yield a.forster_radius
def _all_calibration_parameters(self):
"""Yield all FRETCalibrationParameters objects"""
for a in self._all_analyses():
if a.type == 'intensity-based':
yield a.calibration_parameters
def _all_lifetime_fit_models(self):
"""Yield all LifetimeFitModel objects"""
for a in self._all_analyses():
if a.type == 'lifetime-based':
yield a.lifetime_fit_model
def _all_ref_measurement_groups(self):
"""Yield all RefMeasurementGroup objects"""
for a in self._all_analyses():
if a.type == 'lifetime-based':
yield a.ref_measurement_group
def _all_ref_measurements(self):
"""Yield all RefMeasurement objects"""
for rg in self._all_ref_measurement_groups():
for x in rg.ref_measurement_list:
yield x
def _all_ref_measurement_lifetimes(self):
"""Yield all RefMeasurementLifetime objects"""
for r in self._all_ref_measurements():
for x in r.list_of_lifetimes:
yield x
def _all_sample_probe_details(self):
"""Yield all SampleProbeDetails objects"""
for r in self._all_distance_restraints():
yield r.sample_probe_1
yield r.sample_probe_2
for r in self._all_ref_measurements():
yield r.ref_sample_probe
def _all_samples(self):
"""Yield all Sample objects"""
for s in self._all_sample_probe_details():
yield s.sample
def _all_probes(self):
"""Yield all Probe objects"""
for s in self._all_sample_probe_details():
yield s.probe
def _all_poly_probe_positions(self):
"""Yield all PolyProbePosition objects"""
for s in self._all_sample_probe_details():
yield s.poly_probe_position
def _all_inst_settings(self):
"""Yield all InstSetting objects"""
for e in self._all_experiments():
for s in e.inst_setting_list:
yield s
def _all_exp_conditions(self):
"""Yield all ExpCondition objects"""
for e in self._all_experiments():
for s in e.exp_condition_list:
yield s
def _all_instruments(self):
"""Yield all Instrument objects"""
for e in self._all_experiments():
for s in e.instrument_list:
yield s
def _all_fps_modeling(self):
"""Yield all FPSModeling objects"""
for m in self.fps_modeling:
yield m.fps_modeling
def _all_fps_global_parameters(self):
"""Yield all FPSGlobalParameters objects"""
for m in self._all_fps_modeling():
yield m.global_parameter
def _all_fps_av_modeling(self):
"""Yield all FPSAVModeling objects"""
for m in self.fps_modeling:
if isinstance(m, FPSAVModeling):
yield m
def _all_fps_av_parameter(self):
"""Yield all FPSAVParameter objects"""
for m in self._all_fps_av_modeling():
yield m.parameter
def _all_fps_mpp_modeling(self):
"""Yield all FPSMPPModeling objects"""
for m in self.fps_modeling:
if isinstance(m, FPSMPPModeling):
yield m
def _all_fps_mean_probe_position(self):
"""Yield all FPSMeanProbePosition objects"""
for m in self._all_fps_mpp_modeling():
yield m.mpp
def _all_fps_atom_position_group(self):
"""Yield all FPSMPPAtomPositionGroup objects"""
for m in self._all_fps_mpp_modeling():
yield m.mpp_atom_position_group
def _all_flr_chemical_descriptors(self):
"""Collect the chemical descriptors from the flr part.
*This might contain duplicates.*
"""
# collect from all distance_restraint_groups
for drgroup in self.distance_restraint_groups:
# collect from all distance restraints
for dr in drgroup.distance_restraint_list:
# collect from both sample_probe_1 and sample_probe_2
for this_sample_probe in (dr.sample_probe_1,
dr.sample_probe_2):
# collect from the probe
probe = this_sample_probe.probe
# reactive probe
yield probe.probe_descriptor.reactive_probe_chem_descriptor
# chromophore
yield probe.probe_descriptor.chromophore_chem_descriptor
# collect from the poly_probe_position
pos = this_sample_probe.poly_probe_position
# modified chem descriptor
if pos.modification_flag:
yield pos.modified_chem_descriptor
# collect from all analyses if they are lifetime-based
a = dr.analysis
if a.type == 'lifetime-based':
# RefMeasurementGroup
rg = a.ref_measurement_group
# collect from all RefMeasurements
for rm in rg.ref_measurement_list:
# collect from the ref_sample_probe
this_ref_sample_probe = rm.ref_sample_probe
probe = this_ref_sample_probe.probe
pd = probe.probe_descriptor
# reactive probe
yield pd.reactive_probe_chem_descriptor
# chromophore
yield pd.chromophore_chem_descriptor
# collect from the poly_probe_position
pos = this_ref_sample_probe.poly_probe_position
# modified chem descriptor
if pos.modification_flag:
yield pos.modified_chem_descriptor
# and collect from all poly_probe_conjugates
for c in self.poly_probe_conjugates:
yield c.chem_descriptor
python-ihm-2.7/ihm/format.py 0000664 0000000 0000000 00000144254 15035733372 0016117 0 ustar 00root root 0000000 0000000 """Utility classes to handle CIF format.
This module provides classes to read in and write out mmCIF files. It is
only concerned with handling syntactically correct CIF - it does not know
the set of tables or the mapping to ihm objects. For that,
see :mod:`ihm.dumper` for writing and :mod:`ihm.reader` for reading.
See also the `stream parser example `_
and the `token reader example `_.
""" # noqa: E501
import textwrap
import operator
import ihm
from io import StringIO
import inspect
import re
try:
from . import _format
except ImportError:
_format = None
def _write_multiline(val, fh):
fh.write("\n;")
fh.write(val)
if not val.endswith('\n'):
fh.write("\n")
fh.write(";\n")
class _LineWriter:
def __init__(self, writer, line_len=80):
self.writer = writer
self.line_len = line_len
self.column = 0
def write(self, val):
if isinstance(val, str) and '\n' in val:
_write_multiline(val, self.writer.fh)
self.column = 0
return
val = '.' if val is None else self.writer._repr(val)
if self.column > 0:
if self.line_len and self.column + len(val) + 1 > self.line_len:
self.writer.fh.write("\n")
self.column = 0
else:
self.writer.fh.write(" ")
self.column += 1
self.writer.fh.write(val)
self.column += len(val)
class _CifCategoryWriter:
def __init__(self, writer, category):
self.writer = writer
self.category = category
def write(self, **kwargs):
self.writer._write(self.category, kwargs)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
pass
class _CifLoopWriter:
def __init__(self, writer, category, keys, line_wrap=True):
self._line_wrap = line_wrap
self.writer = writer
self.category = category
self.keys = keys
# Remove characters that we can't use in Python identifiers
self.python_keys = [k.replace('[', '').replace(']', '') for k in keys]
self._empty_loop = True
def write(self, **kwargs):
if self._empty_loop:
f = self.writer.fh
f.write("#\nloop_\n")
for k in self.keys:
f.write("%s.%s\n" % (self.category, k))
self._empty_loop = False
lw = _LineWriter(self.writer, line_len=80 if self._line_wrap else 0)
for k in self.python_keys:
lw.write(kwargs.get(k, None))
self.writer.fh.write("\n")
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
if not self._empty_loop:
self.writer.fh.write("#\n")
class _Writer:
"""Base class for all writers"""
omitted = '.'
unknown = '?'
_boolmap = {False: 'NO', True: 'YES'}
def __init__(self, fh):
self.fh = fh
class CifWriter(_Writer):
"""Write information to a CIF file.
The constructor takes a single argument - a Python filelike object
to write to - and provides methods to write Python objects to
that file. Most simple Python types are supported (string, float,
bool, int). The Python bool type is mapped to CIF strings
'NO' and 'YES'. Floats are always represented with 3 decimal places
(or in scientific notation with 3 digits of precision if smaller
than 1e-3); if a different amount of precision is desired, convert
the float to a string first."""
_line_wrap = True
@classmethod
def _set_line_wrap(cls, line_wrap):
cls._line_wrap = line_wrap
def flush(self):
# noop - data is written as it is encountered
pass
def start_block(self, name):
"""Start a new data block in the file with the given name."""
self.fh.write('data_%s\n' % name)
def end_block(self):
# noop - mmCIF has no end-of-block indicator
pass
def category(self, category):
"""Return a context manager to write a CIF category.
A CIF category is a simple list of key:value pairs.
:param str category: the name of the category
(e.g. "_struct_conf_type").
:return: an object with a single method `write` which takes
keyword arguments.
For example::
with writer.category("_struct_conf_type") as l:
l.write(id='HELX_P', criteria=writer.unknown)
"""
return _CifCategoryWriter(self, category)
def loop(self, category, keys):
"""Return a context manager to write a CIF loop.
:param str category: the name of the category
(e.g. "_struct_conf")
:param list keys: the field keys in that category
:return: an object with a single method `write` which takes
keyword arguments; this can be called any number of
times to add entries to the loop. Any field keys in `keys`
that are not provided as arguments to `write`, or values
that are the Python value `None`, will get the CIF
omitted value ('.'), while arguments to `write` that
are not present in `keys` will be ignored.
For example::
with writer.loop("_struct_conf", ["id", "conf_type_id"]) as l:
for i in range(5):
l.write(id='HELX_P1%d' % i, conf_type_id='HELX_P')
"""
return _CifLoopWriter(self, category, keys, line_wrap=self._line_wrap)
def write_comment(self, comment):
"""Write a simple comment to the CIF file.
The comment will be wrapped if necessary for readability.
See :meth:`_set_line_wrap`."""
if self._line_wrap:
for line in textwrap.wrap(comment, 78):
self.fh.write('# ' + line + '\n')
else:
self.fh.write('# ' + comment + '\n')
def _write(self, category, kwargs):
for key, val in sorted(kwargs.items(), key=operator.itemgetter(0)):
if isinstance(val, str) and '\n' in val:
self.fh.write("%s.%s" % (category, key))
_write_multiline(val, self.fh)
else:
self.fh.write("%s.%s %s\n" % (category, key,
self.omitted if val is None
else self._repr(val)))
def _repr(self, obj):
if isinstance(obj, str) and '"' not in obj \
and "'" not in obj and " " not in obj \
and len(obj) > 0 \
and not obj.startswith('_') \
and not obj.startswith('global_') \
and not obj.startswith('[') \
and obj[:5] not in ('data_', 'save_', 'loop_', 'stop_', '?', '.'):
return obj
elif isinstance(obj, float):
if abs(obj) < 1e-3:
return "%.3g" % obj
else:
return "%.3f" % obj
elif isinstance(obj, bool):
return self._boolmap[obj]
elif isinstance(obj, str):
return repr(obj)
else:
return str(obj)
# Acceptable 'whitespace' characters in CIF
_WHITESPACE = set(" \t")
class CifParserError(Exception):
"""Exception raised for invalid format mmCIF files"""
pass
class _Token:
"""A token in an mmCIF file"""
pass
class _ValueToken(_Token):
"""The value of a variable in mmCIF"""
pass
class _OmittedValueToken(_ValueToken):
"""A value that is deliberately omitted (the '.' string in mmCIF)"""
def as_mmcif(self):
return "."
class _UnknownValueToken(_ValueToken):
"""A value that is unknown (the '?' string in mmCIF)"""
def as_mmcif(self):
return "?"
class _TextValueToken(_ValueToken):
"""The value of a variable in mmCIF as a piece of text"""
__slots__ = ['txt', 'quote']
def __init__(self, txt, quote):
self.txt = txt
self.quote = quote
def as_mmcif(self):
if '\n' in self.txt or self.quote == ';':
suffix = ";\n" if self.txt.endswith('\n') else "\n;\n"
return ";" + self.txt + suffix
elif self.quote == "'":
return "'" + self.txt + "'"
elif self.quote == '"' or ' ' in self.txt:
return '"' + self.txt + '"'
else:
return self.txt
class _VariableToken(_Token):
"""A variable name, e.g. _entry.id, in mmCIF"""
__slots__ = ['category', 'keyword']
def __init__(self, val, linenum):
# mmCIF categories and keywords are case insensitive, so make
# everything lowercase
self.category, _, self.keyword = val.lower().partition('.')
if not self.category or not self.keyword:
raise CifParserError("Malformed mmCIF variable name "
"(%s) on line %d" % (val, linenum))
class _PreservingVariableToken(_VariableToken):
"""A variable name that preserves the original case of the keyword"""
__slots__ = ['category', 'keyword', 'orig_keyword']
def __init__(self, val, linenum):
super().__init__(val, linenum)
_, _, self.orig_keyword = val.partition('.')
def as_mmcif(self):
if self.orig_keyword and self.orig_keyword.lower() == self.keyword:
return self.category + '.' + self.orig_keyword
else:
return self.category + '.' + self.keyword
class _CommentToken(_Token):
"""A comment in mmCIF without the leading '#'"""
__slots__ = ['txt']
def __init__(self, txt):
self.txt = txt
def as_mmcif(self):
return "#" + self.txt
class _WhitespaceToken(_Token):
"""Space between other mmCIF tokens"""
__slots__ = ['txt']
def __init__(self, txt):
self.txt = txt
def as_mmcif(self):
return self.txt
class _EndOfLineToken(_Token):
"""End of a line in an mmCIF file"""
def as_mmcif(self):
return "\n"
class _NullToken(_Token):
"""Null token"""
def as_mmcif(self):
return ""
# Return dummy values for filters that expect a variable or value token
keyword = property(lambda self: None)
class _DataToken(_Token):
"""A data_* keyword in mmCIF, denoting a new data block"""
__slots__ = ['txt']
def __init__(self, txt):
self.txt = txt
def as_mmcif(self):
return 'data_' + self.txt
class _LoopToken(_Token):
"""A loop_ keyword in mmCIF, denoting the start of a loop construct"""
def as_mmcif(self):
return "loop_"
class _SaveToken(_Token):
"""A save_* keyword in mmCIF, denoting the start or end of a save frame"""
pass
class _Reader:
"""Base class for reading a file and extracting some or all of its data."""
def _add_category_keys(self):
"""Populate _keys for each category by inspecting its __call__
method"""
def python_to_cif(field):
# Map valid Python identifiers to mmCIF keywords
if field.startswith('tr_vector') or field.startswith('rot_matrix'):
return re.sub(r'(\d)', r'[\1]', field)
else:
return field
def fill_keys(h, s, attr, typ):
if not hasattr(h, attr):
setattr(h, attr, frozenset(
python_to_cif(k) for k, v in s.annotations.items()
if v is typ))
def check_extra(h, attr):
extra = frozenset(getattr(h, attr)) - frozenset(h._keys)
if extra:
raise ValueError("For %s, %s not in _keys: %s"
% (h, attr, ", ".join(extra)))
for h in self.category_handler.values():
s = inspect.getfullargspec(h.__call__)
if not hasattr(h, '_keys'):
h._keys = [python_to_cif(x) for x in s.args[1:]]
fill_keys(h, s, '_int_keys', int)
fill_keys(h, s, '_float_keys', float)
fill_keys(h, s, '_bool_keys', bool)
bad_keys = frozenset(k for k, v in s.annotations.items()
if v not in (int, float, str, bool))
if bad_keys:
raise ValueError("For %s, bad annotations: %s"
% (h, ", ".join(bad_keys)))
check_extra(h, '_int_keys')
check_extra(h, '_float_keys')
check_extra(h, '_bool_keys')
class _CifTokenizer:
def __init__(self, fh):
self.fh = fh
self._tokens = []
self._token_index = 0
self._linenum = 0
# Read a line from the file. Treat it as ASCII (not Unicode)
# but be tolerant of 8-bit characters by assuming latin-1 encoding
def _read_line(self):
line = self.fh.readline()
if isinstance(line, bytes):
return line.decode('latin-1')
else:
return line
def _read_multiline_token(self, first_line, ignore_multiline):
"""Read a semicolon-delimited (multiline) token"""
lines = [first_line[1:]] # Skip initial semicolon
start_linenum = self._linenum
while True:
self._linenum += 1
nextline = self._read_line()
if nextline == '':
raise CifParserError(
"End of file while reading multiline "
"string which started on line %d" % start_linenum)
elif nextline.startswith(';'):
# Strip last newline
lines[-1] = lines[-1].rstrip('\r\n')
self._tokens = [_TextValueToken("".join(lines), ';')]
return
elif not ignore_multiline:
lines.append(nextline)
def _handle_quoted_token(self, line, strlen, start_pos, quote_type):
"""Given the start of a quoted string, find the end and add a token
for it"""
quote = line[start_pos]
# Get the next quote that is followed by whitespace (or line end).
# In mmCIF a quote within a string is not considered an end quote as
# long as it is not followed by whitespace.
end = start_pos
while True:
end = line.find(quote, end + 1)
if end == -1:
raise CifParserError("%s-quoted string not terminated "
"at line %d"
% (quote_type, self._linenum))
elif end == strlen - 1 or line[end + 1] in _WHITESPACE:
# A quoted string is always a literal string, even if it is
# "?" or ".", not an unknown/omitted value
self._tokens.append(_TextValueToken(line[start_pos + 1:end],
quote))
return end + 1 # Step past the closing quote
def _skip_initial_whitespace(self, line, strlen, start_pos):
while start_pos < strlen and line[start_pos] in _WHITESPACE:
start_pos += 1
return start_pos
def _extract_line_token(self, line, strlen, start_pos):
"""Extract the next token from the given line starting at start_pos,
populating self._tokens. The new start_pos is returned."""
start_pos = self._skip_initial_whitespace(line, strlen, start_pos)
if start_pos >= strlen:
return strlen
if line[start_pos] == '"':
return self._handle_quoted_token(line, strlen, start_pos, "Double")
elif line[start_pos] == "'":
return self._handle_quoted_token(line, strlen, start_pos, "Single")
elif line[start_pos] == "#":
# Comment - discard the rest of the line
self._handle_comment(line, start_pos)
return strlen
else:
# Find end of token (whitespace or end of line)
end_pos = start_pos
while end_pos < strlen and line[end_pos] not in _WHITESPACE:
end_pos += 1
val = line[start_pos:end_pos]
if val == 'loop_':
tok = _LoopToken()
elif val.startswith('data_'):
tok = _DataToken(val[5:])
elif val.startswith('save_'):
tok = _SaveToken()
elif val.startswith('_'):
tok = self._handle_variable_token(val, self._linenum)
elif val == '.':
tok = _OmittedValueToken()
elif val == '?':
tok = _UnknownValueToken()
else:
            # Note that we do no special processing for the other
            # reserved words (global_, stop_; save_ is handled above).
            # But the probability of them occurring where we expect
            # a value is pretty small.
tok = _TextValueToken(val, None) # don't alter case of values
self._tokens.append(tok)
return end_pos
def _handle_variable_token(self, val, linenum):
return _VariableToken(val, linenum)
def _handle_comment(self, line, start_pos):
"""Potentially handle a comment that spans line[start_pos:]."""
pass
def _tokenize(self, line):
"""Break up a line into tokens, populating self._tokens"""
self._tokens = []
if line.startswith('#'):
self._handle_comment(line, 0)
return # Skip comment lines
start_pos = 0
strlen = len(line)
while start_pos < strlen:
start_pos = self._extract_line_token(line, strlen, start_pos)
def _unget_token(self):
"""Push back the last token returned by _get_token() so it can
be read again"""
self._token_index -= 1
def _get_token(self, ignore_multiline=False):
"""Get the next :class:`_Token` from an mmCIF file, or None
on end of file.
        If ignore_multiline is True, the string contents of any multiline
value tokens (those that are semicolon-delimited) are not stored
in memory.
"""
while len(self._tokens) <= self._token_index:
# No tokens left - read the next non-blank line in
self._linenum += 1
line = self._read_line()
if line == '': # End of file
return
if line.startswith(';'):
self._read_multiline_token(line, ignore_multiline)
else:
self._tokenize(line.rstrip('\r\n'))
self._token_index = 0
self._token_index += 1
return self._tokens[self._token_index - 1]
class _PreservingCifTokenizer(_CifTokenizer):
"""A tokenizer subclass which preserves comments, case and whitespace"""
def _tokenize(self, line):
_CifTokenizer._tokenize(self, line)
self._tokens.append(_EndOfLineToken())
def _handle_comment(self, line, start_pos):
self._tokens.append(_CommentToken(line[start_pos + 1:]))
def _handle_variable_token(self, val, linenum):
return _PreservingVariableToken(val, linenum)
def _skip_initial_whitespace(self, line, strlen, start_pos):
end_pos = start_pos
while end_pos < strlen and line[end_pos] in _WHITESPACE:
end_pos += 1
if end_pos > start_pos:
self._tokens.append(_WhitespaceToken(line[start_pos:end_pos]))
return end_pos
class _CategoryTokenGroup:
"""A group of tokens which set a single data item"""
def __init__(self, vartoken, valtoken):
self.vartoken, self.valtoken = vartoken, valtoken
def __str__(self):
return ("<_CategoryTokenGroup(%s, %s)>"
% (self.vartoken.as_mmcif(), self.valtoken.token.as_mmcif()))
def as_mmcif(self):
return self.vartoken.as_mmcif() + self.valtoken.as_mmcif() + "\n"
def __set_value(self, val):
self.valtoken.value = val
category = property(lambda self: self.vartoken.category)
keyword = property(lambda self: self.vartoken.keyword)
value = property(lambda self: self.valtoken.value, __set_value)
class _LoopHeaderTokenGroup:
"""A group of tokens that form the start of a loop_ construct"""
def __init__(self, looptoken, category, keywords, end_spacers):
self._loop, self.category = looptoken, category
self.keywords = keywords
self.end_spacers = end_spacers
def keyword_index(self, keyword):
"""Get the zero-based index of the given keyword, or ValueError"""
return [k.token.keyword for k in self.keywords].index(keyword)
def __str__(self):
return ("<_LoopHeaderTokenGroup(%s, %s)>"
% (self.category,
str([k.token.keyword for k in self.keywords])))
def as_mmcif(self):
all_tokens = [self._loop] + self.keywords + self.end_spacers
return "".join(x.as_mmcif() for x in all_tokens)
class _LoopRowTokenGroup:
"""A group of tokens that represent one row in a loop_ construct"""
def __init__(self, items):
self.items = items
def as_mmcif(self):
return "".join(x.as_mmcif() for x in self.items)
class _SpacedToken:
"""A token with zero or more leading whitespace or newline tokens"""
def __init__(self, spacers, token):
self.spacers, self.token = spacers, token
def as_mmcif(self):
return ("".join(x.as_mmcif() for x in self.spacers)
+ self.token.as_mmcif())
def __get_value(self):
if isinstance(self.token, _OmittedValueToken):
return None
elif isinstance(self.token, _UnknownValueToken):
return ihm.unknown
else:
return self.token.txt
def __set_value(self, val):
if val is None:
self.token = _OmittedValueToken()
elif val is ihm.unknown:
self.token = _UnknownValueToken()
elif isinstance(self.token, _TextValueToken):
self.token.txt = val
else:
self.token = _TextValueToken(val, quote=None)
value = property(__get_value, __set_value)
class Filter:
"""Base class for filters used by :meth:`CifTokenReader.read_file`.
Typically, a subclass such as :class:`ChangeValueFilter` is used when
reading an mmCIF file.
:param str target: the mmCIF data item this filter should act on.
It can be the full name of the data item (including category)
such as ``_entity.type``; or just the attribute or keyword name
such as ``.type_symbol`` which would match any category
(e.g. ``_atom_site.type_symbol``).
"""
def __init__(self, target):
ts = target.lower().split('.')
if len(ts) == 1 or not ts[0]:
self.category = None
elif ts[0].startswith('_'):
self.category = ts[0]
else:
self.category = '_' + ts[0]
self.keyword = ts[-1]
def _set_category_from_target(self, target):
if target.startswith('_'):
self.category = target
else:
self.category = '_' + target
self.keyword = None
def match_token_category(self, tok):
"""Return true iff the given token matches the target's category"""
return self.category is None or tok.category == self.category
def match_token_keyword(self, tok):
"""Return true iff the given token matches the target's category
and keyword"""
return self.match_token_category(tok) and tok.keyword == self.keyword
def filter_category(self, tok):
"""Filter the given category token.
:return: the original token (which may have been modified),
a replacement token, or None if the token should be
deleted.
"""
raise NotImplementedError
def filter_loop_header(self, tok):
"""Filter the given loop header token.
:return: the original token (which must not have been modified),
a replacement token, or None if the token should be
deleted. If the header token is replaced or deleted,
all of the original loop rows will also be deleted.
"""
return tok
def get_loop_filter(self, tok):
"""Given a loop header token, potentially return a handler for each
loop row token. This function is also permitted to alter the
header in place (but not replace or remove it). Keywords should
not be removed from the header (as that may confuse other filters)
but can be replaced with null tokens.
:return: a callable which will be called for each loop row token
(and acts like :meth:`filter_category`), or None if no
filtering is needed for this loop.
"""
raise NotImplementedError
class ChangeValueFilter(Filter):
"""Change any token that sets a data item to ``old`` to be ``new``.
For example, this could be used to rename certain chains, or change
all residues of a certain type.
:param str old: The existing value of the data item.
:param str new: The new value of the data item.
See :class:`Filter` for a description of the ``target`` parameter.
"""
def __init__(self, target, old, new):
super().__init__(target)
self.old, self.new = old, new
def filter_category(self, tok):
if self.match_token_keyword(tok) and tok.value == self.old:
tok.value = self.new
return tok
def get_loop_filter(self, tok):
if self.match_token_category(tok):
try:
keyword_index = tok.keyword_index(self.keyword)
except ValueError:
return
def loop_filter(t):
if t.items[keyword_index].value == self.old:
t.items[keyword_index].value = self.new
return t
return loop_filter
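# Illustrative sketch (not part of the library API): copying an mmCIF file
# while renaming a chain with ChangeValueFilter. The data item and values
# below are hypothetical; CifTokenReader is defined later in this module,
# which is fine since the name is only resolved when the function is called.
def _example_change_value_filter(in_fh, out_fh):
    reader = CifTokenReader(in_fh)
    filters = [ChangeValueFilter('_atom_site.auth_asym_id',
                                 old='A', new='Z')]
    for tok in reader.read_file(filters=filters):
        out_fh.write(tok.as_mmcif())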
class ChangeFuncValueFilter(Filter):
"""Change any token that sets a data item to x to be f(x).
For example, this could be used to perform a search and replace on
a string, or match against a regex.
:param callable func: A function that is given the existing value
of the data item, the category name (e.g. ``_atom_site``),
and the keyword name (e.g. ``auth_seq_id``), and should return
the new value of the data item (perhaps unchanged).
See :class:`Filter` for a description of the ``target`` parameter.
"""
def __init__(self, target, func):
super().__init__(target)
self.func = func
def filter_category(self, tok):
if self.match_token_keyword(tok):
tok.value = self.func(tok.value, tok.category, tok.keyword)
return tok
def get_loop_filter(self, tok):
if self.match_token_category(tok):
try:
keyword_index = tok.keyword_index(self.keyword)
except ValueError:
return
def loop_filter(t):
item = t.items[keyword_index]
item.value = self.func(item.value, tok.category, self.keyword)
return t
return loop_filter
class RemoveItemFilter(Filter):
"""Remove any token from the file that sets the given data item.
See :class:`Filter` for a description of the ``target`` parameter.
"""
def filter_category(self, tok):
if self.match_token_keyword(tok):
return None
else:
return tok
def get_loop_filter(self, tok):
if self.match_token_category(tok):
try:
keyword_index = tok.keyword_index(self.keyword)
except ValueError:
return
# Remove keyword from loop header
tok.keywords[keyword_index].spacers = []
tok.keywords[keyword_index].token = _NullToken()
def loop_filter(t):
# Remove item from loop row (we don't want to pop from
# t.items as other filters may reference later indexes)
spc = t.items[keyword_index].spacers
if len(spc) > 0 and isinstance(spc[0], _EndOfLineToken):
del spc[1:]
else:
t.items[keyword_index].spacers = []
t.items[keyword_index].token = _NullToken()
return t
return loop_filter
class ChangeKeywordFilter(Filter):
"""Change the keyword in any applicable token to be ``new``.
:param str new: The new keyword.
See :class:`Filter` for a description of the ``target`` parameter.
"""
def __init__(self, target, new):
super().__init__(target)
self.new = new
def filter_category(self, tok):
if self.match_token_keyword(tok):
tok.vartoken.keyword = self.new
return tok
def get_loop_filter(self, tok):
if self.match_token_category(tok):
try:
keyword_index = tok.keyword_index(self.keyword)
except ValueError:
return
tok.keywords[keyword_index].token.keyword = self.new
class ReplaceCategoryFilter(Filter):
"""Replace any token from the file that sets the given category.
This can also be used to completely remove a category if no
replacement is given.
:param str target: the mmCIF category name this filter should act on,
such as ``_entity``.
:param str raw_cif: if given, text in mmCIF format which should replace
the first instance of the category.
:param dumper: if given, a dumper object that should generate mmCIF
output to replace the first instance of the category.
:type dumper: :class:`ihm.dumper.Dumper`
:param system: the System that the given dumper will work on.
:type system: :class:`ihm.System`
"""
class _RawCifToken(_Token):
__slots__ = ['txt']
category = keyword = None
def __init__(self, txt):
self.txt = txt
def as_mmcif(self):
return self.txt
def __init__(self, target, raw_cif=None, dumper=None, system=None):
self._set_category_from_target(target)
self.raw_cif = raw_cif
self.dumper = dumper
self.system = system
#: The number of times the category was found in the mmCIF file
self.num_matches = 0
def _get_replacement_token(self):
if self.num_matches > 1:
return None
if self.raw_cif:
return self._RawCifToken(self.raw_cif)
elif self.dumper and self.system:
fh = StringIO()
writer = CifWriter(fh)
self.dumper.finalize(self.system)
self.dumper.dump(self.system, writer)
return self._RawCifToken(fh.getvalue())
def filter_category(self, tok):
if self.match_token_category(tok):
self.num_matches += 1
return self._get_replacement_token()
else:
return tok
def filter_loop_header(self, tok):
return self.filter_category(tok)
def get_loop_filter(self, tok):
return None
class CifTokenReader(_PreservingCifTokenizer):
"""Read an mmCIF file and break it into tokens.
Unlike :class:`CifReader` which extracts selected data from an mmCIF
file, this class operates on the file at a lower level, splitting
it into tokens, and preserving data such as comments and whitespace.
This can be used for various housekeeping tasks directly on an mmCIF
file, such as changing chain IDs or renaming categories or data items.
Use :meth:`read_file` to actually read the file.
:param file fh: Open handle to the mmCIF file
"""
def __init__(self, fh):
super().__init__(fh)
def read_file(self, filters=None):
"""Read the file and yield tokens and/or token groups. The exact type
of the tokens is subject to change and is not currently documented;
however, each token or group object has an ``as_mmcif`` method
which returns the corresponding text in mmCIF format. Thus, the
file can be reconstructed by concatenating the result of
``as_mmcif`` for all tokens.
:exc:`CifParserError` will be raised if the file cannot be parsed.
:param filters: if a list of :class:`Filter` objects is provided,
the read tokens will be modified or removed by each of these
filters before being returned.
:type filters: sequence of :class:`Filter`
:return: tokens and/or token groups.
"""
if filters is None:
return self._read_file_internal()
else:
return self._read_file_with_filters(filters)
def _read_file_with_filters(self, filters):
loop_filters = None
remove_all_loop_rows = False
for tok in self._read_file_internal():
if isinstance(tok, _CategoryTokenGroup):
tok = self._filter_category(tok, filters)
elif isinstance(tok, ihm.format._LoopHeaderTokenGroup):
new_tok = self._filter_loop_header(tok, filters)
if new_tok is not tok:
tok = new_tok
remove_all_loop_rows = True
else:
remove_all_loop_rows = False
loop_filters = [f.get_loop_filter(tok) for f in filters]
loop_filters = [f for f in loop_filters if f is not None]
# Did filters remove all keywords from the loop?
if all(isinstance(k.token, _NullToken)
for k in tok.keywords):
tok = None
remove_all_loop_rows = True
elif isinstance(tok, ihm.format._LoopRowTokenGroup):
if remove_all_loop_rows:
tok = None
elif loop_filters:
tok = self._filter_loop(tok, loop_filters)
if tok is not None:
yield tok
def _filter_category(self, tok, filters):
for f in filters:
tok = f.filter_category(tok)
if tok is None:
return
return tok
def _filter_loop_header(self, tok, filters):
orig_tok = tok
for f in filters:
tok = f.filter_loop_header(tok)
if tok is not orig_tok:
break
return tok
def _filter_loop(self, tok, filters):
for f in filters:
tok = f(tok)
if tok is None:
return
return tok
def _read_file_internal(self):
while True:
token = self._get_token()
if token is None:
break
if isinstance(token, _VariableToken):
yield self._read_value(token)
elif isinstance(token, _LoopToken):
for tok in self._read_loop(token):
yield tok
# Did we hit the end of the file?
if self._token_index < 0:
break
else:
yield token
def _get_spaced_token(self):
"""Get the next token plus any number of leading space/EOL tokens"""
spacers = []
while True:
token = self._get_token()
if isinstance(token, (_EndOfLineToken, _WhitespaceToken)):
spacers.append(token)
else:
return _SpacedToken(spacers, token)
def _read_value(self, vartoken):
"""Read a line that sets a single value, e.g. "_entry.id 1YTI"""
spval = self._get_spaced_token()
if not isinstance(spval.token, _ValueToken):
raise CifParserError(
"No valid value found for %s.%s on line %d"
% (vartoken.category, vartoken.keyword, self._linenum))
eoltok = self._get_token()
if not isinstance(eoltok, _EndOfLineToken):
raise CifParserError(
"No end of line after %s.%s on line %d"
% (vartoken.category, vartoken.keyword, self._linenum))
return _CategoryTokenGroup(vartoken, spval)
def _read_loop(self, looptoken):
"""Handle a loop_ construct"""
header = self._read_loop_header(looptoken)
# Record original number of keywords, in case the header token
# is filtered
num_keywords = len(header.keywords)
yield header
for line in self._read_loop_data(num_keywords):
yield line
def _read_loop_header(self, looptoken):
"""Read the set of keywords for a loop_ construct"""
category = None
keywords = []
while True:
spt = self._get_spaced_token()
if isinstance(spt.token, _VariableToken):
if category is None:
category = spt.token.category
elif category != spt.token.category:
raise CifParserError(
"mmCIF files cannot contain multiple "
"categories within a single loop at line %d"
% self._linenum)
keywords.append(spt)
elif isinstance(spt.token, _ValueToken):
# OK, end of keywords; proceed on to values
self._unget_token()
return _LoopHeaderTokenGroup(looptoken, category, keywords,
spt.spacers)
else:
raise CifParserError("Was expecting a keyword or value for "
"loop at line %d" % self._linenum)
def _read_loop_data(self, num_keywords):
"""Read the data for a loop_ construct"""
while True:
items = []
for i in range(num_keywords):
spt = self._get_spaced_token()
if isinstance(spt.token, _ValueToken):
items.append(spt)
elif i == 0:
# OK, end of the loop
for s in spt.spacers:
yield s
if spt.token is not None:
self._unget_token()
return
else:
raise CifParserError(
"Wrong number of data values in loop "
"(should be an exact multiple of the number "
"of keys) at line %d" % self._linenum)
yield _LoopRowTokenGroup(items)
def _int_type_handler(txt, linenum):
try:
return int(txt)
except ValueError as exc:
raise ValueError("%s at line %d" % (str(exc), linenum))
def _float_type_handler(txt, linenum):
try:
return float(txt)
except ValueError as exc:
raise ValueError("%s at line %d" % (str(exc), linenum))
class _BoolTypeHandler:
_bool_map = {'YES': True, 'NO': False}
def __init__(self, omitted):
self.omitted = omitted
def __call__(self, txt, linenum):
return self._bool_map.get(txt.upper(), self.omitted)
def _str_type_handler(txt, linenum):
return txt
class CifReader(_Reader, _CifTokenizer):
"""Class to read an mmCIF file and extract some or all of its data.
Use :meth:`read_file` to actually read the file.
See also :class:`CifTokenReader` for a class that operates on the
lower-level structure of an mmCIF file, preserving data such as
comments and whitespace.
:param file fh: Open handle to the mmCIF file
:param dict category_handler: A dict to handle data
extracted from the file. Keys are category names
(e.g. "_entry") and values are objects that have a `__call__`
method and `not_in_file`, `omitted`, and `unknown` attributes.
The names of the arguments to this `__call__` method
are mmCIF keywords that are extracted from the file (for the
keywords tr_vector[N] and rot_matrix[N][M] simply omit the [
and ] characters, since these are not valid for Python
identifiers). The object will be called with the data from
the file as a set of strings, or `not_in_file`, `omitted` or
`unknown` for any keyword that is not present in the file,
the mmCIF omitted value (.), or mmCIF unknown value (?)
respectively. (mmCIF keywords are case insensitive, so this
class always treats them as lowercase regardless of the
file contents.)
:param unknown_category_handler: A callable (or `None`) that is called
for each category in the file that isn't handled; it is given
two arguments: the name of the category, and the line in the
file at which the category was encountered (if known, otherwise
None).
:param unknown_keyword_handler: A callable (or `None`) that is called
for each keyword in the file that isn't handled (within a
category that is handled); it is given three arguments:
the names of the category and keyword, and the line in the
file at which the keyword was encountered (if known,
otherwise None).
"""
def __init__(self, fh, category_handler, unknown_category_handler=None,
unknown_keyword_handler=None):
if _format is not None:
c_file = _format.ihm_file_new_from_python(fh, False)
self._c_format = _format.ihm_reader_new(c_file, False)
self.category_handler = category_handler
self.unknown_category_handler = unknown_category_handler
self.unknown_keyword_handler = unknown_keyword_handler
self._category_data = {}
_CifTokenizer.__init__(self, fh)
def __del__(self):
if hasattr(self, '_c_format'):
_format.ihm_reader_free(self._c_format)
def _read_value(self, vartoken):
"""Read a line that sets a single value, e.g. "_entry.id 1YTI"""
# Only read the value if we're interested in this category and key
if vartoken.category in self.category_handler:
if vartoken.keyword \
in self.category_handler[vartoken.category]._keys:
valtoken = self._get_token()
if isinstance(valtoken, _ValueToken):
ch = self.category_handler[vartoken.category]
if vartoken.category not in self._category_data:
self._category_data[vartoken.category] = {}
if isinstance(valtoken, _OmittedValueToken):
val = ch.omitted
elif isinstance(valtoken, _UnknownValueToken):
val = ch.unknown
else:
tc = self._get_type_handler(ch, vartoken.keyword)
val = tc(valtoken.txt, self._linenum)
self._category_data[vartoken.category][vartoken.keyword] \
= val
else:
raise CifParserError(
"No valid value found for %s.%s on line %d"
% (vartoken.category, vartoken.keyword, self._linenum))
elif self.unknown_keyword_handler is not None:
self.unknown_keyword_handler(vartoken.category,
vartoken.keyword, self._linenum)
elif self.unknown_category_handler is not None:
self.unknown_category_handler(vartoken.category, self._linenum)
def _read_loop_keywords(self):
"""Read the set of keywords for a loop_ construct"""
category = None
keywords = []
first_line = None
keyword_lines = []
while True:
token = self._get_token()
if isinstance(token, _VariableToken):
if category is None:
category = token.category
first_line = self._linenum
elif category != token.category:
raise CifParserError(
"mmCIF files cannot contain multiple "
"categories within a single loop at line %d"
% self._linenum)
keywords.append(token.keyword)
keyword_lines.append(self._linenum)
elif isinstance(token, _ValueToken):
# OK, end of keywords; proceed on to values
self._unget_token()
return category, keywords, keyword_lines, first_line
else:
raise CifParserError("Was expecting a keyword or value for "
"loop at line %d" % self._linenum)
def _read_loop_data(self, handler, num_wanted_keys, keyword_indices,
type_handlers):
"""Read the data for a loop_ construct"""
data = [handler.not_in_file] * num_wanted_keys
while True:
for i, index in enumerate(keyword_indices):
token = self._get_token()
if isinstance(token, _ValueToken):
if index >= 0:
if isinstance(token, _OmittedValueToken):
data[index] = handler.omitted
elif isinstance(token, _UnknownValueToken):
data[index] = handler.unknown
else:
data[index] = type_handlers[index](token.txt,
self._linenum)
elif i == 0:
# OK, end of the loop
self._unget_token()
return
else:
raise CifParserError(
"Wrong number of data values in loop "
"(should be an exact multiple of the number "
"of keys) at line %d" % self._linenum)
handler(*data)
def _get_type_handler(self, category_handler, keyword):
"""Return a function that converts keyword string into desired type"""
if keyword in category_handler._int_keys:
return _int_type_handler
elif keyword in category_handler._bool_keys:
return _BoolTypeHandler(category_handler.omitted)
elif keyword in category_handler._float_keys:
return _float_type_handler
else:
return _str_type_handler
def _read_loop(self):
"""Handle a loop_ construct"""
(category, keywords,
keyword_lines, first_line) = self._read_loop_keywords()
# Skip data if we don't have a handler for it
if category in self.category_handler:
ch = self.category_handler[category]
type_handlers = [self._get_type_handler(ch, k) for k in ch._keys]
wanted_key_index = {}
for i, k in enumerate(ch._keys):
wanted_key_index[k] = i
indices = [wanted_key_index.get(k, -1) for k in keywords]
if self.unknown_keyword_handler is not None:
for k, i, line in zip(keywords, indices, keyword_lines):
if i == -1:
self.unknown_keyword_handler(category, k, line)
self._read_loop_data(ch, len(ch._keys), indices, type_handlers)
elif self.unknown_category_handler is not None:
self.unknown_category_handler(category, first_line)
def read_file(self):
"""Read the file and extract data.
Category handlers will be called as data becomes available -
for ``loop_`` constructs, this will be once for each row in the
loop; for categories (e.g. ``_entry.id model``), this will be once
at the very end of the file.
If the C-accelerated _format module is available, then it is used
instead of the (much slower) Python tokenizer.
:exc:`CifParserError` will be raised if the file cannot be parsed.
:return: True iff more data blocks are available to be read.
"""
self._add_category_keys()
if hasattr(self, '_c_format'):
return self._read_file_c()
def call_all_categories():
for cat, data in self._category_data.items():
ch = self.category_handler[cat]
ch(*[data.get(k, ch.not_in_file) for k in ch._keys])
# Clear category data for next call to read_file()
self._category_data = {}
ndata = 0
in_save = False
while True:
token = self._get_token(ignore_multiline=True)
if token is None:
break
if isinstance(token, _VariableToken):
self._read_value(token)
elif isinstance(token, _DataToken):
ndata += 1
# Only read the first data block
if ndata > 1:
# Allow reading the next data block
self._unget_token()
break
elif isinstance(token, _LoopToken):
self._read_loop()
# Did we hit the end of the file?
if self._token_index < 0:
break
elif isinstance(token, _SaveToken):
in_save = not in_save
if not in_save:
call_all_categories()
for handler in self.category_handler.values():
handler.end_save_frame()
call_all_categories()
return ndata > 1
def _read_file_c(self):
"""Read the file using the C parser"""
_format.ihm_reader_remove_all_categories(self._c_format)
for category, handler in self.category_handler.items():
func = getattr(handler, '_add_c_handler', None) \
or _format.add_category_handler
func(self._c_format, category, handler._keys,
frozenset(handler._int_keys), frozenset(handler._float_keys),
frozenset(handler._bool_keys), handler)
if self.unknown_category_handler is not None:
_format.add_unknown_category_handler(self._c_format,
self.unknown_category_handler)
if self.unknown_keyword_handler is not None:
_format.add_unknown_keyword_handler(self._c_format,
self.unknown_keyword_handler)
try:
ret_ok, more_data = _format.ihm_read_file(self._c_format)
except _format.FileFormatError as exc:
# Convert to the same exception used by the Python code
raise CifParserError(str(exc))
return more_data
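# Illustrative sketch (not part of the library API): a minimal category
# handler and its use with CifReader. A handler needs `not_in_file`,
# `omitted` and `unknown` attributes plus a `__call__` whose argument
# names match the mmCIF keywords of interest; the category and keyword
# here are real mmCIF names, but the handler class itself is hypothetical.
class _ExampleEntryHandler:
    not_in_file = omitted = unknown = None
    def __init__(self):
        self.entry_ids = []
    def __call__(self, id):
        self.entry_ids.append(id)
    def end_save_frame(self):
        pass   # only called if the file contains save frames
def _example_read_entry(fh):
    handler = _ExampleEntryHandler()
    reader = CifReader(fh, category_handler={'_entry': handler})
    reader.read_file()
    return handler.entry_ids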
python-ihm-2.7/ihm/format_bcif.py 0000664 0000000 0000000 00000053325 15035733372 0017100 0 ustar 00root root 0000000 0000000 """Utility classes to handle BinaryCIF format.
See https://github.com/molstar/BinaryCIF for a description of the
BinaryCIF file format.
This module provides classes to read in and write out BinaryCIF files. It is
only concerned with handling syntactically correct BinaryCIF -
it does not know the set of tables or the mapping to ihm objects. For that,
see :mod:`ihm.reader`.
"""
import struct
import sys
import inspect
import ihm.format
import ihm
try:
from . import _format
except ImportError:
_format = None
# ByteArray types
_Int8 = 1
_Int16 = 2
_Int32 = 3
_Uint8 = 4
_Uint16 = 5
_Uint32 = 6
_Float32 = 32
_Float64 = 33
class _Decoder:
"""Base class for all decoders."""
_kind = None # Encoder kind (in BinaryCIF specification)
def __call__(self, enc, data):
"""Given encoding information `enc` and raw data `data`, return
decoded data. This can be a generator."""
pass
class _StringArrayDecoder(_Decoder):
"""Decode an array of strings stored as a concatenation of all unique
strings, an array of offsets describing substrings, and indices into
the offset array."""
_kind = 'StringArray'
def __call__(self, enc, data):
offsets = list(_decode(enc['offsets'], enc['offsetEncoding']))
indices = _decode(data, enc['dataEncoding'])
substr = []
string_data = enc['stringData']
for i in range(0, len(offsets) - 1):
substr.append(string_data[offsets[i]:offsets[i + 1]])
# todo: return a listlike class instead?
for i in indices:
yield None if i < 0 else substr[i]
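# Worked example (illustrative, with hypothetical data): a StringArray
# column whose unique-string blob is 'aGLU' with offsets [0, 1, 4]
# defines the substrings 'a' and 'GLU'; the indices then select one
# substring per row, with -1 meaning a masked (None) value. Empty
# encoding lists mean the offsets and indices need no further decoding.
def _example_string_array_decode():
    enc = {'stringData': 'aGLU',
           'offsets': [0, 1, 4],
           'offsetEncoding': [],
           'dataEncoding': []}
    return list(_StringArrayDecoder()(enc, [1, 0, 1, -1]))
    # -> ['GLU', 'a', 'GLU', None]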
class _ByteArrayDecoder(_Decoder):
"""Decode an array of numbers of specified type stored as raw bytes"""
_kind = 'ByteArray'
# Map integer/float type to struct format string
_struct_map = {
_Int8: 'b',
_Int16: 'h',
_Int32: 'i',
_Uint8: 'B',
_Uint16: 'H',
_Uint32: 'I',
_Float32: 'f',
_Float64: 'd',
}
def __call__(self, enc, data):
fmt = self._struct_map[enc['type']]
sz = len(data) // struct.calcsize(fmt)
# All data is encoded little-endian in bcif
return struct.unpack('<' + fmt * sz, data)
class _IntegerPackingDecoder(_Decoder):
"""Decode a (32-bit) integer array stored as 8- or 16-bit values."""
_kind = 'IntegerPacking'
def _unsigned_decode(self, enc, data):
limit = 0xFF if enc['byteCount'] == 1 else 0xFFFF
i = 0
while i < len(data):
value = 0
t = data[i]
while t == limit:
value += t
i += 1
t = data[i]
yield value + t
i += 1
def _signed_decode(self, enc, data):
upper_limit = 0x7F if enc['byteCount'] == 1 else 0x7FFF
lower_limit = -upper_limit - 1
i = 0
while i < len(data):
value = 0
t = data[i]
while t == upper_limit or t == lower_limit:
value += t
i += 1
t = data[i]
yield value + t
i += 1
def __call__(self, enc, data):
if enc['isUnsigned']:
return self._unsigned_decode(enc, data)
else:
return self._signed_decode(enc, data)
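# Worked example (illustrative): with 1-byte signed packing, values at
# the type limits (127 or -128) act as continuation markers and are
# summed with the following values, so [127, 13, -128, -2] unpacks to
# [127 + 13, -128 + -2] == [140, -130].
def _example_integer_packing_decode():
    enc = {'byteCount': 1, 'isUnsigned': False}
    return list(_IntegerPackingDecoder()(enc, [127, 13, -128, -2]))
    # -> [140, -130]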
class _DeltaDecoder(_Decoder):
"""Decode an integer array stored as an array of consecutive
differences."""
_kind = 'Delta'
def __call__(self, enc, data):
val = enc['origin']
for d in data:
val += d
yield val
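# Worked example (illustrative): delta decoding starts from 'origin'
# and accumulates the differences, so origin 1000 with deltas
# [0, 3, 2, 1] yields [1000, 1003, 1005, 1006].
def _example_delta_decode():
    return list(_DeltaDecoder()({'origin': 1000}, [0, 3, 2, 1]))
    # -> [1000, 1003, 1005, 1006]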
class _RunLengthDecoder(_Decoder):
"""Decode an integer array stored as pairs of (value, number of repeats)"""
_kind = 'RunLength'
def __call__(self, enc, data):
data = list(data)
for i in range(0, len(data), 2):
for j in range(data[i + 1]):
yield data[i]
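# Worked example (illustrative): the data is read as (value, repeat)
# pairs, so [1, 3, 9, 2] expands to three 1s followed by two 9s. The
# encoding dict carries no parameters this decoder needs.
def _example_run_length_decode():
    return list(_RunLengthDecoder()({}, [1, 3, 9, 2]))
    # -> [1, 1, 1, 9, 9]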
class _FixedPointDecoder(_Decoder):
"""Decode a floating point array stored as integers multiplied by
a given factor."""
_kind = 'FixedPoint'
def __call__(self, enc, data):
factor = float(enc['factor'])
for d in data:
yield float(d) / factor
class _IntervalQuantizationDecoder(_Decoder):
"""Decode a floating point array stored as integers quantized within
a given interval into a number of discrete steps."""
_kind = 'IntervalQuantization'
def __call__(self, enc, data):
minval = float(enc['min'])
maxval = float(enc['max'])
numsteps = int(enc['numSteps'])
delta = (maxval - minval) / (numsteps - 1)
for d in data:
yield minval + delta * d
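# Worked example (illustrative): quantizing the interval [0, 1] into 11
# steps gives a step size of 0.1, so the stored integers [0, 5, 10] map
# back to [0.0, 0.5, 1.0].
def _example_interval_quantization_decode():
    enc = {'min': 0.0, 'max': 1.0, 'numSteps': 11}
    return list(_IntervalQuantizationDecoder()(enc, [0, 5, 10]))
    # -> [0.0, 0.5, 1.0]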
def _get_decoder_map():
m = {}
for d in [x[1] for x in inspect.getmembers(sys.modules[__name__],
inspect.isclass)
if issubclass(x[1], _Decoder)]:
m[d._kind] = d()
return m
# Mapping from BinaryCIF encoding names to _Decoder objects
_decoder_map = _get_decoder_map()
def _decode(data, encoding):
"""Decode the data using the list of encodings, and return it."""
for enc in reversed(encoding):
data = _decoder_map[enc['kind']](enc, data)
return data
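# Illustrative sketch (hypothetical data): encodings are listed in the
# order they were applied, so _decode applies them in reverse - here the
# raw bytes are first unpacked as int8 values, then delta-decoded.
def _example_decode_pipeline():
    raw = struct.pack('<4b', 0, 1, 1, 1)
    encoding = [{'kind': 'Delta', 'origin': 10},
                {'kind': 'ByteArray', 'type': _Int8}]
    return list(_decode(raw, encoding))
    # -> [10, 11, 12, 13]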
class _BoolTypeHandler:
_bool_map = {'YES': True, 'NO': False}
def __init__(self, omitted):
self.omitted = omitted
def __call__(self, txt):
return self._bool_map.get(str(txt).upper(), self.omitted)
class BinaryCifReader(ihm.format._Reader):
"""Class to read a BinaryCIF file and extract some or all of its data.
Use :meth:`read_file` to actually read the file.
See :class:`ihm.format.CifReader` for a description of the parameters.
"""
def __init__(self, fh, category_handler, unknown_category_handler=None,
unknown_keyword_handler=None):
if _format is not None:
c_file = _format.ihm_file_new_from_python(fh, True)
self._c_format = _format.ihm_reader_new(c_file, True)
self.category_handler = category_handler
self.unknown_category_handler = unknown_category_handler
self.unknown_keyword_handler = unknown_keyword_handler
self.fh = fh
self._file_blocks = None
def __del__(self):
if hasattr(self, '_c_format'):
_format.ihm_reader_free(self._c_format)
def read_file(self):
"""Read the file and extract data.
If the C-accelerated _format module is available, then it is used
instead of the (much slower) Python reader.
:return: True iff more data blocks are available to be read.
"""
self._add_category_keys()
if hasattr(self, '_c_format'):
return self._read_file_c()
if self._file_blocks is None:
self._file_blocks = self._read_msgpack()
if len(self._file_blocks) > 0:
for category in self._file_blocks[0]['categories']:
cat_name = category['name'].lower()
handler = self.category_handler.get(cat_name, None)
if handler:
self._handle_category(handler, category, cat_name)
elif self.unknown_category_handler is not None:
self.unknown_category_handler(cat_name, 0)
del self._file_blocks[0]
return len(self._file_blocks) > 0
def _read_file_c(self):
"""Read the file using the C parser"""
_format.ihm_reader_remove_all_categories(self._c_format)
for category, handler in self.category_handler.items():
func = getattr(handler, '_add_c_handler', None) \
or _format.add_category_handler
func(self._c_format, category, handler._keys,
frozenset(handler._int_keys), frozenset(handler._float_keys),
frozenset(handler._bool_keys), handler)
if self.unknown_category_handler is not None:
_format.add_unknown_category_handler(self._c_format,
self.unknown_category_handler)
if self.unknown_keyword_handler is not None:
_format.add_unknown_keyword_handler(self._c_format,
self.unknown_keyword_handler)
ret_ok, more_data = _format.ihm_read_file(self._c_format)
return more_data
def _get_type_handler(self, category_handler, keyword):
"""Return a function that converts keyword string into desired type"""
if keyword in category_handler._int_keys:
return int
elif keyword in category_handler._bool_keys:
return _BoolTypeHandler(category_handler.omitted)
elif keyword in category_handler._float_keys:
return float
else:
return str
def _handle_category(self, handler, category, cat_name):
"""Extract data for the given category"""
num_cols = len(handler._keys)
type_handlers = [self._get_type_handler(handler, k)
for k in handler._keys]
# Read all data for the category;
# category_data[col][row]
category_data = [None] * num_cols
num_rows = 0
# Only read columns that match a handler key (case insensitive)
key_index = {}
for i, key in enumerate(handler._keys):
key_index[key] = i
column_indices = []
for c in category['columns']:
key_name = c['name'].lower()
ki = key_index.get(key_name, None)
if ki is not None:
column_indices.append(ki)
r = self._read_column(c, handler, type_handlers[ki])
num_rows = len(r)
category_data[ki] = r
elif self.unknown_keyword_handler is not None:
self.unknown_keyword_handler(cat_name, key_name, 0)
row_data = [handler.not_in_file] * num_cols
for row in range(num_rows):
            # Only update data for columns that we read (others will
            # remain not_in_file)
for i in column_indices:
row_data[i] = category_data[i][row]
handler(*row_data)
def _read_column(self, column, handler, type_handler):
"""Read a single category column data"""
data = _decode(column['data']['data'], column['data']['encoding'])
# Handle 'unknown' values (mask==2) or 'omitted' (mask==1)
if column['mask'] is not None:
mask = _decode(column['mask']['data'],
column['mask']['encoding'])
return [handler.unknown if m == 2 else handler.omitted if m == 1
else type_handler(d) for d, m in zip(data, mask)]
else:
return [type_handler(d) for d in data]
def _read_msgpack(self):
"""Read the msgpack data from the file and return data blocks"""
import msgpack
d = msgpack.unpack(self.fh, raw=False)
return d['dataBlocks']
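# Illustrative sketch (not part of the library API): BinaryCifReader
# accepts the same handler protocol as ihm.format.CifReader. This
# hypothetical handler collects _entry.id values; reading requires the
# msgpack module and a file handle opened in binary mode.
def _example_read_bcif(fh):
    class EntryHandler:
        not_in_file = omitted = unknown = None
        def __init__(self):
            self.ids = []
        def __call__(self, id):
            self.ids.append(id)
    handler = EntryHandler()
    reader = BinaryCifReader(fh, category_handler={'_entry': handler})
    reader.read_file()
    return handler.ids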
class _CategoryWriter:
def __init__(self, writer, category):
self.writer = writer
self.category = category
self._data = {}
def write(self, **kwargs):
self._data.update(kwargs)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
for k in self._data:
self._data[k] = [self._data[k]]
self.writer._add_category(self.category, self._data)
class _LoopWriter:
def __init__(self, writer, category, keys):
self.writer = writer
self.category = category
self.keys = keys
# Remove characters that we can't use in Python identifiers
self.python_keys = [k.replace('[', '').replace(']', '') for k in keys]
self._values = []
for i in range(len(keys)):
self._values.append([])
def write(self, **kwargs):
for i, k in enumerate(self.python_keys):
val = kwargs.get(k, None)
self._values[i].append(val)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
data = {}
for key, value in zip(self.keys, self._values):
data[key] = value
self.writer._add_category(self.category, data)
class EncodeError(Exception):
"""Exception raised if input data cannot be encoded"""
pass
class _Encoder:
"""Base class for all encoders"""
_kind = None # Encoder kind (in BinaryCIF specification)
def __call__(self, data):
"""Given raw data `data`, return encoded data and a BinaryCIF
encoder information dict."""
pass
def _get_int_float_type(data):
"""Determine the int/float type of the given data"""
# If anything is float, treat everything as single-precision float
for d in data:
if isinstance(d, float):
return _Float32
# Otherwise, figure out the most appropriate int type
min_val = min(data)
max_val = max(data)
if min_val >= 0:
# Unsigned types
for typ, limit in [(_Uint8, 0xFF), (_Uint16, 0xFFFF),
(_Uint32, 0xFFFFFFFF)]:
if max_val <= limit:
return typ
else:
# Signed types
for typ, up_limit in [(_Int8, 0x7F), (_Int16, 0x7FFF),
(_Int32, 0x7FFFFFFF)]:
low_limit = -up_limit - 1
if min_val >= low_limit and max_val <= up_limit:
return typ
raise TypeError("Cannot represent data as BinaryCIF")
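# Worked example (illustrative): small non-negative ints fit in Uint8,
# a negative value forces a signed type wide enough for the extremes,
# and any float makes the whole column Float32.
def _example_int_float_type():
    assert _get_int_float_type([1, 2, 3]) == _Uint8
    assert _get_int_float_type([-1, 200]) == _Int16
    assert _get_int_float_type([1, 2.5]) == _Float32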
class _ByteArrayEncoder(_Encoder):
# Map integer/float type to struct format string
_struct_map = _ByteArrayDecoder._struct_map
def __call__(self, data):
ba_type = _get_int_float_type(data)
encdict = {'kind': 'ByteArray', 'type': ba_type}
fmt = self._struct_map[ba_type]
# All data is encoded little-endian in bcif
return struct.pack('<' + fmt * len(data), *data), encdict
class _DeltaEncoder(_Encoder):
"""Encode an integer array as an array of consecutive differences."""
def __call__(self, data):
# Don't try to compress small arrays; the overhead of the compression
# probably will exceed the space savings
if len(data) <= 40:
return data, None
data_type = _get_int_float_type(data)
encdict = {'kind': 'Delta', 'origin': data[0],
'srcType': data_type}
encdata = [0] + [data[i] - data[i - 1] for i in range(1, len(data))]
return encdata, encdict
class _RunLengthEncoder(_Encoder):
"""Encode an integer array as pairs of (value, number of repeats)"""
def __call__(self, data):
# Don't try to compress small arrays; the overhead of the compression
# probably will exceed the space savings
if len(data) <= 40:
return data, None
data_type = _get_int_float_type(data)
encdict = {'kind': 'RunLength',
'srcType': data_type, 'srcSize': len(data)}
encdata = []
val = None
for d in data:
if d != val:
if val is not None:
encdata.extend((val, repeat)) # noqa: F821
val = d
repeat = 1
else:
repeat += 1
encdata.extend((val, repeat))
# If we didn't save any space, return the original unchanged
if len(encdata) > len(data):
return data, None
else:
return encdata, encdict
def _encode(data, encoders):
"""Encode data using the given encoder objects. Return the encoded data
and a list of BinaryCIF encoding dicts."""
encdicts = []
for enc in encoders:
data, encdict = enc(data)
if encdict is not None:
encdicts.append(encdict)
return data, encdicts
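# Illustrative round trip (hypothetical data): a long constant run
# delta-encodes to mostly zeros, run-length-encodes to a single
# (value, count) pair, and is finally packed to raw bytes; decoding
# with the returned encoding dicts recovers the original list.
def _example_encode_round_trip():
    data = [7] * 100
    encdata, encdicts = _encode(data, [_DeltaEncoder(), _RunLengthEncoder(),
                                       _ByteArrayEncoder()])
    return list(_decode(encdata, encdicts)) == data   # True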
class _MaskedEncoder:
"""Base class for all encoders that handle potentially masked data"""
def __call__(self, data, mask):
"""Given raw data `data`, and `mask`, return encoded data"""
pass
class _StringArrayMaskedEncoder(_MaskedEncoder):
_int_encoders = [_DeltaEncoder(), _RunLengthEncoder(),
_ByteArrayEncoder()]
def __call__(self, data, mask):
seen_substrs = {} # keys are substrings, values indices
sorted_substrs = []
indices = []
for i, reals in enumerate(data):
if mask is not None and mask[i]:
indices.append(-1)
else:
s = reals
# Map bool to YES/NO strings
if isinstance(s, bool):
s = ihm.format._Writer._boolmap[s]
else:
s = str(s) # coerce non-str data to str
if s not in seen_substrs:
seen_substrs[s] = len(seen_substrs)
sorted_substrs.append(s)
indices.append(seen_substrs[s])
offsets = [0]
total_len = 0
for s in sorted_substrs:
total_len += len(s)
offsets.append(total_len)
data_offsets, enc_offsets = _encode(offsets, self._int_encoders)
data_indices, enc_indices = _encode(indices, self._int_encoders)
enc_dict = {'kind': 'StringArray',
'dataEncoding': enc_indices,
'stringData': ''.join(sorted_substrs),
'offsetEncoding': enc_offsets,
'offsets': data_offsets}
return data_indices, [enc_dict]
class _IntArrayMaskedEncoder(_MaskedEncoder):
_encoders = [_DeltaEncoder(), _RunLengthEncoder(), _ByteArrayEncoder()]
def __call__(self, data, mask):
if mask:
masked_data = [-1 if m else d for m, d in zip(mask, data)]
else:
masked_data = data
encdata, encoders = _encode(masked_data, self._encoders)
return encdata, encoders
class _FloatArrayMaskedEncoder(_MaskedEncoder):
_encoders = [_ByteArrayEncoder()]
def __call__(self, data, mask):
if mask:
masked_data = [0. if m else d for m, d in zip(mask, data)]
else:
masked_data = data
encdata, encoders = _encode(masked_data, self._encoders)
return encdata, encoders
def _get_mask_and_type(data):
"""Detect missing/omitted values in `data` and determine the type of
the remaining values (str, int, float)"""
mask = None
seen_types = set()
for i, val in enumerate(data):
if val is None or val == ihm.unknown:
if mask is None:
mask = [0] * len(data)
mask[i] = 1 if val is None else 2
else:
seen_types.add(type(val))
# If a mix of types, coerce to that of the highest precedence
    # (mixed int/float can be represented as float; mixed int/float/str can
    # be represented as str; bool is represented as str)
if not seen_types or bool in seen_types or str in seen_types:
return mask, str
elif float in seen_types:
return mask, float
elif int in seen_types:
return mask, int
for t in seen_types:
# Handle numpy float types like Python float
# todo: this is a hack
if 'numpy.float' in str(t):
return mask, float
raise ValueError("Cannot determine type of data %s" % data)
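# Worked example (illustrative): None marks an omitted value (mask 1)
# and ihm.unknown an unknown one (mask 2); the remaining values here
# are floats, so the whole column is typed as float.
def _example_mask_and_type():
    mask, typ = _get_mask_and_type([1.0, None, 3.5, ihm.unknown])
    return mask, typ   # -> ([0, 1, 0, 2], float)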
class BinaryCifWriter(ihm.format._Writer):
"""Write information to a BinaryCIF file. See :class:`ihm.format.CifWriter`
for more information. The constructor takes a single argument - a Python
filelike object, open for writing in binary mode."""
_mask_encoders = [_DeltaEncoder(), _RunLengthEncoder(),
_ByteArrayEncoder()]
def __init__(self, fh):
super().__init__(fh)
self._blocks = []
self._masked_encoder = {str: _StringArrayMaskedEncoder(),
int: _IntArrayMaskedEncoder(),
float: _FloatArrayMaskedEncoder()}
def category(self, category):
"""See :meth:`ihm.format.CifWriter.category`."""
return _CategoryWriter(self, category)
def loop(self, category, keys):
"""See :meth:`ihm.format.CifWriter.loop`."""
return _LoopWriter(self, category, keys)
def write_comment(self, comment):
"""See :meth:`ihm.format.CifWriter.write_comment`.
.. note::
BinaryCIF does not support comments, so this is a noop.
"""
pass
def _encode_data(self, data):
mask, typ = _get_mask_and_type(data)
enc = self._masked_encoder[typ]
encdata, encs = enc(data, mask)
if mask:
data_mask, enc_mask = _encode(mask, self._mask_encoders)
mask = {'data': data_mask, 'encoding': enc_mask}
return mask, encdata, encs
def _encode_column(self, name, data):
mask, encdata, encs = self._encode_data(data)
return {'name': name, 'mask': mask,
'data': {'data': encdata, 'encoding': encs}}
def start_block(self, name):
"""See :meth:`ihm.format.CifWriter.start_block`."""
block = {'header': name, 'categories': []}
self._categories = block['categories']
self._blocks.append(block)
def end_block(self):
# noop - end-of-block is handled by start_block() and flush()
pass
def _add_category(self, category, data):
row_count = 0
cols = []
for k, v in data.items():
row_count = len(v)
# Do nothing if the category has no data
if row_count == 0:
return
cols.append(self._encode_column(k, v))
self._categories.append({'name': category,
'columns': cols, 'rowCount': row_count})
def flush(self):
data = {'version': ihm.__version__,
'encoder': 'python-ihm library',
'dataBlocks': self._blocks}
self._write_msgpack(data)
def _write_msgpack(self, data):
"""Read the msgpack data from the file and return data blocks"""
import msgpack
msgpack.pack(data, self.fh, use_bin_type=True)
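# Illustrative sketch (not part of the library API): writing a minimal
# BinaryCIF file with one category and one loop. The keywords are real
# mmCIF names but the values are hypothetical; `fh` must be opened in
# binary mode and the msgpack module must be installed.
def _example_write_bcif(fh):
    writer = BinaryCifWriter(fh)
    writer.start_block('example')
    with writer.category('_entry') as cat:
        cat.write(id='model')
    with writer.loop('_atom_site', ['id', 'Cartn_x']) as lp:
        lp.write(id=1, Cartn_x=1.0)
        lp.write(id=2, Cartn_x=2.5)
    writer.flush()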
python-ihm-2.7/ihm/geometry.py 0000664 0000000 0000000 00000015465 15035733372 0016463 0 ustar 00root root 0000000 0000000 """Classes for handling geometry.
Geometric objects (see :class:`GeometricObject`) are usually
used in :class:`~ihm.restraint.GeometricRestraint` objects.
"""
class Center:
"""Define the center of a geometric object in Cartesian space.
:param float x: x coordinate
:param float y: y coordinate
:param float z: z coordinate
"""
def __init__(self, x, y, z):
self.x, self.y, self.z = x, y, z
class Transformation:
"""Rotation and translation applied to an object.
Transformation objects are typically used in subclasses of
:class:`GeometricObject`, or by :class:`ihm.dataset.TransformedDataset`.
:param rot_matrix: Rotation matrix (as a 3x3 array of floats) that
places the object in its final position.
:param tr_vector: Translation vector (as a 3-element float list) that
places the object in its final position.
"""
def __init__(self, rot_matrix, tr_vector):
self.rot_matrix, self.tr_vector = rot_matrix, tr_vector
"""Return the identity transformation.
:return: A new identity Transformation.
:rtype: :class:`Transformation`
"""
@classmethod
def identity(cls):
return cls([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]], [0., 0., 0.])
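# Illustrative sketch (not part of the library API): this class only
# stores the rotation and translation; applying them to a coordinate
# (x' = R.x + t) is left to the user, for example:
def _example_apply_transformation(t, xyz):
    return [sum(r[i] * xyz[i] for i in range(3)) + tv
            for r, tv in zip(t.rot_matrix, t.tr_vector)]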
class GeometricObject:
"""A generic geometric object. See also :class:`Sphere`, :class:`Torus`,
:class:`Axis`, :class:`Plane`.
Geometric objects are typically assigned to one or more
:class:`~ihm.restraint.GeometricRestraint` objects.
:param str name: A short user-provided name.
:param str description: A brief description of the object.
"""
type = 'other'
def __init__(self, name=None, description=None):
self.name, self.description = name, description
class Sphere(GeometricObject):
"""A sphere in Cartesian space.
:param center: Coordinates of the center of the sphere.
:type center: :class:`Center`
:param radius: Radius of the sphere.
:param transformation: Rotation and translation that moves the sphere
from the original center to its final location, if any.
:type transformation: :class:`Transformation`
:param str name: A short user-provided name.
:param str description: A brief description of the object.
"""
type = 'sphere'
def __init__(self, center, radius, transformation=None,
name=None, description=None):
super().__init__(name, description)
self.center, self.transformation = center, transformation
self.radius = radius
class Torus(GeometricObject):
"""A torus in Cartesian space.
:param center: Coordinates of the center of the torus.
:type center: :class:`Center`
:param major_radius: The major radius - the distance from the center of
the tube to the center of the torus.
:param minor_radius: The minor radius - the radius of the tube.
:param transformation: Rotation and translation that moves the torus
(which by default lies in the xy plane) from the original
center to its final location, if any.
:type transformation: :class:`Transformation`
:param str name: A short user-provided name.
:param str description: A brief description of the object.
"""
type = 'torus'
def __init__(self, center, major_radius, minor_radius, transformation=None,
name=None, description=None):
super().__init__(name, description)
self.center, self.transformation = center, transformation
self.major_radius, self.minor_radius = major_radius, minor_radius
class HalfTorus(GeometricObject):
"""A section of a :class:`Torus`. This is defined as a surface over part
of the torus with a given thickness, and is often used to represent
a membrane.
:param thickness: The thickness of the surface.
:param inner: True if the surface is the 'inner' half of the torus
(i.e. closer to the center), False for the outer surface, or
None for some other section (described in `description`).
See :class:`Torus` for a description of the other parameters.
"""
type = 'half-torus'
def __init__(self, center, major_radius, minor_radius, thickness,
transformation=None, inner=None, name=None, description=None):
super().__init__(name, description)
self.center, self.transformation = center, transformation
self.major_radius, self.minor_radius = major_radius, minor_radius
self.thickness, self.inner = thickness, inner
class Axis(GeometricObject):
"""One of the three Cartesian axes - see :class:`XAxis`, :class:`YAxis`,
:class:`ZAxis`.
:param transformation: Rotation and translation that moves the axis
from the original Cartesian axis to its final location, if any.
:type transformation: :class:`Transformation`
:param str name: A short user-provided name.
:param str description: A brief description of the object.
"""
type = 'axis'
def __init__(self, transformation=None, name=None, description=None):
super().__init__(name, description)
self.transformation = transformation
class XAxis(Axis):
"""The x Cartesian axis.
See :class:`GeometricObject` for a description of the parameters.
"""
axis_type = 'x-axis'
class YAxis(Axis):
"""The y Cartesian axis.
See :class:`GeometricObject` for a description of the parameters.
"""
axis_type = 'y-axis'
class ZAxis(Axis):
"""The z Cartesian axis.
See :class:`GeometricObject` for a description of the parameters.
"""
axis_type = 'z-axis'
class Plane(GeometricObject):
"""A plane in Cartesian space - see :class:`XYPlane`, :class:`YZPlane`,
:class:`XZPlane`.
:param transformation: Rotation and translation that moves the plane
from the original position to its final location, if any.
:type transformation: :class:`Transformation`
:param str name: A short user-provided name.
:param str description: A brief description of the object.
"""
type = 'plane'
def __init__(self, transformation=None, name=None, description=None):
super().__init__(name, description)
self.transformation = transformation
class XYPlane(Plane):
"""The xy plane in Cartesian space.
See :class:`GeometricObject` for a description of the parameters.
"""
plane_type = 'xy-plane'
class YZPlane(Plane):
"""The yz plane in Cartesian space.
See :class:`GeometricObject` for a description of the parameters.
"""
plane_type = 'yz-plane'
class XZPlane(Plane):
"""The xz plane in Cartesian space.
See :class:`GeometricObject` for a description of the parameters.
"""
plane_type = 'xz-plane'
python-ihm-2.7/ihm/location.py 0000664 0000000 0000000 00000035015 15035733372 0016431 0 ustar 00root root 0000000 0000000 """Classes for tracking external data used by mmCIF models.
"""
import os
class Location:
"""Identifies the location where a resource can be found.
Do not use this class itself, but one of its subclasses.
Typically the resource may be found in a file (either on the local
disk or at a DOI) - for this use one of the subclasses of
:class:`FileLocation`. Alternatively the resource may be found in
an experiment-specific database such as PDB or EMDB - for this use
:class:`DatabaseLocation` or one of its subclasses. A Location may
be passed to
- a :class:`~ihm.dataset.Dataset` to point to where an
experimental dataset may be found;
- an :class:`~ihm.model.Ensemble` to point to coordinates for an
entire ensemble, for example as a DCD file;
- a :class:`ihm.model.LocalizationDensity` to point to an external
localization density, for example in MRC format;
- :data:`ihm.System.locations` to point to other files relating
to the modeling in general, such as a modeling control script
(:class:`WorkflowFileLocation`) or a command script for a
visualization package such as ChimeraX
(:class:`VisualizationFileLocation`);
- a :class:`ihm.protocol.Step` or :class:`ihm.analysis.Step` to
describe an individual modeling step;
- or a :class:`~ihm.startmodel.StartingModel` to describe how a
starting model was constructed.
:param str details: Additional details about the dataset, if known.
"""
# 'details' can differ without affecting dataset equality
_eq_keys = []
_allow_duplicates = False
def __init__(self, details=None):
self.details = details
# Locations compare equal iff they are the same class, have the
# same attributes, and allow_duplicates=False
def _eq_vals(self):
if self._allow_duplicates:
return id(self)
else:
return tuple([self.__class__]
+ [getattr(self, x) for x in self._eq_keys])
def __eq__(self, other):
# We can never be equal to None
return other is not None and self._eq_vals() == other._eq_vals()
def __hash__(self):
return hash(self._eq_vals())
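# Illustrative example (not part of the library API): since 'details' is
# not in _eq_keys, two locations of the same class with the same accession
# code compare equal even if their details differ. EMDBLocation is defined
# later in this module; the name is resolved only when this is called.
def _example_location_equality():
    loc1 = EMDBLocation('EMD-1234', details='map A')
    loc2 = EMDBLocation('EMD-1234', details='map B')
    return loc1 == loc2   # True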
class DatabaseLocation(Location):
"""A dataset stored in an official database (PDB, EMDB, PRIDE, etc.).
Generally a subclass should be used specific to the database -
for example, :class:`PDBLocation`, :class:`EMDBLocation`, or
:class:`PRIDELocation`, although this base class can be used directly
for "other" databases not currently supported by the IHM dictionary.
:param str db_code: The accession code inside the database.
:param str version: The version of the dataset in the database.
:param str details: Additional details about the dataset, if known.
"""
_eq_keys = Location._eq_keys + ['db_name', 'access_code', 'version']
db_name = 'Other'
def __init__(self, db_code, version=None, details=None):
super().__init__(details)
self.access_code = db_code
self.version = version
def __str__(self):
return "<%s.%s(%s)>" % (self.__module__, self.__class__.__name__,
repr(self.access_code))
class EMDBLocation(DatabaseLocation):
"""Something stored in the EMDB database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'EMDB'
class PDBLocation(DatabaseLocation):
"""Something stored in the PDB database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'PDB'
class PDBDevLocation(DatabaseLocation):
"""Something stored in the PDB-Dev database.
This should only be used for legacy entries. All former PDB-Dev entries
(now PDB-IHM) should now have PDB identifiers; use :class:`PDBLocation`
instead.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'PDB-Dev'
class ModelArchiveLocation(DatabaseLocation):
"""Something stored in Model Archive.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'MODEL ARCHIVE'
class BMRBLocation(DatabaseLocation):
"""Something stored in the BMRB database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'BMRB'
class MassIVELocation(DatabaseLocation):
"""Something stored in the MassIVE database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'MASSIVE'
class EMPIARLocation(DatabaseLocation):
"""Something stored in the EMPIAR database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'EMPIAR'
class SASBDBLocation(DatabaseLocation):
"""Something stored in the SASBDB database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'SASBDB'
class PRIDELocation(DatabaseLocation):
"""Something stored in the PRIDE database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'PRIDE'
class JPOSTLocation(DatabaseLocation):
"""Something stored in the JPOST database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'jPOSTrepo'
class BioGRIDLocation(DatabaseLocation):
"""Something stored in the BioGRID database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'BioGRID'
class ProXLLocation(DatabaseLocation):
"""Something stored in the ProXL database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'ProXL'
class IProXLocation(DatabaseLocation):
"""Something stored in the iProX database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'iProX'
class AlphaFoldDBLocation(DatabaseLocation):
"""Something stored in the AlphaFoldDB database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'AlphaFoldDB'
class ProteomeXchangeLocation(DatabaseLocation):
"""Something stored in the ProteomeXchange database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'ProteomeXchange'
class BMRbigLocation(DatabaseLocation):
"""Something stored in the BMRbig database.
See :class:`DatabaseLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects."""
db_name = 'BMRbig'
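

# A minimal sketch of using the database location classes above; the
# accession codes here are hypothetical.
def _database_location_sketch():
    em_map = EMDBLocation('EMD-1234')
    sas_profile = SASBDBLocation('SASDA12')
    return [em_map, sas_profile]
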
class FileLocation(Location):
"""Base class for an individual file or directory stored externally.
:param str path: the location of the file or directory (this can
be `None` if `repo` is set, to refer to the entire repository)
:param repo: object that describes the repository
containing the file, or `None` if it is stored on the local disk
:type repo: :class:`Repository`
:param str details: optional description of the file
:param str file_format: optional file type (e.g. TXT, PNG, FASTA)
"""
_eq_keys = Location._eq_keys + ['repo', 'path', 'content_type']
content_type = 'Other'
def __init__(self, path, repo=None, details=None, file_format=None):
super().__init__(details)
self.repo, self.file_format = repo, file_format
if repo:
self.path = path
# Cannot determine file size if non-local
self.file_size = None
else:
if not os.path.exists(path):
raise ValueError("%s does not exist" % path)
self.file_size = os.stat(path).st_size
# Store absolute path in case the working directory changes later
self.path = os.path.abspath(path)
def __str__(self):
return "<%s.%s(%s)>" % (self.__module__, self.__class__.__name__,
repr(self.path))
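

# Illustrative sketch of the two FileLocation modes described above; the
# DOI and the repository-relative path are hypothetical.
def _file_location_sketch():
    repo = Repository(doi='10.5281/zenodo.1234567')
    # Stored in a repository: path is kept as given, file_size is None
    remote = FileLocation('inputs/data.txt', repo=repo)
    # Stored locally: the file must exist; its size and absolute path
    # are recorded
    local = FileLocation(__file__)
    return remote.file_size, local.file_size
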
class InputFileLocation(FileLocation):
"""An externally stored file used as input.
See :class:`FileLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects.
For example, any :class:`~ihm.dataset.Dataset` that isn't stored in
a domain-specific database would use this class."""
content_type = 'Input data or restraints'
class OutputFileLocation(FileLocation):
"""An externally stored file used for output.
See :class:`FileLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects.
For example, this can be used to point to an externally-stored
       :class:`model ensemble <ihm.model.Ensemble>` or a
       :class:`localization density <ihm.model.LocalizationDensity>`.
"""
content_type = "Modeling or post-processing output"
class WorkflowFileLocation(FileLocation):
"""An externally stored file that controls the workflow (e.g. a script).
See :class:`FileLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects.
Typically these objects are used to provide more information on how
a :class:`~ihm.startmodel.StartingModel` was generated, how an
individual :class:`ihm.protocol.Step` or :class:`ihm.analysis.Step`
was performed, or to describe the overall modeling (by addition
to :data:`ihm.System.locations`). This can be useful to capture fine
details of the modeling that aren't covered by the mmCIF dictionary,
and to allow models to be precisely reproduced.
"""
content_type = "Modeling workflow or script"
class VisualizationFileLocation(FileLocation):
"""An externally stored file that is used for visualization.
See :class:`FileLocation` for a description of the parameters
and :class:`Location` for discussion of the usage of these objects.
"""
content_type = "Visualization script"
class Repository:
"""A repository containing modeling files, i.e. a collection of related
files at a remote, public location. This can include code repositories
such as GitHub, file archival services such as Zenodo, or any other
service that provides a DOI, such as the supplementary information for
a publication.
This can also be used if the script plus related files are part of a
repository, which has been archived somewhere with a DOI.
This will be used to construct permanent references to files
used in this modeling, even if they haven't been uploaded to
a database such as PDB or EMDB.
See :meth:`ihm.System.update_locations_in_repositories`.
See also :class:`FileLocation`.
:param str doi: the Digital Object Identifier for the repository
:param str root: the path on the local disk to the top-level
directory of the repository, or `None` if files in this
repository aren't checked out.
:param str url: If given, a location that this repository can be
downloaded from.
:param str top_directory: If given, prefix all paths for files in
this repository with this value. This is useful when the
archived version of the repository is found in a subdirectory
at the URL or DOI (for example, GitHub repositories
archived at Zenodo get placed in a subdirectory named
for the repository and git hash).
:param str details: Additional text describing this repository
"""
reference_type = 'DOI'
# Two repositories compare equal if their DOIs and URLs are the same
def __eq__(self, other):
return self.doi == other.doi and self.url == other.url
def __hash__(self):
return hash((self.doi, self.url))
def __str__(self):
return "" % self.doi
def __init__(self, doi, root=None, url=None, top_directory=None,
details=None):
# todo: DOI should be optional (could also use URL, local path)
self.doi = doi
self.url, self.top_directory = url, top_directory
self.details = details
if root is not None:
# Store absolute path in case the working directory changes later
self._root = os.path.abspath(root)
reference = property(lambda self: self.doi)
def __get_reference_provider(self):
if self.reference and 'zenodo' in self.reference:
return 'Zenodo'
reference_provider = property(__get_reference_provider)
def __get_refers_to(self):
if self.url:
return 'Archive' if self.url.endswith(".zip") else 'File'
return 'Other'
refers_to = property(__get_refers_to)
@staticmethod
def _update_in_repos(fileloc, repos):
"""If the given FileLocation maps to somewhere within one of the
passed repositories, update it to reflect that."""
if fileloc.repo:
return
orig_path = fileloc.path
for repo in repos:
relpath = os.path.relpath(orig_path, repo._root)
if not relpath.startswith('..'):
# Prefer the shortest paths if multiple repositories can match
if fileloc.repo is None or len(fileloc.path) > len(relpath):
fileloc.repo = repo
fileloc.path = relpath
def _get_full_path(self, path):
"""Prefix the given path with our top-level directory"""
return os.path.join(self.top_directory or "", path)
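

# Sketch of mapping a local file into a repository, as described in the
# Repository docstring; the DOI, URL, and paths here are hypothetical.
# This is normally invoked via ihm.System.update_locations_in_repositories,
# which calls the internal helper used below.
def _repository_sketch():
    repo = Repository(doi='10.5281/zenodo.1234567',
                      root='.', url='https://example.com/archive.zip',
                      top_directory='myrepo-abc123')
    loc = InputFileLocation('data/input.txt')  # must exist locally
    Repository._update_in_repos(loc, [repo])
    # loc.path is now relative to the repository root
    return repo._get_full_path(loc.path)
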
python-ihm-2.7/ihm/metadata.py 0000664 0000000 0000000 00000127405 15035733372 0016406 0 ustar 00root root 0000000 0000000 """Classes to extract metadata from various input files.
Often input files contain metadata that would be useful to include in
the mmCIF file, but the metadata is stored in a different way for each
domain-specific file type. For example, MRC files used for electron
microscopy maps may contain an EMDB identifier, which the mmCIF file
can point to in preference to the local file.
This module provides classes for each file type to extract suitable
metadata where available.
"""
import ihm
from . import location, dataset, startmodel, util
from .startmodel import SequenceIdentityDenominator
import ihm.source
import ihm.citations
import ihm.reader
import ihm.format
import ihm.format_bcif
import operator
import struct
import json
import string
import warnings
import re
import collections
import urllib.request
import urllib.error
def _get_modeller(version, date):
return ihm.Software(
name='MODELLER', classification='comparative modeling',
description='Comparative modeling by satisfaction '
'of spatial restraints, build ' + date,
location='https://salilab.org/modeller/',
version=version,
citation=ihm.citations.modeller)
ModellerTemplate = collections.namedtuple(
'ModellerTemplate', ['name', 'template_begin', 'template_chain',
'template_end', 'target_begin', 'target_chain',
'target_end', 'pct_seq_id'])
def _handle_modeller_template(info, template_path_map, target_dataset,
alnfile):
"""Create a Template object from Modeller PDB header information."""
template_seq_id_range = (int(info.template_begin),
int(info.template_end))
seq_id_range = (int(info.target_begin), int(info.target_end))
sequence_identity = startmodel.SequenceIdentity(
float(info.pct_seq_id), SequenceIdentityDenominator.SHORTER_LENGTH)
# Assume a code of 1abc, 1abc_N, 1abcX, or 1abcX_N refers
# to a real PDB structure
m = re.match(r'(\d[a-zA-Z0-9]{3})[a-zA-Z]?(_.*)?$', info.name)
if m:
template_db_code = m.group(1).upper()
loc = location.PDBLocation(template_db_code)
else:
# Otherwise, look up the PDB file in TEMPLATE PATH remarks
fname = template_path_map[info.name]
loc = location.InputFileLocation(
fname, details="Template for comparative modeling")
d = dataset.PDBDataset(loc, details=loc.details)
# Make the comparative model dataset derive from the template's
target_dataset.parents.append(d)
return (info.target_chain,
startmodel.Template(
dataset=d, asym_id=info.template_chain,
seq_id_range=seq_id_range,
template_seq_id_range=template_seq_id_range,
sequence_identity=sequence_identity,
alignment_file=alnfile))
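

# Quick illustration of the PDB-code heuristic used above: names like
# '1abc' or '2xyzA_3' are treated as PDB entries, while anything else is
# looked up in the TEMPLATE PATH map. (Hypothetical helper, for clarity
# only; not used by the parser itself.)
def _looks_like_pdb_code(name):
    return bool(re.match(r'(\d[a-zA-Z0-9]{3})[a-zA-Z]?(_.*)?$', name))
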
class Parser:
"""Base class for all metadata parsers."""
def parse_file(self, filename):
"""Extract metadata from the given file.
:param str filename: the file to extract metadata from.
:return: a dict with extracted metadata (generally including
a :class:`~ihm.dataset.Dataset`)."""
pass
class MRCParser(Parser):
"""Extract metadata from an EM density map (MRC file)."""
def parse_file(self, filename):
"""Extract metadata. See :meth:`Parser.parse_file` for details.
:return: a dict with key `dataset` pointing to the density map,
as an EMDB entry if the file contains EMDB headers,
otherwise to the file itself.
If the file turns out to be an EMDB entry, this will also query
the EMDB web API (if available) to extract version information
and details for the dataset.
"""
emdb = self._get_emdb(filename)
if emdb:
loc = _ParsedEMDBLocation(emdb)
else:
loc = location.InputFileLocation(
filename, details="Electron microscopy density map")
return {'dataset': dataset.EMDensityDataset(loc)}
def _get_emdb(self, filename):
"""Return the EMDB id of the file, or None."""
r = re.compile(b'EMDATABANK\\.org.*(EMD\\-\\d+)')
with open(filename, 'rb') as fh:
fh.seek(220) # Offset of number of labels
num_labels_raw = fh.read(4)
# Number of labels in MRC is usually a very small number, so it's
# very likely to be the smaller of the big-endian and little-endian
# interpretations of this field
num_labels_big, = struct.unpack_from('>i', num_labels_raw)
            num_labels_little, = struct.unpack_from('<i', num_labels_raw)
            num_labels = min(num_labels_big, num_labels_little)
            for _ in range(num_labels):
                label = fh.read(80).strip()
                m = r.search(label)
                if m:
                    return m.group(1).decode('ascii')


class _ParsedEMDBLocation(location.EMDBLocation):
    """An EMDB location that fills in version and details, where possible,
       by querying the EMDB web API"""
    def __init__(self, emdb):
        version, details = self._get_emdb_info(emdb)
        super().__init__(db_code=emdb, version=version, details=details)

    def _get_emdb_info(self, emdb):
        """Query the EMDB web API for the version (map release date) and
           details (title) of the given entry"""
        req = urllib.request.Request(
            'https://www.ebi.ac.uk/emdb/api/entry/admin/%s' % emdb,
            headers={'Accept': 'application/json'})
        try:
            with urllib.request.urlopen(req, timeout=10) as response:
                info = json.load(response)
            return (info['admin']['key_dates']['map_release'],
                    info['admin']['title'])
        except (urllib.error.URLError, KeyError) as err:
            warnings.warn(
                "EMDB API query failed (%s); version and details of this "
                "EMDB entry will be unknown" % str(err))
            return ihm.unknown, ihm.unknown


class PDBParser(Parser):
    """Extract metadata (e.g. PDB ID, comparative modeling templates)
       from a coordinate file in legacy PDB format. This handles headers
       added by the PDB database itself, by comparative modeling packages
       such as MODELLER, Phyre2, and SWISS-MODEL, and some custom headers
       that describe locally-modified or derived files.

       See also :class:`CIFParser` for files in mmCIF format."""

    def parse_file(self, filename):
        """Extract metadata. See :meth:`Parser.parse_file` for details.

           :return: a dict with key `dataset` pointing to the coordinate
                    file; additional keys such as `templates`, `software`,
                    `entity_source`, `metadata`, and `script` are filled in
                    where the PDB headers provide the information.
        """
        ret = {'templates': {}, 'software': [], 'metadata': [],
               'entity_source': {}, 'script': None}
        with open(filename) as fh:
            first_line = fh.readline()
            local_file = location.InputFileLocation(
                filename, details="Starting model structure")
            if (first_line.startswith('HEADER') and len(first_line) > 62
                    and first_line[62] in string.digits):
self._parse_official_pdb(fh, first_line, ret)
elif first_line.startswith('EXPDTA DERIVED FROM PDB:'):
self._parse_derived_from_pdb(fh, first_line, local_file,
ret)
elif first_line.startswith('EXPDTA DERIVED FROM COMPARATIVE '
'MODEL, DOI:'):
self._parse_derived_from_comp_model(fh, first_line, local_file,
ret)
elif first_line.startswith('EXPDTA DERIVED FROM INTEGRATIVE '
'MODEL, DOI:'):
self._parse_derived_from_int_model(fh, first_line, local_file,
ret)
elif first_line.startswith(
'EXPDTA THEORETICAL MODEL, MODELLER'):
self._parse_modeller_model(fh, first_line, local_file,
filename, ret)
elif first_line.startswith('REMARK 99 Chain ID :'):
self._parse_phyre_model(fh, first_line, local_file,
filename, ret)
elif first_line.startswith('TITLE SWISS-MODEL SERVER'):
self._parse_swiss_model(fh, first_line, local_file,
filename, ret)
else:
self._parse_unknown_model(fh, first_line, local_file,
filename, ret)
return ret
def _parse_official_pdb(self, fh, first_line, ret):
"""Handle a file that's from the official PDB database."""
version, details, metadata, entity_source \
= self._parse_pdb_records(fh, first_line)
loc = location.PDBLocation(first_line[62:66].strip(), version, details)
ret['entity_source'] = entity_source
ret['metadata'] = metadata
ret['dataset'] = dataset.PDBDataset(loc, details=loc.details)
def _parse_derived_from_pdb(self, fh, first_line, local_file, ret):
# Model derived from a PDB structure; treat as a local experimental
# model with the official PDB as a parent
local_file.details = self._parse_details(fh)
db_code = first_line[27:].strip()
d = dataset.PDBDataset(local_file, details=local_file.details)
d.parents.append(dataset.PDBDataset(location.PDBLocation(db_code)))
ret['dataset'] = d
def _parse_derived_from_comp_model(self, fh, first_line, local_file, ret):
"""Model derived from a comparative model; link back to the original
model as a parent"""
self._parse_derived_from_model(
fh, first_line, local_file, ret, dataset.ComparativeModelDataset,
'comparative')
def _parse_derived_from_int_model(self, fh, first_line, local_file, ret):
"""Model derived from an integrative model; link back to the original
model as a parent"""
self._parse_derived_from_model(
fh, first_line, local_file, ret, dataset.IntegrativeModelDataset,
'integrative')
def _parse_derived_from_model(self, fh, first_line, local_file, ret,
dataset_class, model_type):
local_file.details = self._parse_details(fh)
d = dataset_class(local_file)
repo = location.Repository(doi=first_line[46:].strip())
# todo: better specify an unknown path
orig_loc = location.InputFileLocation(
repo=repo, path='.',
details="Starting %s model structure" % model_type)
d.parents.append(dataset_class(orig_loc))
ret['dataset'] = d
def _parse_modeller_model(self, fh, first_line, local_file, filename, ret):
version, date = first_line[38:].rstrip('\r\n').split(' ', 1)
s = _get_modeller(version, date)
ret['software'].append(s)
self._handle_comparative_model(local_file, filename, ret)
def _parse_phyre_model(self, fh, first_line, local_file, filename, ret):
# Model generated by Phyre2
s = ihm.Software(
name='Phyre2', classification='protein homology modeling',
description='Protein Homology/analogY Recognition '
'Engine V 2.0',
version='2.0', location='http://www.sbg.bio.ic.ac.uk/~phyre2/',
citation=ihm.citations.phyre2)
ret['software'].append(s)
self._handle_comparative_model(local_file, filename, ret)
def _parse_swiss_model(self, fh, first_line, local_file, filename, ret):
# Model generated by SWISS-MODEL
meta = _get_swiss_model_metadata(filename)
s = ihm.Software(
name='SWISS-MODEL', classification='protein homology modeling',
description='SWISS-MODEL: homology modelling of protein '
'structures and complexes, using %s engine'
% meta.get('info', {}).get('ENGIN', 'unknown'),
version=meta.get('info', {}).get('VERSN', ihm.unknown),
location='https://swissmodel.expasy.org/',
citation=ihm.citations.swiss_model)
ret['software'].append(s)
comp_model_ds = dataset.ComparativeModelDataset(local_file)
ret['dataset'] = comp_model_ds
ret['templates'] = self._add_swiss_model_templates(
local_file, meta, comp_model_ds, ret)
def _add_swiss_model_templates(self, local_file, meta, comp_model_ds, ret):
"""Add template information extracted from SWISS-MODEL PDB metadata"""
ret_templates = {}
templates = [v for k, v in sorted(((k, v) for k, v in meta.items()
if k.startswith('TEMPLATE')),
key=operator.itemgetter(0))]
for t in templates:
loc = location.PDBLocation(t['PDBID'])
d = dataset.PDBDataset(loc)
# Make the comparative model dataset derive from the template's
comp_model_ds.parents.append(d)
for chain in t['MMCIF']:
# todo: check we're using the right chain ID and that target
# and template chain IDs really are always the same
offset = int(t[chain, 'OFF'])
tgt_seq, tgt_len = _parse_seq(t[chain, 'TRG'])
tmpl_seq, tmpl_len = _parse_seq(t[chain, 'TPL'])
tgt_rng, tmpl_rng = _get_aligned_region(tgt_seq, tmpl_seq)
# apply offset
tmpl_rng = (tmpl_rng[0] + offset, tmpl_rng[1] + offset)
seq_id = float(t['SID'])
seq_id = startmodel.SequenceIdentity(
float(t['SID']),
SequenceIdentityDenominator.NUM_ALIGNED_WITHOUT_GAPS)
tmpl = startmodel.Template(
dataset=d, asym_id=chain, seq_id_range=tgt_rng,
template_seq_id_range=tmpl_rng, sequence_identity=seq_id,
alignment_file=local_file)
ret_templates[chain] = [tmpl]
return ret_templates
def _parse_unknown_model(self, fh, first_line, local_file, filename, ret):
# todo: revisit assumption that all unknown source PDBs are
# comparative models
self._handle_comparative_model(local_file, filename, ret)
def _handle_comparative_model(self, local_file, pdbname, ret):
d = dataset.ComparativeModelDataset(local_file)
ret['dataset'] = d
ret['templates'], ret['script'] \
= self._get_templates_script(pdbname, d)
def _get_templates_script(self, pdbname, target_dataset):
template_path_map = {}
alnfile = None
script = None
alnfilere = re.compile(r'REMARK 6 ALIGNMENT: (\S+)')
scriptre = re.compile(r'REMARK 6 SCRIPT: (\S+)')
tmppathre = re.compile(r'REMARK 6 TEMPLATE PATH (\S+) (\S+)')
tmpre = re.compile(r'REMARK 6 TEMPLATE: '
r'(\S+) (\S+):(\S+) \- (\S+):\S+ '
r'MODELS (\S+):(\S+) \- (\S+):\S+ AT (\S+)%')
template_info = []
with open(pdbname) as fh:
for line in fh:
if line.startswith('ATOM'): # Read only the header
break
m = tmppathre.match(line)
if m:
template_path_map[m.group(1)] = \
util._get_relative_path(pdbname, m.group(2))
m = alnfilere.match(line)
if m:
# Path to alignment is relative to that of the PDB file
fname = util._get_relative_path(pdbname, m.group(1))
alnfile = location.InputFileLocation(
fname,
details="Alignment for starting comparative model")
m = scriptre.match(line)
if m:
# Path to script is relative to that of the PDB file
fname = util._get_relative_path(pdbname, m.group(1))
script = location.WorkflowFileLocation(
fname, details="Script for starting comparative model")
m = tmpre.match(line)
if m:
t = ModellerTemplate(
name=m.group(1), template_begin=m.group(2),
template_chain=m.group(3), template_end=m.group(4),
target_begin=m.group(5), target_chain=m.group(6),
target_end=m.group(7), pct_seq_id=m.group(8))
template_info.append(t)
templates = {}
for t in template_info:
chain, template = _handle_modeller_template(
t, template_path_map, target_dataset, alnfile)
if chain not in templates:
templates[chain] = []
templates[chain].append(template)
# Sort templates by starting residue, then ending residue
for chain in templates.keys():
templates[chain] = sorted(templates[chain],
key=operator.attrgetter('seq_id_range'))
return templates, script
def _parse_pdb_records(self, fh, first_line):
"""Extract information from an official PDB"""
metadata = []
details = ''
compnd = ''
source = ''
for line in fh:
if line.startswith('TITLE'):
details += line[10:].rstrip()
elif line.startswith('COMPND'):
compnd += line[10:].rstrip()
elif line.startswith('SOURCE'):
source += line[10:].rstrip()
elif line.startswith('HELIX'):
metadata.append(startmodel.PDBHelix(line))
return (first_line[50:59].strip(),
details if details else None, metadata,
self._make_entity_source(compnd, source))
def _make_one_entity_source(self, compnd, source):
"""Make a single ihm.source.Source object"""
def make_from_source(cls):
return cls(scientific_name=source.get('ORGANISM_SCIENTIFIC'),
common_name=source.get('ORGANISM_COMMON'),
strain=source.get('STRAIN'),
ncbi_taxonomy_id=source.get('ORGANISM_TAXID'))
if compnd.get('ENGINEERED', None) == 'YES':
gene = make_from_source(ihm.source.Details)
host = ihm.source.Details(
scientific_name=source.get('EXPRESSION_SYSTEM'),
common_name=source.get('EXPRESSION_SYSTEM_COMMON'),
strain=source.get('EXPRESSION_SYSTEM_STRAIN'),
ncbi_taxonomy_id=source.get('EXPRESSION_SYSTEM_TAXID'))
return ihm.source.Manipulated(gene=gene, host=host)
else:
if source.get('SYNTHETIC', None) == 'YES':
cls = ihm.source.Synthetic
else:
cls = ihm.source.Natural
return make_from_source(cls)
def _make_entity_source(self, compnd, source):
"""Make ihm.source.Source objects given PDB COMPND and SOURCE lines"""
entity_source = {}
# Convert each string into dict of mol_id vs keys
compnd = self._parse_pdb_mol_id(compnd)
source = self._parse_pdb_mol_id(source)
for mol_id, c in compnd.items():
if mol_id in source and 'CHAIN' in c:
s = self._make_one_entity_source(c, source[mol_id])
for chain in c['CHAIN'].split(','):
entity_source[chain.strip()] = s
return entity_source
def _parse_pdb_mol_id(self, txt):
"""Convert text COMPND or SOURCE records to a dict of mol_id vs keys"""
d = {}
mol_id = None
for pair in txt.split(';'):
spl = pair.split(':')
if len(spl) == 2:
key = spl[0].upper().strip()
val = spl[1].upper().strip()
if key == 'MOL_ID':
mol_id = d[val] = {}
elif mol_id is not None:
mol_id[key] = val
return d
def _parse_details(self, fh):
"""Extract TITLE records from a PDB file"""
details = ''
for line in fh:
if line.startswith('TITLE'):
details += line[10:].rstrip()
elif line.startswith('ATOM'):
break
return details
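

# Hypothetical usage of PDBParser on a MODELLER output file; the returned
# dict keys are those documented in parse_file above, and the filename
# is an assumption for illustration.
def _pdb_parser_sketch():
    p = PDBParser()
    m = p.parse_file('comparative_model.pdb')
    return m['dataset'], m['templates'], m['software']
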
class _Database2Handler(ihm.reader.Handler):
def __init__(self, m):
self.m = m
def __call__(self, database_id, database_code):
self.m['db'][database_id.upper()] = database_code
class _StructHandler(ihm.reader.Handler):
def __init__(self, m):
self.m = m
def __call__(self, title):
self.m['title'] = title
class _AuditRevHistHandler(ihm.reader.Handler):
def __init__(self, m):
self.m = m
def __call__(self, revision_date):
self.m['version'] = revision_date
class _ExptlHandler(ihm.reader.Handler):
def __init__(self, m):
self.m = m
def __call__(self, method):
# Modeller currently sets _exptl.method, not _software
if method.startswith('model, MODELLER Version '):
version, date = method[24:].split(' ', 1)
s = _get_modeller(version, date)
self.m['software'].append(s)
class _ModellerHandler(ihm.reader.Handler):
"""Handle the Modeller-specific _modeller category"""
def __init__(self, m, filename):
self.m = m
self.filename = filename
self.m['alnfile'] = self.m['script'] = None
def __call__(self, alignment, script):
if alignment:
# Paths are relative to that of the mmCIF file
fname = util._get_relative_path(self.filename, alignment)
self.m['alnfile'] = location.InputFileLocation(
fname, details="Alignment for starting comparative model")
if script:
fname = util._get_relative_path(self.filename, script)
self.m['script'] = location.WorkflowFileLocation(
fname, details="Script for starting comparative model")
class _ModellerTemplateHandler(ihm.reader.Handler):
"""Handle the Modeller-specific _modeller_template category"""
def __init__(self, m):
self.m = m
self.m['modeller_templates'] = []
def __call__(self, name, template_begin, template_end, target_begin,
target_end, pct_seq_id):
tmp_begin, tmp_chain = template_begin.split(':', 1)
tmp_end, tmp_chain = template_end.split(':', 1)
tgt_begin, tgt_chain = target_begin.split(':', 1)
tgt_end, tgt_chain = target_end.split(':', 1)
t = ModellerTemplate(name=name, template_begin=tmp_begin,
template_end=tmp_end, template_chain=tmp_chain,
target_begin=tgt_begin, target_end=tgt_end,
target_chain=tgt_chain, pct_seq_id=pct_seq_id)
self.m['modeller_templates'].append(t)
class _ModelCifAlignment:
"""Store alignment information from a ModelCIF file"""
def __init__(self):
self.target = self.template = self.seq_id = None
def get_template_object(self, target_dataset):
"""Convert the alignment information into an IHM Template object"""
return self.template.template.get_template_object(target_dataset,
aln=self)
class _TemplateRange:
"""Store information about a template residue range from a ModelCIF file"""
def __init__(self):
self.seq_id_range = None
self.template = None
class _TargetRange:
"""Store information about a target residue range from a ModelCIF file"""
def __init__(self):
self.seq_id_range = None
self.asym_id = None
class _Template:
"""Store template information from a ModelCIF file"""
# Map ModelCIF ma_template_ref_db_details.db_name to IHMCIF equivalents
_modelcif_dbmap = {'PDB': (dataset.PDBDataset, location.PDBLocation),
'PDB-DEV': (dataset.IntegrativeModelDataset,
location.PDBDevLocation),
'MA': (dataset.DeNovoModelDataset,
location.ModelArchiveLocation),
'ALPHAFOLDDB': (dataset.DeNovoModelDataset,
location.AlphaFoldDBLocation)}
def __init__(self):
self.auth_asym_id = self.db_name = self.db_accession_code = None
self.db_version_date = self.target_asym_id = None
def get_template_object(self, target_dataset, aln=None):
"""Convert the template information into an IHM Template object"""
dsetcls, loccls = self._modelcif_dbmap.get(
self.db_name.upper(),
(dataset.Dataset, location.DatabaseLocation))
loc = loccls(db_code=self.db_accession_code,
version=self.db_version_date)
d = dsetcls(location=loc)
# Make the computed model dataset derive from the template's
target_dataset.parents.append(d)
t = startmodel.Template(
dataset=d, asym_id=self.auth_asym_id,
seq_id_range=aln.target.seq_id_range if aln else (None, None),
template_seq_id_range=aln.template.seq_id_range
if aln else (None, None),
sequence_identity=aln.seq_id if aln else None)
return aln.target.asym_id if aln else self.target_asym_id, t
class _SystemReader:
"""A minimal implementation, so we can use some of the Handlers
in ihm.reader but get outputs in the results dict."""
def __init__(self, m):
self.software = ihm.reader.IDMapper(m['software'], ihm.Software,
*(None,) * 4)
self.citations = ihm.reader.IDMapper(None, ihm.Citation, *(None,) * 8)
self.alignments = ihm.reader.IDMapper(m['alignments'],
_ModelCifAlignment)
self.template_ranges = ihm.reader.IDMapper(None, _TemplateRange)
self.target_ranges = ihm.reader.IDMapper(None, _TargetRange)
self.templates = ihm.reader.IDMapper(m['templates'], _Template)
self.entities = ihm.reader.IDMapper(None, ihm.Entity, [])
self.asym_units = ihm.reader.IDMapper(m['asyms'], ihm.AsymUnit, None)
self.src_gens = ihm.reader.IDMapper(None, ihm.source.Manipulated)
self.src_nats = ihm.reader.IDMapper(None, ihm.source.Natural)
self.src_syns = ihm.reader.IDMapper(None, ihm.source.Synthetic)
class _TemplateDetailsHandler(ihm.reader.Handler):
"""Extract template information from a ModelCIF file"""
def __init__(self, sysr):
self.sysr = sysr
def __call__(self, template_id, target_asym_id, template_auth_asym_id):
template = self.sysr.templates.get_by_id(template_id)
template.auth_asym_id = template_auth_asym_id
template.target_asym_id = target_asym_id
class _TemplateRefDBDetailsHandler(ihm.reader.Handler):
"""Extract template database information from a ModelCIF file"""
def __init__(self, sysr):
self.sysr = sysr
def __call__(self, template_id, db_name, db_accession_code,
db_version_date):
template = self.sysr.templates.get_by_id(template_id)
template.db_name = db_name
template.db_accession_code = db_accession_code
template.db_version_date = db_version_date
class _TemplatePolySegmentHandler(ihm.reader.Handler):
"""Extract template residue range information from a ModelCIF file"""
def __init__(self, sysr):
self.sysr = sysr
def __call__(self, id, template_id, residue_number_begin,
residue_number_end):
tr = self.sysr.template_ranges.get_by_id(id)
tr.seq_id_range = (self.get_int(residue_number_begin),
self.get_int(residue_number_end))
tr.template = self.sysr.templates.get_by_id(template_id)
class _TemplatePolyMappingHandler(ihm.reader.Handler):
"""Extract target residue range information from a ModelCIF file"""
def __init__(self, sysr):
self.sysr = sysr
def __call__(self, id, template_segment_id, target_asym_id,
target_seq_id_begin, target_seq_id_end):
m = self.sysr.target_ranges.get_by_id((template_segment_id,
target_asym_id))
m.seq_id_range = (self.get_int(target_seq_id_begin),
self.get_int(target_seq_id_end))
class _SeqIDMapper:
"""Map ModelCIF sequence identity to IHMCIF equivalent"""
identity_map = {
"length of the shorter sequence":
SequenceIdentityDenominator.SHORTER_LENGTH,
"number of aligned positions (including gaps)":
SequenceIdentityDenominator.NUM_ALIGNED_WITH_GAPS}
def __call__(self, pct_id, denom):
denom = self.identity_map.get(
denom.lower() if denom else None,
SequenceIdentityDenominator.OTHER)
return startmodel.SequenceIdentity(
value=pct_id, denominator=denom)
class _AlignmentDetailsHandler(ihm.reader.Handler):
"""Read pairwise alignments (ma_alignment_details table)"""
def __init__(self, sysr):
self.sysr = sysr
self.seq_id_mapper = _SeqIDMapper()
def __call__(self, alignment_id, template_segment_id, target_asym_id,
percent_sequence_identity, sequence_identity_denominator):
aln = self.sysr.alignments.get_by_id(alignment_id)
aln.seq_id = self.seq_id_mapper(
self.get_float(percent_sequence_identity),
sequence_identity_denominator)
tgt_rng = self.sysr.target_ranges.get_by_id((template_segment_id,
target_asym_id))
tmpl_rng = self.sysr.template_ranges.get_by_id(template_segment_id)
aln.target = tgt_rng
aln.target.asym_id = target_asym_id
aln.template = tmpl_rng
class _ModBaseLocation(location.DatabaseLocation):
"""A model deposited in ModBase"""
def __init__(self, db_code, version=None, details=None):
# Use details to describe ModBase, ignoring the file title
super().__init__(
db_code, version=version,
details="ModBase database of comparative protein structure models")
class _CIFParserBase(Parser):
# Map PDBx database_2.database_name to IHMCIF equivalents
dbmap = {'PDB': (location.PDBLocation, dataset.PDBDataset),
'PDB-DEV': (location.PDBDevLocation,
dataset.IntegrativeModelDataset),
'MODELARCHIVE': (location.ModelArchiveLocation,
dataset.DeNovoModelDataset),
'ALPHAFOLDDB': (location.AlphaFoldDBLocation,
dataset.DeNovoModelDataset),
'MODBASE': (_ModBaseLocation, dataset.ComparativeModelDataset)}
def parse_file(self, filename):
m = {'db': {}, 'title': 'Starting model structure',
'software': [], 'templates': [], 'alignments': [],
'asyms': []}
with self._open_file(filename) as fh:
dbh = _Database2Handler(m)
structh = _StructHandler(m)
arevhisth = _AuditRevHistHandler(m)
exptlh = _ExptlHandler(m)
modellerh = _ModellerHandler(m, filename)
modtmplh = _ModellerTemplateHandler(m)
sysr = _SystemReader(m)
r = self._reader_class(
fh, {'_database_2': dbh, '_struct': structh,
'_pdbx_audit_revision_history': arevhisth,
'_exptl': exptlh, '_modeller': modellerh,
'_modeller_template': modtmplh,
'_software': ihm.reader._SoftwareHandler(sysr),
'_citation': ihm.reader._CitationHandler(sysr),
'_struct_asym': ihm.reader._StructAsymHandler(sysr),
'_entity': ihm.reader._EntityHandler(sysr),
'_entity_src_nat': ihm.reader._EntitySrcNatHandler(sysr),
'_pdbx_entity_src_syn':
ihm.reader._EntitySrcSynHandler(sysr),
'_entity_src_gen': ihm.reader._EntitySrcGenHandler(sysr),
'_citation_author':
ihm.reader._CitationAuthorHandler(sysr),
'_ma_template_details': _TemplateDetailsHandler(sysr),
'_ma_template_ref_db_details':
_TemplateRefDBDetailsHandler(sysr),
'_ma_template_poly_segment':
_TemplatePolySegmentHandler(sysr),
'_ma_target_template_poly_mapping':
_TemplatePolyMappingHandler(sysr),
'_ma_alignment_details': _AlignmentDetailsHandler(sysr)})
r.read_file()
dset = self._get_dataset(filename, m)
return {'dataset': dset, 'software': m['software'],
'templates': self._get_templates(filename, m, dset),
'entity_source': {asym.id: asym.entity.source
for asym in m['asyms']},
'script': m['script']}
def _get_dataset(self, filename, m):
# Check for known databases. Note that if a file is in multiple
# databases, we currently return one "at random"
for dbid, dbcode in m['db'].items():
if dbid in self.dbmap:
loccls, dsetcls = self.dbmap[dbid]
loc = loccls(db_code=dbcode, version=m.get('version'),
details=m['title'])
return dsetcls(location=loc, details=loc.details)
# Fall back to a local file
loc = location.InputFileLocation(filename, details=m['title'])
return dataset.ComparativeModelDataset(
location=loc, details=loc.details)
def _get_templates(self, filename, m, dset):
alnfile = m['alnfile']
template_path_map = {}
templates = {}
def _handle_templates():
# Use Modeller-provided templates if available
if m['modeller_templates']:
for t in m['modeller_templates']:
yield _handle_modeller_template(
t, template_path_map, dset, alnfile)
# Otherwise, use ModelCIF templates
else:
seen_templates = set()
for aln in m['alignments']:
seen_templates.add(aln.template.template)
yield aln.get_template_object(dset)
# Handle any unaligned templates (e.g. AlphaFold)
for t in m['templates']:
if t not in seen_templates:
yield t.get_template_object(dset)
for chain, template in _handle_templates():
if chain not in templates:
templates[chain] = []
templates[chain].append(template)
# Sort templates by starting residue, then ending residue
for chain in templates.keys():
templates[chain] = sorted(templates[chain],
key=operator.attrgetter('seq_id_range'))
return templates
class CIFParser(_CIFParserBase):
"""Extract metadata (e.g. PDB ID, comparative modeling templates)
       from an mmCIF file. This currently handles mmCIF files from the PDB
       database itself, models compliant with the ModelCIF dictionary
       (such as those from Model Archive), and output files from the
       MODELLER comparative modeling package.
See also :class:`PDBParser` for coordinate files in legacy PDB format,
or :class:`BinaryCIFParser` for BinaryCIF format.
"""
_reader_class = ihm.format.CifReader
def _open_file(self, filename):
return open(filename)
def parse_file(self, filename):
"""Extract metadata. See :meth:`Parser.parse_file` for details.
:param str filename: the file to extract metadata from.
:return: a dict with key `dataset` pointing to the coordinate file,
as an entry in the PDB or Model Archive databases if the
file contains appropriate headers, otherwise to the
file itself;
'templates' pointing to a dict with keys the asym (chain)
IDs in the PDB file and values the list of comparative
model templates used to model that chain as
:class:`ihm.startmodel.Template` objects;
'entity_source' pointing to a dict with keys the asym IDs
and values :class:`ihm.source.Source` objects;
'software' pointing to a list of software used to generate
the file (as :class:`ihm.Software` objects);
'script' pointing to the script used to generate the
file, if any (as :class:`ihm.location.WorkflowFileLocation`
                object).
"""
return super().parse_file(filename)
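

# Sketch of extracting template metadata from a ModelCIF file; the
# filename is hypothetical, and the keys used are those documented in
# parse_file above.
def _cif_parser_sketch():
    p = CIFParser()
    m = p.parse_file('swiss_model.cif')
    for asym_id, templates in m['templates'].items():
        for t in templates:
            print(asym_id, t.seq_id_range, t.sequence_identity)
    return m['dataset']
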
class BinaryCIFParser(_CIFParserBase):
"""Extract metadata from a BinaryCIF file. This works in a very similar
fashion to :class:`CIFParser`; see that class for more information.
"""
_reader_class = ihm.format_bcif.BinaryCifReader
def _open_file(self, filename):
return open(filename, 'rb')
python-ihm-2.7/ihm/model.py 0000664 0000000 0000000 00000053411 15035733372 0015721 0 ustar 00root root 0000000 0000000 """Classes for handling models (sets of coordinates) as well as
groups of models.
"""
import struct
import itertools
from ihm.util import _text_choice_property, _check_residue_range
class Sphere:
"""Coordinates of part of the model represented by a sphere.
See :meth:`Model.get_spheres` for more details.
:param asym_unit: The asymmetric unit that this sphere represents
:type asym_unit: :class:`ihm.AsymUnit`
:param tuple seq_id_range: The range of residues represented by this
sphere (as a two-element tuple)
:param float x: x coordinate of the center of the sphere
:param float y: y coordinate of the center of the sphere
:param float z: z coordinate of the center of the sphere
:param float radius: radius of the sphere
:param float rmsf: root-mean-square fluctuation of the coordinates
"""
# Reduce memory usage
__slots__ = ['asym_unit', 'seq_id_range', 'x', 'y', 'z', 'radius', 'rmsf']
def __init__(self, asym_unit, seq_id_range, x, y, z, radius, rmsf=None):
self.asym_unit = asym_unit
self.seq_id_range = seq_id_range
self.x, self.y, self.z = x, y, z
self.radius, self.rmsf = radius, rmsf
class Atom:
"""Coordinates of part of the model represented by an atom.
See :meth:`Model.get_atoms` for more details. Note that this class
is used only to represent the coordinates of an atom. To access
atom-specific properties of the model, see the :class:`ihm.Atom` class.
:param asym_unit: The asymmetric unit that this atom represents
:type asym_unit: :class:`ihm.AsymUnit`
:param int seq_id: The sequence ID of the residue represented by this
atom. This should generally be a number starting at 1 for any
polymer chain, water, or oligosaccharide. For ligands, a seq_id
is not needed (as a given asym can only contain a single ligand),
so either 1 or None can be used.
:param str atom_id: The name of the atom in the residue
:param str type_symbol: Element name
:param float x: x coordinate of the atom
:param float y: y coordinate of the atom
:param float z: z coordinate of the atom
:param bool het: True for HETATM sites, False (default) for ATOM
:param float biso: Temperature factor or equivalent (if applicable)
:param float occupancy: Fraction of the atom type present
(if applicable)
       :param str alt_id: Alternate conformation indicator
(if applicable)
"""
# Reduce memory usage
__slots__ = ['asym_unit', 'seq_id', 'atom_id', 'type_symbol',
'x', 'y', 'z', 'het', 'biso', 'occupancy', 'alt_id']
def __init__(self, asym_unit, seq_id, atom_id, type_symbol, x, y, z,
het=False, biso=None, occupancy=None, alt_id=None):
self.asym_unit = asym_unit
self.seq_id, self.atom_id = seq_id, atom_id
self.type_symbol = type_symbol
self.x, self.y, self.z = x, y, z
self.het, self.biso = het, biso
self.occupancy = occupancy
self.alt_id = alt_id
class Model:
"""A single set of coordinates (conformation).
Models are added to the system by placing them inside
:class:`ModelGroup` objects, which in turn are placed inside
:class:`State` objects, which are grouped in
:class:`StateGroup` objects, which are finally added to the system
via :attr:`ihm.System.state_groups`.
:param assembly: The parts of the system that were modeled.
:type assembly: :class:`~ihm.Assembly`
:param protocol: Description of how the modeling was done.
:type protocol: :class:`~ihm.protocol.Protocol`
:param representation: Level of detail at which the system
was represented.
:type representation: :class:`~ihm.representation.Representation`
:param str name: Descriptive name for this model.
"""
def __init__(self, assembly, protocol, representation, name=None):
# Note that a similar Model class is used in python-modelcif but it
# is not a subclass. So be careful when modifying this class to not
# break the API (e.g. by adding new members).
self.assembly, self.protocol = assembly, protocol
self.representation, self.name = representation, name
self._atoms = []
self._spheres = []
#: List of residue ranges that were explicitly not modeled. See
#: :class:`NotModeledResidueRange`.
self.not_modeled_residue_ranges = []
def get_spheres(self):
"""Yield :class:`Sphere` objects that represent this model.
The default implementation simply iterates over an internal
list of spheres, but this is not very memory-efficient, particularly
if the spheres are already stored somewhere else, e.g. in the
software's own data structures. It is recommended to subclass
and provide a more efficient implementation. For example, the
       modeling of Nup133 uses a custom subclass
       to pass `BioPython <https://biopython.org/>`_ objects through
to python-ihm.
Note that the set of spheres should match the model's
:class:`~ihm.representation.Representation`. This is not currently
enforced.
""" # noqa: E501
for s in self._spheres:
yield s
def add_sphere(self, sphere):
"""Add to the model's set of :class:`Sphere` objects.
See :meth:`get_spheres` for more details.
"""
self._spheres.append(sphere)
def get_atoms(self):
"""Yield :class:`Atom` objects that represent this model.
See :meth:`get_spheres` for more details.
"""
for a in self._atoms:
yield a
def add_atom(self, atom):
"""Add to the model's set of :class:`Atom` objects.
See :meth:`get_spheres` for more details.
Note that for branched entities, the `seq_id` of the new atom
is provisional. It should be mapped to the correct ID once the
input file is completely read, using :attr:`ihm.AsymUnit.num_map`.
This is done automatically by ihm.reader when using the default
implementation.
"""
self._atoms.append(atom)
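

# A minimal sketch of the subclassing suggested in get_spheres above,
# generating spheres on the fly from a hypothetical external coordinate
# array rather than storing Sphere objects in memory.
class _CoordArrayModel(Model):
    def __init__(self, coords, asym, **kwargs):
        super().__init__(**kwargs)
        self._coords = coords   # e.g. an Nx3 list from the modeling package
        self._asym = asym

    def get_spheres(self):
        # One one-residue sphere per coordinate row; radius is assumed
        for i, (x, y, z) in enumerate(self._coords):
            yield Sphere(asym_unit=self._asym,
                         seq_id_range=(i + 1, i + 1),
                         x=x, y=y, z=z, radius=1.0)
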
class ModelRepresentative:
"""A single model that represents all models in a :class:`ModelGroup`.
See :attr:`ModelGroup.representatives`.
:param model: The actual representative Model.
:type model: :class:`Model`
:param str selection_criteria: How the representative was chosen
"""
def __init__(self, model, selection_criteria):
self.model, self.selection_criteria = model, selection_criteria
selection_criteria = _text_choice_property(
"selection_criteria",
["medoid", "closest to the average", "lowest energy",
"target function", "fewest violations", "minimized average structure",
"best scoring model", "centroid", "other selction criteria"],
doc="How the representative was chosen")
class ModelGroup(list):
"""A set of related models. See :class:`Model`. It is implemented as
a simple list of the models.
These objects are typically stored in a :class:`State`,
:class:`Ensemble`, or :class:`OrderedProcess`.
:param elements: Initial set of models in the group.
:param str name: Descriptive name for the group.
:param str details: Additional text describing this group.
"""
def __init__(self, elements=(), name=None, details=None):
self.name = name
self.details = details
super().__init__(elements)
#: Any representative structural model(s).
#: See :class:`ModelRepresentative`.
self.representatives = []
# Kind of ugly but needed so we can use ModelGroup as keys for
# the ihm.restraint.CrossLink.fits dict
def __hash__(self):
return hash(tuple(self))
class State(list):
"""A set of model groups that constitute a single state of the system.
It is implemented as a simple list of the model groups.
See :class:`StateGroup`.
:param elements: The initial set of :class:`ModelGroup` objects in
this state.
"""
def __init__(self, elements=(), type=None, name=None, details=None,
experiment_type=None, population_fraction=None):
self.type, self.name, self.details = type, name, details
self.experiment_type = experiment_type
self.population_fraction = population_fraction
super().__init__(elements)
class StateGroup(list):
"""A set of related states. See :class:`State` and
:attr:`ihm.System.state_groups`. It is implemented as a simple
list of the states.
:param elements: Initial set of states in the group.
"""
def __init__(self, elements=()):
super().__init__(elements)
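

# Sketch of the nesting described in the Model docstring above:
# Model -> ModelGroup -> State -> StateGroup -> ihm.System.state_groups.
# The system, assembly, protocol, and representation objects are assumed
# to be already constructed.
def _state_group_sketch(system, assembly, protocol, representation):
    model = Model(assembly, protocol, representation, name='Best model')
    group = ModelGroup([model], name='All models')
    state = State([group], name='Open state')
    system.state_groups.append(StateGroup([state]))
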
class Ensemble:
"""Details about a model cluster or ensemble.
See :attr:`ihm.System.ensembles`.
:param model_group: The set of models in this ensemble.
:type model_group: :class:`ModelGroup`
:param int num_models: The total number of models in this ensemble. This
may be more than the number of models in `model_group`, for
example if only representative or top-scoring models
are deposited.
:param post_process: The final analysis step that generated this
ensemble.
:type post_process: :class:`ihm.analysis.Step`
:param str clustering_method: The method used to obtain the ensemble,
if applicable.
:param str clustering_feature: The feature used for clustering
the models, if applicable.
:param str name: A descriptive name for this ensemble.
:param float precision: The precision of the entire ensemble.
:param file: A reference to an external file containing coordinates
for the entire ensemble, for example as a DCD file
(see :class:`DCDWriter`). See also :attr:`subsamples`.
:type file: :class:`ihm.location.OutputFileLocation`
:param str details: Additional text describing this ensemble
:param bool superimposed: True if the models in the group are
structurally aligned.
"""
_num_deposited = None
def __init__(self, model_group, num_models, post_process=None,
clustering_method=None, clustering_feature=None, name=None,
precision=None, file=None, details=None, superimposed=None):
self.model_group, self.num_models = model_group, num_models
self.post_process = post_process
self.clustering_method = clustering_method
self.clustering_feature = clustering_feature
self.name, self.precision, self.file = name, precision, file
self.details = details
self.superimposed = superimposed
#: All localization densities for this ensemble, as
#: :class:`LocalizationDensity` objects
self.densities = []
#: All subsamples that make up this ensemble (if applicable),
#: as :class:`Subsample` objects
self.subsamples = []
def _get_num_deposited(self):
        # Generally we require an associated model_group; however, it is not
        # required by the dictionary, so input files may not have one. In
        # that case, use any provided value of num_models_deposited.
if self.model_group is None:
return self._num_deposited
else:
return len(self.model_group)
num_models_deposited = property(_get_num_deposited,
doc="Number of models in this ensemble "
"that are in the mmCIF file")
clustering_method = _text_choice_property(
"clustering_method",
["Hierarchical", "Other", "Partitioning (k-means)",
"Density based threshold-clustering"],
doc="The clustering method used to obtain the ensemble, if applicable")
clustering_feature = _text_choice_property(
"clustering_feature", ["RMSD", "dRMSD", "other"],
doc="The feature used for clustering the models, if applicable")
class NotModeledResidueRange:
"""A range of residues that were explicitly not modeled.
See :attr:`Model.not_modeled_residue_ranges`.
:param asym_unit: The asymmetric unit to which the residues belong.
:type asym_unit: :class:`~ihm.AsymUnit`
:param int seq_id_begin: Starting residue in the range.
:param int seq_id_end: Ending residue in the range.
:param str reason: Optional text describing why the residues were
not modeled.
"""
def __init__(self, asym_unit, seq_id_begin, seq_id_end, reason=None):
self.asym_unit = asym_unit
self.seq_id_begin, self.seq_id_end = seq_id_begin, seq_id_end
self.reason = reason
_check_residue_range((seq_id_begin, seq_id_end), asym_unit.entity)
reason = _text_choice_property(
"reason",
["Highly variable models with poor precision",
"Models do not adequately satisfy input data", "Other"],
doc="Reason why the residues were not modeled.")
class OrderedProcess:
"""Details about a process that orders two or more model groups.
A process is represented as a directed graph, where the nodes
are :class:`ModelGroup` objects and the edges represent transitions.
These objects are generally added to
:attr:`ihm.System.ordered_processes`.
:param str ordered_by: Text that explains how the ordering is done,
such as "time steps".
:param str description: Text that describes this process.
"""
def __init__(self, ordered_by, description=None):
self.ordered_by, self.description = ordered_by, description
#: All steps in this process, as a simple list of
#: :class:`ProcessStep` objects
self.steps = []
class ProcessStep(list):
"""A single step in an :class:`OrderedProcess`.
This is implemented as a simple list of :class:`ProcessEdge` objects,
each of which orders two :class:`ModelGroup` objects. (To order more
than two groups, for example to represent a branched reaction step
that generates two products, simply add multiple edges to the step.)
:param sequence elements: Initial set of :class:`ProcessEdge` objects.
:param str description: Text that describes this step.
"""
def __init__(self, elements=(), description=None):
self.description = description
super().__init__(elements)
class ProcessEdge:
"""A single directed edge in the graph for a :class:`OrderedProcess`,
representing the transition from one :class:`ModelGroup` to another.
These objects are added to :class:`ProcessStep` objects.
:param group_begin: The set of models at the origin of the edge.
:type group_begin: :class:`ModelGroup`
:param group_end: The set of models at the end of the edge.
:type group_end: :class:`ModelGroup`
:param str description: Text that describes this edge.
"""
def __init__(self, group_begin, group_end, description=None):
self.group_begin, self.group_end = group_begin, group_end
self.description = description
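

# Sketch of the ordered-process graph described above: two hypothetical
# model groups ordered by time.
def _ordered_process_sketch(group_t0, group_t1):
    proc = OrderedProcess(ordered_by='time steps')
    step = ProcessStep([ProcessEdge(group_t0, group_t1)],
                       description='Transition from t=0 to t=1')
    proc.steps.append(step)
    return proc
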
class LocalizationDensity:
"""Localization density of part of the system, over all models
in an ensemble.
See :attr:`Ensemble.densities`.
:param file: A reference to an external file containing the density,
for example as an MRC file.
:type file: :class:`ihm.location.OutputFileLocation`
:param asym_unit: The asymmetric unit (or part of one) that
this density represents.
:type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
"""
def __init__(self, file, asym_unit):
self.file, self.asym_unit = file, asym_unit
class Subsample:
"""Base class for a subsample within an ensemble.
In some cases the models that make up an :class:`Ensemble` may be
partitioned into subsamples, for example to determine if the
sampling was exhaustive
       (see Viswanath et al. 2017).
This base class can be used to describe the set of models in the
subsample, for example by pointing to an externally-deposited
set of conformations.
Usually a derived class (:class:`RandomSubsample` or
:class:`IndependentSubsample`) is used instead of this class.
Instances are stored in :attr:`Ensemble.subsamples`. All of the
subsamples in a given ensemble must be of the same type.
:param str name: A descriptive name for this sample
:param int num_models: The total number of models in this sample
:param model_group: The set of models in this sample, if applicable.
:type model_group: :class:`ModelGroup`
:param file: A reference to an external file containing coordinates
for the entire sample, for example as a DCD file
(see :class:`DCDWriter`).
:type file: :class:`ihm.location.OutputFileLocation`
""" # noqa: E501
sub_sampling_type = 'other'
def __init__(self, name, num_models, model_group=None, file=None):
self.name, self.num_models = name, num_models
self.model_group, self.file = model_group, file
num_models_deposited = property(
lambda self: len(self.model_group) if self.model_group else 0,
doc="Number of models in this subsample that are in the mmCIF file")
class RandomSubsample(Subsample):
"""A subsample generated by picking a random subset of the models that
make up the entire ensemble. See :class:`Subsample`.
"""
sub_sampling_type = 'random'
class IndependentSubsample(Subsample):
"""A subsample generated in the same fashion as other subsamples
but by an independent simulation. See :class:`Subsample`.
"""
sub_sampling_type = 'independent'
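

# Sketch of the exhaustiveness-style partitioning described in the
# Subsample docstring: two independently sampled halves of one ensemble.
# All arguments are assumed to be pre-built objects.
def _subsample_sketch(ensemble, group1, group2):
    ensemble.subsamples.extend([
        IndependentSubsample('Sample 1', num_models=5000,
                             model_group=group1),
        IndependentSubsample('Sample 2', num_models=5000,
                             model_group=group2)])
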
class DCDWriter:
"""Utility class to write model coordinates to a binary DCD file.
See :class:`Ensemble` and :class:`Model`. Since mmCIF is a text-based
format, it is not efficient to store entire ensembles in this format.
Instead, representative models should be deposited as mmCIF and
the :class:`Ensemble` then linked to an external file containing
only model coordinates. One such format is CHARMM/NAMD's DCD, which
is written out by this class. The DCD files simply contain the xyz
coordinates of all :class:`Atom` and :class:`Sphere` objects in each
:class:`Model`. (Note that no other data is stored, such as sphere
radii or restraint parameters.)
:param file fh: The filelike object to write the coordinates to. This
should be open in binary mode and should be a seekable object.
"""
def __init__(self, fh):
self.fh = fh
self.nframes = 0
def add_model(self, model):
"""Add the coordinates for the given :class:`Model` to the file as
a new frame. All models in the file should have the same number of
atoms and/or spheres, in the same order.
:param model: Model with coordinates to write to the file.
:type model: :class:`Model`
"""
x = []
y = []
z = []
for a in itertools.chain(model.get_atoms(), model.get_spheres()):
x.append(a.x)
y.append(a.y)
z.append(a.z)
self._write_frame(x, y, z)
def _write_frame(self, x, y, z):
self.nframes += 1
if self.nframes == 1:
self.ncoord = len(x)
remarks = [
b'Produced by python-ihm, https://github.com/ihmwg/python-ihm',
b'This file is designed to be used in combination with an '
b'mmCIF file',
b'See PDB-IHM at https://pdb-ihm.org/ for more details']
self._write_header(self.ncoord, remarks)
else:
if len(x) != self.ncoord:
raise ValueError(
"Frame size mismatch - frames contain %d "
"coordinates but attempting to write a frame "
"containing %d coordinates" % (self.ncoord, len(x)))
# Update number of frames
self.fh.seek(self._pos_nframes)
self.fh.write(struct.pack('i', self.nframes))
self.fh.seek(0, 2) # Move back to end of file
# Write coordinates
frame_size = struct.pack('i', struct.calcsize("%df" % self.ncoord))
for coord in x, y, z:
self.fh.write(frame_size)
self.fh.write(struct.pack("%df" % self.ncoord, *coord))
self.fh.write(frame_size)
def _write_header(self, natoms, remarks):
self.fh.write(struct.pack('i', 84) + b'CORD')
self._pos_nframes = self.fh.tell()
self.fh.write(struct.pack('i', self.nframes))
self.fh.write(struct.pack('i', 0)) # istart
self.fh.write(struct.pack('i', 0)) # nsavc
self.fh.write(struct.pack('5i', 0, 0, 0, 0, 0))
self.fh.write(struct.pack('i', 0)) # number of fixed atoms
self.fh.write(struct.pack('d', 0.)) # delta
self.fh.write(struct.pack('10i', 0, 0, 0, 0, 0, 0, 0, 0, 0, 84))
remark_size = struct.calcsize('i') + 80 * len(remarks)
self.fh.write(struct.pack('i', remark_size))
self.fh.write(struct.pack('i', len(remarks)))
for r in remarks:
self.fh.write(r.ljust(80)[:80])
self.fh.write(struct.pack('i', remark_size))
self.fh.write(struct.pack('i', struct.calcsize('i')))
self.fh.write(struct.pack('i', natoms)) # total number of atoms
self.fh.write(struct.pack('i', struct.calcsize('i')))
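

# Sketch of writing an ensemble's coordinates as DCD, per the DCDWriter
# docstring; `models` is a hypothetical iterable of Model objects, and
# the output filename is an assumption.
def _dcd_sketch(models):
    with open('ensemble.dcd', 'wb') as fh:
        writer = DCDWriter(fh)
        for m in models:
            writer.add_model(m)
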
python-ihm-2.7/ihm/multi_state_scheme.py 0000664 0000000 0000000 00000027316 15035733372 0020504 0 ustar 00root root 0000000 0000000 # coding=utf-8
"""Classes for handling connected/ordered schemes formed by multiple
   states, together with information on kinetic schemes"""

import ihm
from ihm.model import _text_choice_property
class MultiStateScheme:
"""MultiStateScheme collects information about a collection of
multiple states, that can form a connected/ordered scheme.
A special case is a kinetic scheme, for which kinetic rates and
relaxation times are available.
:param str name: The name of the multi-state scheme.
:param str details: Details on the scheme.
:param connectivities: A list of connectivities that belong to
the scheme.
:type connectivities: List of :class:`Connectivity`
:param relaxation_times: A list of relaxation times not assigned
to specific connectivities, but to the scheme
:type relaxation_times: List of :class:`RelaxationTime`
"""
def __init__(self, name, details=None, connectivities=None,
relaxation_times=None):
self.name = name
self.details = details
self._connectivity_list = []
self._relaxation_time_list = []
# states is filled automatically based on connectivity_list
self._states = []
if connectivities is not None:
for c in connectivities:
if c not in self._connectivity_list:
self.add_connectivity(c)
if relaxation_times is not None:
for r in relaxation_times:
if r not in self._relaxation_time_list:
self.add_relaxation_time(r)
def add_connectivity(self, connectivity):
"""Add a connectivity to the scheme.
:param connectivity: The connectivity to add to the scheme
:type connectivity: :class:`Connectivity`
"""
if connectivity is None:
return
if connectivity not in self._connectivity_list:
# Make sure that the connectivity has not been assigned to
# another scheme
if not connectivity._assigned_to_scheme:
connectivity.set_assigned_to_scheme()
self._connectivity_list.append(connectivity)
# If the connectivity has been assigned to another scheme,
# create a copy of the connectivity and use that
else:
old_connectivity = connectivity
connectivity = \
ihm.multi_state_scheme.Connectivity(
begin_state=old_connectivity.begin_state,
end_state=old_connectivity.end_state,
details=old_connectivity.details,
dataset_group=old_connectivity.dataset_group,
kinetic_rate=old_connectivity.kinetic_rate,
relaxation_time=old_connectivity.relaxation_time
)
connectivity.set_assigned_to_scheme()
self._connectivity_list.append(connectivity)
# Add the states that belong to the connectivity
self._add_state(connectivity.begin_state)
self._add_state(connectivity.end_state)
def _add_state(self, state):
"""Add a state to the self._states list if it is not present yet.
        If the state has optional properties, such as a name, those
        properties are compared against the states already in the list.
        If the state does not have a name, it might only be a list of
        elements; in that case, only the contents of the list are checked.
        This is important for empty states, i.e. those that have no
        models associated.
:param state: The state to add.
:type state: :class:`ihm.model.State`
"""
if state is None:
return
for tmp_state in self._states:
# Check whether both states have the name attributes
if hasattr(state, 'name') and hasattr(tmp_state, 'name'):
# compare the properties of the two states and the elements of
# the lists
if state.__dict__ == tmp_state.__dict__ \
and state == tmp_state:
# state found
return
# If neither of the two states has the name attribute, only compare
# the elements of the lists
if not hasattr(state, 'name') and not hasattr(tmp_state, 'name'):
# If the two states have the same elements
if state == tmp_state:
# state found
return
# If the state was not found in the list yet, add it
self._states.append(state)
def add_relaxation_time(self, relaxation_time):
"""Add a relaxation time to the scheme. This relaxation time is not
assigned to a connectivity.
:param relaxation_time: The relaxation time to add to the scheme.
:type relaxation_time: :class:`RelaxationTime`
"""
if relaxation_time is not None:
self._relaxation_time_list.append(relaxation_time)
def get_connectivities(self):
"""Return the connectivities assigned to a scheme"""
return self._connectivity_list
def get_relaxation_times(self):
"""Return the relaxation times assigned to a scheme"""
return self._relaxation_time_list
def get_states(self):
"""Return the states involved in a scheme"""
return self._states
def __eq__(self, other):
return ((self.__dict__ == other.__dict__)
and (self._connectivity_list ==
other._connectivity_list)
and (self._relaxation_time_list ==
other._relaxation_time_list))
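

# A minimal illustrative sketch (not part of the library API) of how a
# two-state scheme might be assembled. The two ihm.model.State objects
# are hypothetical stand-ins; in a real deposition they would contain
# model groups.
def _example_two_state_scheme():
    import ihm.model
    open_state = ihm.model.State(name='open')
    closed_state = ihm.model.State(name='closed')
    # A directed edge from 'open' to 'closed'; the rate constant value
    # is illustrative only
    conn = Connectivity(
        begin_state=open_state, end_state=closed_state,
        kinetic_rate=KineticRate(transition_rate_constant=150.))
    return MultiStateScheme(name='two-state exchange',
                            connectivities=[conn])
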
class Connectivity:
"""A connectivity between states. Used to describe the directed
edge of graph.
If no end_state is given, the state is not connected to another state.
This could be the case for states where no connection to other states
could be resolved.
:param begin_state: The start state of the connectivity.
:type begin_state: :class:`ihm.model.State`
:param end_state: The end state of the connectivity. Can be None in case
of states that are not connected to others.
:type end_state: :class:`ihm.model.State`
    :param str details: Details of the connectivity.
:param dataset_group: The DatasetGroup that was used to obtain information
on the connectivity.
:type dataset_group: :class:`ihm.dataset.DatasetGroup`
:param kinetic_rate: A kinetic rate assigned to the connectivity.
:type kinetic_rate: :class:`KineticRate`
:param relaxation_time: A relaxation time assigned to the connectivity.
:type relaxation_time: :class:`RelaxationTime`
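
    For example, a minimal sketch of a directed edge between two
    hypothetical states (``state_open`` and ``state_closed`` are
    stand-in :class:`ihm.model.State` objects)::

        conn = Connectivity(
            begin_state=state_open, end_state=state_closed,
            kinetic_rate=KineticRate(transition_rate_constant=150.))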
"""
def __init__(self, begin_state, end_state=None, details=None,
dataset_group=None, kinetic_rate=None, relaxation_time=None):
self.begin_state = begin_state
self.end_state = end_state
self.details = details
self.dataset_group = dataset_group
self.kinetic_rate = kinetic_rate
self.relaxation_time = relaxation_time
# The _assigned_to_scheme variable tracks whether the connectivity
# has been assigned to a scheme. This is to ensure that each
# connectivity is only assigned to a single scheme.
self._assigned_to_scheme = False
def set_assigned_to_scheme(self):
self._assigned_to_scheme = True
def __eq__(self, other):
return self.__dict__ == other.__dict__
class KineticRate:
"""A base class for a kinetic rate that can be assigned to a connectivity.
    The kinetic rate can be given as a transition_rate_constant,
    an equilibrium_constant, or both.
:param float transition_rate_constant: A transition rate constant
describing the exchange between two states. Unit: per second.
:param equilibrium_constant: An equilibrium constant describing the
        exchange between two states.
:type equilibrium_constant: :class:`EquilibriumConstant` or
:class:`PopulationEquilibriumConstant` or
:class:`KineticRateEquilibriumConstant`
:param str details: Details on the kinetic rate.
:param dataset_group: The DatasetGroup used to determine the kinetic rate.
:type dataset_group: :class:`ihm.dataset.DatasetGroup`
:param file: External file containing measurement data for the kinetic
rate.
:type file: :class:`ihm.location.OutputFileLocation`
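
    For example, a sketch combining a transition rate constant with a
    population-derived equilibrium constant (the values are
    illustrative only)::

        k_eq = PopulationEquilibriumConstant(value=0.6)
        rate = KineticRate(transition_rate_constant=0.2,
                           equilibrium_constant=k_eq)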
"""
def __init__(self,
transition_rate_constant=None,
equilibrium_constant=None,
details=None,
dataset_group=None,
file=None):
self.transition_rate_constant = transition_rate_constant
self.equilibrium_constant = equilibrium_constant
self.details = details
self.dataset_group = dataset_group
self.external_file = file
def __eq__(self, other):
return self.__dict__ == other.__dict__
class EquilibriumConstant:
"""Base class for an equilibrium constant.
    This class handles the case where none of the derived classes
    is applicable.
    :param float value: The value of the equilibrium constant.
    :param str unit: Unit of the equilibrium constant. Depending on the
        process described, a unit may or may not be applicable."""
def __init__(self, value, unit=None):
self.method = 'equilibrium constant is determined from another ' \
'method not listed'
self.value = value
self.unit = unit
def __eq__(self, other):
if other is None:
return False
return self.__dict__ == other.__dict__
class PopulationEquilibriumConstant(EquilibriumConstant):
"""An equilibrium constant determined from population"""
def __init__(self, value, unit=None):
super().__init__(value, unit)
self.method = 'equilibrium constant is determined from population'
class KineticRateEquilibriumConstant(EquilibriumConstant):
"""An equilibrium constant determined from kinetic rates as kAB/kBA"""
def __init__(self, value, unit=None):
super().__init__(value, unit)
self.method = 'equilibrium constant is determined from kinetic ' \
'rates, kAB/kBA'
class RelaxationTime:
"""A relaxation time determined for a scheme.
The relaxation time can either be connected to a specific connectivity
in the scheme or to the scheme in general if no assignment is possible.
:param float value: The relaxation time.
    :param str unit: The unit of the relaxation time. Options are
        ['seconds', 'milliseconds', 'microseconds'].
:param float amplitude: The amplitude of the relaxation time if determined.
:param str details: Details on the relaxation time.
:param dataset_group: DatasetGroup used to determine the relaxation time.
:type dataset_group: :class:`ihm.dataset.DatasetGroup`
:param file: An external file containing measurement data for
the relaxation time.
:type file: :class:`ihm.location.OutputFileLocation`
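
    For example, a sketch of a 5-millisecond relaxation time (the
    values are illustrative only)::

        rt = RelaxationTime(value=5.0, unit='milliseconds')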
"""
def __init__(self, value, unit, amplitude=None,
details=None, dataset_group=None, file=None):
self.value = value
self.unit = unit
self.amplitude = amplitude
self.details = details
self.dataset_group = dataset_group
self.external_file = file
def __eq__(self, other):
return self.__dict__ == other.__dict__
# Check whether the given unit is within the allowed options
allowed_relaxation_time_units = ['seconds',
'milliseconds',
'microseconds']
unit = _text_choice_property(
"unit",
allowed_relaxation_time_units,
doc="The unit of the relaxation time.")
python-ihm-2.7/ihm/protocol.py 0000664 0000000 0000000 00000006720 15035733372 0016463 0 ustar 00root root 0000000 0000000 """Classes for handling modeling protocols.
"""
class Step:
"""A single step in a :class:`Protocol`.
:param assembly: The part of the system modeled in this step
:type assembly: :class:`~ihm.Assembly`
:param dataset_group: The collection of datasets used in this modeling
:type dataset_group: :class:`~ihm.dataset.DatasetGroup`
:param str method: Description of the method used (e.g. "Monte Carlo")
:param str name: A descriptive name for the step
:param int num_models_begin: The number of models at the beginning
of the step
:param int num_models_end: The number of models at the end of the step
:param software: The software used in this step
:type software: :class:`~ihm.Software`
:param script_file: Reference to the external file containing the
script used in this step (usually a
:class:`~ihm.location.WorkflowFileLocation`).
:type script_file: :class:`~ihm.location.Location`
:param bool multi_scale: Indicates if the modeling is multi-scale
:param bool multi_state: Indicates if the modeling is multi-state
:param bool ordered: Indicates if the modeling is ordered
:param bool ensemble: Indicates if the modeling involves an ensemble;
the default if unspecified is True iff the system contains
at least one :class:`~ihm.model.Ensemble`.
:param str description: Additional text describing the step
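
    For example, a sketch of a Monte Carlo sampling step (``assembly``
    and ``dataset_group`` are assumed to be existing objects)::

        step = ihm.protocol.Step(
            assembly=assembly, dataset_group=dataset_group,
            method='Monte Carlo', name='Sampling',
            num_models_begin=0, num_models_end=1000, multi_scale=True)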
"""
def __init__(self, assembly, dataset_group, method, num_models_begin=None,
num_models_end=None, software=None, script_file=None,
multi_scale=False, multi_state=False, ordered=False,
ensemble='default', name=None, description=None):
self.assembly = assembly
self.dataset_group = dataset_group
self.method = method
self.num_models_begin = num_models_begin
self.num_models_end = num_models_end
self.multi_scale, self.multi_state = multi_scale, multi_state
self.software, self.ordered, self.name = software, ordered, name
self.ensemble = ensemble
self.script_file = script_file
self.description = description
def _get_report(self):
def _get_flags():
if self.multi_scale:
yield "multi-scale"
if self.multi_state:
yield "multi-state"
if self.ordered:
yield "ordered"
return ("%s (%s) (%s->%s models)"
% (self.name or "Unnamed step",
"; ".join([self.method] + list(_get_flags())),
self.num_models_begin, self.num_models_end))
class Protocol:
"""A modeling protocol.
Each protocol consists of a number of protocol steps (e.g. sampling,
refinement) followed by a number of analyses.
Normally a protocol is passed to one or more :class:`~ihm.model.Model`
objects, although unused protocols can still be included in the file
if desired by adding them to :attr:`~ihm.System.orphan_protocols`.
:param str name: Optional name for the protocol
:param str details: Additional text describing the protocol
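
    For example, a sketch of a protocol with a single sampling step
    (``step`` is assumed to be an existing :class:`Step` object)::

        protocol = Protocol(name='Modeling')
        protocol.steps.append(step)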
"""
def __init__(self, name=None, details=None):
self.name = name
self.details = details
#: All modeling steps (:class:`Step` objects)
self.steps = []
#: All analyses (:class:`~ihm.analysis.Analysis` objects)
self.analyses = []
python-ihm-2.7/ihm/reader.py 0000664 0000000 0000000 00000515550 15035733372 0016072 0 ustar 00root root 0000000 0000000 """Utility classes to read in information in mmCIF or BinaryCIF format"""
import ihm.format
import ihm.format_bcif
import ihm.location
import ihm.dataset
import ihm.representation
import ihm.reference
import ihm.startmodel
import ihm.protocol
import ihm.analysis
import ihm.model
import ihm.restraint
import ihm.geometry
import ihm.source
import ihm.cross_linkers
import ihm.multi_state_scheme
import ihm.flr
import inspect
import warnings
import collections
from . import util
try:
from . import _format
except ImportError:
_format = None
class OldFileError(Exception):
"""Exception raised if a file conforms to too old a version of the
IHM extension dictionary. See :func:`read`."""
pass
def _make_new_entity():
"""Make a new Entity object"""
e = ihm.Entity([])
# make sequence mutable (see also SystemReader.finalize)
e.sequence = list(e.sequence)
# disable residue range checks during file read (see also
# _finalize_entities)
e._range_check = False
return e
def _finalize_entities(system):
"""Finalize all Entities in the given System.
This is done here and not in SystemReader.finalize so that it happens
both for python-ihm and for python-modelcif; it is also not done in
_EntityHandler.finalize as we want to be sure all other finalization
is done first."""
for e in system.entities:
e._range_check = True
def _get_vector3(d, key):
"""Return a 3D vector (as a list) from d[key+[1..3]]
or leave as is if None or ihm.unknown"""
if d[key + '1'] in (None, ihm.unknown):
return d[key + '1']
else:
# Assume if one element is present, all are
return [float(d[key + "%d" % k]) for k in (1, 2, 3)]
def _get_matrix33(d, key):
"""Return a 3x3 matrix (as a list of lists) from d[key+[1..3][1..3]]]
or leave as is if None or ihm.unknown"""
if d[key + '11'] in (None, ihm.unknown):
return d[key + '11']
else:
# Assume if one element is present, all are
return [[float(d[key + "%d%d" % (i, j)]) for j in (1, 2, 3)]
for i in (1, 2, 3)]
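# For example (sketch): given d = {'t1': '1.0', 't2': '2.0', 't3': '3.0'},
# _get_vector3(d, 't') returns [1.0, 2.0, 3.0]; _get_matrix33 does the
# same for the nine keys 't11' through 't33'.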
class IDMapper:
"""Utility class to handle mapping from mmCIF IDs to Python objects.
:param list system_list: The list in :class:`ihm.System` that keeps
track of these objects.
:param class cls: The base class for the Python objects.
"""
# The attribute in the class used to store the ID
id_attr = '_id'
def __init__(self, system_list, cls, *cls_args, **cls_keys):
self.system_list = system_list
self._obj_by_id = {}
self._cls = cls
self._cls_args = cls_args
self._cls_keys = cls_keys
# Fill in any existing IDs if available, so that we can add objects
# to an existing system
# todo: handle objects where system_list is None
# e.g. some handlers use FLRListAdapter, which doesn't
# support iteration
if system_list and hasattr(system_list, '__iter__'):
for obj in system_list:
self._obj_by_id[getattr(obj, self.id_attr)] = obj
def get_all(self):
"""Yield all objects seen so far (unordered)"""
return self._obj_by_id.values()
def _make_new_object(self, newcls=None):
if newcls is None:
newcls = self._cls
return newcls(*self._cls_args, **self._cls_keys)
def _update_old_object(self, obj, newcls=None):
# If this object was referenced by another table before it was
# created, it may have the wrong class - fix that retroactively
# (need to be careful that old and new classes are compatible)
if newcls:
obj.__class__ = newcls
def get_by_id(self, objid, newcls=None):
"""Get the object with given ID, creating it if it doesn't already
exist. If `newcls` is specified, the object will be an instance
of that class (this is commonly used when different subclasses
are employed depending on a type specified in the mmCIF file, such
as the various subclasses of :class:`ihm.dataset.Dataset`)."""
if objid in self._obj_by_id:
obj = self._obj_by_id[objid]
self._update_old_object(obj, newcls)
return obj
else:
newobj = self._make_new_object(newcls)
self._set_object_id(newobj, objid)
self._obj_by_id[objid] = newobj
if self.system_list is not None:
self.system_list.append(newobj)
return newobj
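    # For example (sketch): mapper.get_by_id('1', ihm.dataset.PDBDataset)
    # returns the dataset with ID '1', creating it if necessary and
    # upgrading its class if it was first created by a forward reference.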
def _set_object_id(self, obj, objid):
if self.id_attr is not None:
setattr(obj, self.id_attr, objid)
def get_by_id_or_none(self, objid, newcls=None):
"""Get the object with given ID, creating it if it doesn't already
exist. If ID is None or ihm.unknown, return None instead."""
return (None if objid in (None, ihm.unknown)
else self.get_by_id(objid, newcls))
class _ChemCompIDMapper(IDMapper):
"""Add extra handling to IDMapper for the chem_comp category"""
id_attr = 'id'
def __init__(self, *args, **keys):
super().__init__(*args, **keys)
# get standard residue types
alphabets = [x[1] for x in inspect.getmembers(ihm, inspect.isclass)
if issubclass(x[1], ihm.Alphabet)
and x[1] is not ihm.Alphabet]
self._standard_by_id = {}
for alphabet in alphabets:
self._standard_by_id.update((item[1].id, item[1])
for item in alphabet._comps.items())
def get_by_id(self, objid, newcls=None):
# Don't modify class of standard residue types
if objid in self._standard_by_id:
obj = self._standard_by_id[objid]
if objid not in self._obj_by_id:
self._obj_by_id[objid] = obj
self.system_list.append(obj)
return obj
else:
# Assign nonpolymer class based on the ID
if newcls is ihm.NonPolymerChemComp or newcls is ihm.WaterChemComp:
newcls = (ihm.WaterChemComp if objid == 'HOH'
else ihm.NonPolymerChemComp)
return super().get_by_id(objid, newcls)
def _make_new_object(self, newcls=None):
if newcls is None:
newcls = self._cls
if newcls is ihm.NonPolymerChemComp:
return newcls(None)
elif newcls is ihm.WaterChemComp:
return newcls()
else:
return newcls(*self._cls_args, **self._cls_keys)
class RangeIDMapper:
"""Utility class to handle mapping from mmCIF IDs to
:class:`ihm.AsymUnitRange` or :class:`ihm.EntityRange` objects."""
def __init__(self):
self._id_map = {}
def set(self, range_id, seq_id_begin, seq_id_end):
"""Add a range.
:param str range_id: mmCIF ID
:param int seq_id_begin: Index of the start of the range
:param int seq_id_end: Index of the end of the range
"""
self._id_map[range_id] = (seq_id_begin, seq_id_end)
def get(self, asym_or_entity, range_id):
"""Get a range from an ID.
:param asym_or_entity: An :class:`ihm.Entity` or
:class:`ihm.AsymUnit` object representing the part of
the system to which the range will be applied.
:param str range_id: mmCIF ID
:return: A range as a :class:`ihm.Entity`, :class:`ihm.AsymUnit`,
:class:`ihm.EntityRange` or :class:`ihm.AsymUnitRange`
object.
"""
# range_id can be None if the entire asym/entity should be selected
# (e.g. for a non-polymer)
if range_id is None:
return asym_or_entity
else:
# Allow reading out-of-range ranges
return asym_or_entity(*self._id_map[range_id])
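# For example (sketch): after ranges.set('5', 1, 100), a later call to
# ranges.get(entity, '5') returns entity(1, 100), i.e. an
# ihm.EntityRange covering residues 1 through 100.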
class _AnalysisIDMapper(IDMapper):
"""Add extra handling to IDMapper for Analysis objects"""
def _set_object_id(self, obj, objid):
# Analysis objects are referenced by (protocol_id, analysis_id) but
# we only want to store analysis_id in the Analysis object itself
if self.id_attr is not None:
setattr(obj, self.id_attr, objid[1])
class _AnalysisStepIDMapper(IDMapper):
"""Add extra handling to IDMapper for the post processing category"""
def _make_new_object(self, newcls=None):
if newcls is None:
newcls = self._cls
if newcls is ihm.analysis.EmptyStep:
return newcls()
else:
return newcls(*self._cls_args, **self._cls_keys)
class _FeatureIDMapper(IDMapper):
"""Add extra handling to IDMapper for restraint features"""
def _make_new_object(self, newcls=None):
if newcls is None:
# Make Feature base class (takes no args)
return self._cls()
elif newcls is ihm.restraint.PseudoSiteFeature:
# Pseudo site constructor needs "site" argument
return newcls(None)
else:
# Make subclass (takes one ranges/atoms argument)
return newcls([])
def _update_old_object(self, obj, newcls=None):
super()._update_old_object(obj, newcls)
        # Add missing members if the base class was originally instantiated
if (newcls is ihm.restraint.ResidueFeature
and not hasattr(obj, 'ranges')):
obj.ranges = []
elif (newcls is ihm.restraint.AtomFeature
and not hasattr(obj, 'atoms')):
obj.atoms = []
elif (newcls is ihm.restraint.NonPolyFeature
and not hasattr(obj, 'objs')):
obj.objs = []
elif (newcls is ihm.restraint.PseudoSiteFeature
and not hasattr(obj, 'site')):
obj.site = None
class _GeometryIDMapper(IDMapper):
"""Add extra handling to IDMapper for geometric objects"""
_members = {ihm.geometry.Sphere: ('center', 'radius', 'transformation'),
ihm.geometry.Torus: ('center', 'transformation',
'major_radius', 'minor_radius'),
ihm.geometry.HalfTorus: ('center', 'transformation',
'major_radius', 'minor_radius',
'thickness'),
ihm.geometry.XAxis: ('transformation',),
ihm.geometry.YAxis: ('transformation',),
ihm.geometry.ZAxis: ('transformation',),
ihm.geometry.XYPlane: ('transformation',),
ihm.geometry.YZPlane: ('transformation',),
ihm.geometry.XZPlane: ('transformation',)}
def _make_new_object(self, newcls=None):
if newcls is None:
# Make GeometricObject base class (takes no args)
return self._cls()
else:
# Make subclass (takes variable number of args)
len_args = {ihm.geometry.Sphere: 2,
ihm.geometry.Torus: 3,
ihm.geometry.HalfTorus: 4}.get(newcls, 0)
return newcls(*(None,) * len_args)
def _update_old_object(self, obj, newcls=None):
# Don't revert a HalfTorus back to a Torus
if newcls is ihm.geometry.Torus \
and isinstance(obj, ihm.geometry.HalfTorus):
return
# Don't revert a derived class back to a base class
elif newcls and isinstance(obj, newcls):
return
super()._update_old_object(obj, newcls)
        # Add missing members if the base class was originally instantiated
for member in self._members.get(newcls, ()):
if not hasattr(obj, member):
setattr(obj, member, None)
class _CrossLinkIDMapper(IDMapper):
"""Add extra handling to IDMapper for cross links"""
def _make_new_object(self, newcls=None):
if newcls is None:
# Make base class (takes no args)
obj = self._cls()
# Need fits in case we never decide on a type
obj.fits = {}
return obj
elif newcls is ihm.restraint.AtomCrossLink:
return newcls(*(None,) * 6)
else:
return newcls(*(None,) * 4)
class _ReferenceIDMapper(IDMapper):
"""Add extra handling to IDMapper for ihm.reference.Reference objects"""
def _make_new_object(self, newcls=None):
if newcls is None or newcls is ihm.reference.Sequence:
return self._cls(*(None,) * 4)
else:
return newcls(*(None,) * 3)
class _FLRListAdapter:
"""Take objects from IDMapper and place them in objects in FLRData."""
def __init__(self, collection_dict, collection_list, flr_data):
self.collection_dict, self.flr_data = collection_dict, flr_data
self.collection_list = collection_list
def append(self, obj):
# We generally only have a single FLRData object, id=1
d = self.flr_data.get_by_id(1)
# Store in collection dict (by ID) in FLRData rather than a
# list in System
collection_dict = getattr(d, self.collection_dict)
collection_dict[obj._id] = obj
# Also store in list in FLRData if applicable
if self.collection_list is not None:
collection_list = getattr(d, self.collection_list)
collection_list.append(obj)
class _FLRIDMapper(IDMapper):
"""Handle mapping from mmCIF IDs to FLR Python objects.
This differs from the base IDMapper class in that created objects
are stored in the FLRData object, not in the System."""
def __init__(self, collection_dict, collection_list, flr_data, cls,
*args, **keys):
system_list = _FLRListAdapter(collection_dict, collection_list,
flr_data)
super().__init__(system_list, cls, *args, **keys)
class _DatasetAssemblyIDMapper:
"""Handle mapping from mmCIF dataset IDs to Python objects.
This is similar to IDMapper but is intended for objects like restraints
that don't have their own IDs but instead use the dataset ID.
:param list system_list: The list in :class:`ihm.System` that keeps
track of these objects.
:param datasets: Mapping from IDs to Dataset objects.
:param class cls: The base class for the Python objects. Its constructor
is expected to take a Dataset object as the first argument.
"""
def __init__(self, system_list, datasets, cls, *cls_args, **cls_keys):
self.system_list = system_list
self.datasets = datasets
self._obj_by_id = {}
self._cls = cls
self._cls_args = cls_args
self._cls_keys = cls_keys
def get_by_dataset(self, dataset_id, assembly_id):
dataset = self.datasets.get_by_id(dataset_id)
k = (dataset._id, assembly_id)
if k not in self._obj_by_id:
r = self._cls(dataset, *self._cls_args, **self._cls_keys)
self.system_list.append(r)
self._obj_by_id[k] = r
else:
r = self._obj_by_id[k]
return r
class _XLRestraintMapper:
"""Map entries to CrossLinkRestraint"""
def __init__(self, system_list):
self.system_list = system_list
self._seen_rsrs = {}
def get_by_attrs(self, dataset, linker):
"""Group all crosslinks with same dataset and linker in one
CrossLinkRestraint object"""
k = (dataset._id, linker)
if k not in self._seen_rsrs:
r = ihm.restraint.CrossLinkRestraint(dataset, linker)
self.system_list.append(r)
self._seen_rsrs[k] = r
return self._seen_rsrs[k]
def get_all(self):
"""Yield all objects seen so far (unordered)"""
return self._seen_rsrs.values()
class SystemReader:
"""Utility class to track global information for a :class:`ihm.System`
being read from a file, such as the mapping from IDs to objects
(as :class:`IDMapper` objects). This can be used by :class:`Handler`
subclasses."""
def __init__(self, model_class, starting_model_class, system=None):
#: The :class:`ihm.System` object being read in
self.system = system or ihm.System()
#: Mapping from ID to :class:`ihm.Software` objects
self.software = IDMapper(self.system.software, ihm.Software,
*(None,) * 4)
#: Mapping from ID to :class:`ihm.Citation` objects
self.citations = IDMapper(self.system.citations, ihm.Citation,
*(None,) * 8)
#: Mapping from ID to :class:`ihm.Revision` objects
self.revisions = IDMapper(self.system.revisions, ihm.Revision,
*(None,) * 4)
#: Mapping from ID to :class:`ihm.Entity` objects
self.entities = IDMapper(self.system.entities, _make_new_entity)
#: Mapping from ID to :class:`ihm.source.Manipulated` objects
self.src_gens = IDMapper(None, ihm.source.Manipulated)
#: Mapping from ID to :class:`ihm.source.Natural` objects
self.src_nats = IDMapper(None, ihm.source.Natural)
#: Mapping from ID to :class:`ihm.source.Synthetic` objects
self.src_syns = IDMapper(None, ihm.source.Synthetic)
#: Mapping from ID to :class:`ihm.AsymUnit` objects
self.asym_units = IDMapper(self.system.asym_units, ihm.AsymUnit, None)
#: Mapping from ID to :class:`ihm.ChemComp` objects
self.chem_comps = _ChemCompIDMapper(self.system._orphan_chem_comps,
ihm.ChemComp, *(None,) * 3)
#: Mapping from ID to :class:`ihm.reference.Alignment` objects
self.alignments = IDMapper(None, ihm.reference.Alignment)
#: Mapping from ID to :class:`ihm.reference.Reference` objects
self.references = _ReferenceIDMapper(None, ihm.reference.Sequence)
#: Mapping from ID to :class:`ihm.ChemDescriptor` objects
self.chem_descriptors = IDMapper(self.system.orphan_chem_descriptors,
ihm.ChemDescriptor, None)
#: Mapping from ID to :class:`ihm.Assembly` objects
self.assemblies = IDMapper(self.system.orphan_assemblies, ihm.Assembly)
#: Mapping from ID to :class:`ihm.AsymUnitRange`
#: or :class:`ihm.EntityRange` objects
self.ranges = RangeIDMapper()
#: Mapping from ID to :class:`ihm.location.Repository` objects
self.repos = IDMapper(self.system._orphan_repos,
ihm.location.Repository, None)
#: Mapping from ID to :class:`ihm.location.FileLocation` objects
self.external_files = IDMapper(self.system.locations,
ihm.location.FileLocation,
'/') # should always exist?
#: Mapping from ID to :class:`ihm.location.DatabaseLocation` objects
self.db_locations = IDMapper(self.system.locations,
ihm.location.DatabaseLocation, None, None)
#: Mapping from ID to :class:`ihm.dataset.Dataset` objects
self.datasets = IDMapper(self.system.orphan_datasets,
ihm.dataset.Dataset, None)
#: Mapping from ID to :class:`ihm.dataset.DatasetGroup` objects
self.dataset_groups = IDMapper(self.system.orphan_dataset_groups,
ihm.dataset.DatasetGroup)
#: Mapping from ID to :class:`ihm.startmodel.StartingModel` objects
self.starting_models = IDMapper(self.system.orphan_starting_models,
starting_model_class, *(None,) * 3)
#: Mapping from ID to :class:`ihm.representation.Representation`
#: objects
self.representations = IDMapper(self.system.orphan_representations,
ihm.representation.Representation)
#: Mapping from ID to :class:`ihm.protocol.Protocol` objects
self.protocols = IDMapper(self.system.orphan_protocols,
ihm.protocol.Protocol)
#: Mapping from ID to :class:`ihm.analysis.Step` objects
self.analysis_steps = _AnalysisStepIDMapper(None, ihm.analysis.Step,
*(None,) * 3)
#: Mapping from ID to :class:`ihm.analysis.Analysis` objects
self.analyses = _AnalysisIDMapper(None, ihm.analysis.Analysis)
#: Mapping from ID to :class:`ihm.model.Model` objects
self.models = IDMapper(None, model_class, *(None,) * 3)
#: Mapping from ID to :class:`ihm.model.ModelGroup` objects
self.model_groups = IDMapper(None, ihm.model.ModelGroup)
#: Mapping from ID to :class:`ihm.model.State` objects
self.states = IDMapper(None, ihm.model.State)
#: Mapping from ID to :class:`ihm.model.StateGroup` objects
self.state_groups = IDMapper(self.system.state_groups,
ihm.model.StateGroup)
#: Mapping from ID to :class:`ihm.model.Ensemble` objects
self.ensembles = IDMapper(self.system.ensembles,
ihm.model.Ensemble, *(None,) * 2)
#: Mapping from ID to :class:`ihm.model.LocalizationDensity` objects
self.densities = IDMapper(None,
ihm.model.LocalizationDensity, *(None,) * 2)
#: Mapping from ID to :class:`ihm.restraint.EM3DRestraint` objects
self.em3d_restraints = _DatasetAssemblyIDMapper(
self.system.restraints, self.datasets,
ihm.restraint.EM3DRestraint, None)
#: Mapping from ID to :class:`ihm.restraint.EM2DRestraint` objects
self.em2d_restraints = IDMapper(self.system.restraints,
ihm.restraint.EM2DRestraint,
*(None,) * 2)
#: Mapping from ID to :class:`ihm.restraint.SASRestraint` objects
self.sas_restraints = _DatasetAssemblyIDMapper(
self.system.restraints, self.datasets,
ihm.restraint.SASRestraint, None)
#: Mapping from ID to :class:`ihm.restraint.Feature` objects
self.features = _FeatureIDMapper(self.system.orphan_features,
ihm.restraint.Feature)
#: Mapping from ID to :class:`ihm.restraint.PseudoSite` objects
self.pseudo_sites = IDMapper(self.system.orphan_pseudo_sites,
ihm.restraint.PseudoSite, *(None,) * 3)
#: Mapping from ID to :class:`ihm.restraint.DerivedDistanceRestraint`
#: objects
self.dist_restraints = IDMapper(
self.system.restraints, ihm.restraint.DerivedDistanceRestraint,
*(None,) * 4)
#: Mapping from ID to :class:`ihm.restraint.HDXRestraint` objects
self.hdx_restraints = IDMapper(
self.system.restraints, ihm.restraint.HDXRestraint,
*(None,) * 2)
#: Mapping from ID to :class:`ihm.restraint.PredictedContactRestraint`
#: objects
self.pred_cont_restraints = IDMapper(
self.system.restraints, ihm.restraint.PredictedContactRestraint,
*(None,) * 5)
#: Mapping from ID to :class:`ihm.restraint.RestraintGroup` of
#: :class:`ihm.restraint.DerivedDistanceRestraint` objects
self.dist_restraint_groups = IDMapper(
self.system.restraint_groups, ihm.restraint.RestraintGroup)
#: Mapping from ID to :class:`ihm.restraint.RestraintGroup` of
#: :class:`ihm.restraint.PredictedContactRestraint` objects
self.pred_cont_restraint_groups = IDMapper(
self.system.restraint_groups, ihm.restraint.RestraintGroup)
#: Mapping from ID to :class:`ihm.geometry.GeometricObject` objects
self.geometries = _GeometryIDMapper(
self.system.orphan_geometric_objects, ihm.geometry.GeometricObject)
#: Mapping from ID to :class:`ihm.geometry.Center` objects
self.centers = IDMapper(self.system._orphan_centers,
ihm.geometry.Center, *(None,) * 3)
#: Mapping from ID to :class:`ihm.geometry.Transformation` objects
self.transformations = IDMapper(
self.system._orphan_geometric_transforms,
ihm.geometry.Transformation, *(None,) * 2)
#: Mapping from ID to :class:`ihm.geometry.Transformation` objects
#: used by :class:`ihm.dataset.TransformedDataset` objects (this is
#: distinct from :attr:`transformations` since they are stored in
#: separate tables, with different IDs, in the mmCIF file).
self.data_transformations = IDMapper(
self.system._orphan_dataset_transforms,
ihm.geometry.Transformation, *(None,) * 2)
#: Mapping from ID to :class:`ihm.restraint.GeometricRestraint` objects
self.geom_restraints = IDMapper(
self.system.restraints, ihm.restraint.GeometricRestraint,
*(None,) * 4)
#: Mapping from ID to :class:`ihm.restraint.CrossLinkRestraint` objects
self.xl_restraints = _XLRestraintMapper(self.system.restraints)
#: Mapping from ID to groups of
#: :class:`ihm.restraint.ExperimentalCrossLink` objects
self.experimental_xl_groups = IDMapper(None, list)
self.experimental_xl_groups.id_attr = None
#: Mapping from ID to :class:`ihm.restraint.ExperimentalCrossLink`
#: objects
self.experimental_xls = IDMapper(
None, ihm.restraint.ExperimentalCrossLink, *(None,) * 2)
#: Mapping from ID to :class:`ihm.restraint.CrossLink`
self.cross_links = _CrossLinkIDMapper(
None, ihm.restraint.CrossLink)
#: Mapping from ID to :class:`ihm.restraint.CrossLinkPseudoSite`
self.cross_link_pseudo_sites = IDMapper(
None, ihm.restraint.CrossLinkPseudoSite, None)
#: Mapping from ID to :class:`ihm.model.OrderedProcess` objects
self.ordered_procs = IDMapper(self.system.ordered_processes,
ihm.model.OrderedProcess, None)
#: Mapping from ID to :class:`ihm.model.ProcessStep` objects
self.ordered_steps = IDMapper(None, ihm.model.ProcessStep)
#: Mapping from ID to :class:`ihm.multi_state_scheme.MultiStateScheme`
#: objects
self.multi_state_schemes = IDMapper(
self.system.multi_state_schemes,
ihm.multi_state_scheme.MultiStateScheme,
None)
#: Mapping from ID to
#: :class:`ihm.multi_state_scheme.Connectivity` objects
self.multi_state_scheme_connectivities = IDMapper(
None,
ihm.multi_state_scheme.Connectivity,
None)
#: Mapping from ID to :class:`ihm.multi_state_scheme.KineticRate`
#: objects
self.kinetic_rates = IDMapper(
None,
ihm.multi_state_scheme.KineticRate)
#: Mapping from ID to
#: :class:`ihm.multi_state_scheme.RelaxationTime` objects
self.relaxation_times = IDMapper(
self.system._orphan_relaxation_times,
ihm.multi_state_scheme.RelaxationTime,
*(None,) * 2)
# FLR part
#: Mapping from ID to :class:`ihm.flr.FLRData` objects
self.flr_data = IDMapper(self.system.flr_data, ihm.flr.FLRData)
#: Mapping from ID to :class:`ihm.flr.InstSetting` objects
self.flr_inst_settings = _FLRIDMapper('_collection_flr_inst_setting',
None, self.flr_data,
ihm.flr.InstSetting)
#: Mapping from ID to :class:`ihm.flr.ExpCondition` objects
self.flr_exp_conditions = _FLRIDMapper('_collection_flr_exp_condition',
None, self.flr_data,
ihm.flr.ExpCondition)
#: Mapping from ID to :class:`ihm.flr.Instrument` objects
self.flr_instruments = _FLRIDMapper('_collection_flr_instrument',
None, self.flr_data,
ihm.flr.Instrument)
#: Mapping from ID to :class:`ihm.flr.EntityAssembly` objects
self.flr_entity_assemblies = _FLRIDMapper(
'_collection_flr_entity_assembly', None, self.flr_data,
ihm.flr.EntityAssembly)
#: Mapping from ID to :class:`ihm.flr.SampleCondition` objects
self.flr_sample_conditions = _FLRIDMapper(
'_collection_flr_sample_condition', None, self.flr_data,
ihm.flr.SampleCondition)
#: Mapping from ID to :class:`ihm.flr.Sample` objects
self.flr_samples = _FLRIDMapper('_collection_flr_sample', None,
self.flr_data, ihm.flr.Sample,
*(None,) * 6)
#: Mapping from ID to :class:`ihm.flr.Experiment` objects
self.flr_experiments = _FLRIDMapper('_collection_flr_experiment', None,
self.flr_data, ihm.flr.Experiment)
#: Mapping from ID to :class:`ihm.flr.Probe` objects
self.flr_probes = _FLRIDMapper('_collection_flr_probe', None,
self.flr_data, ihm.flr.Probe)
#: Mapping from ID to :class:`ihm.flr.PolyProbePosition` objects
self.flr_poly_probe_positions = _FLRIDMapper(
'_collection_flr_poly_probe_position', None, self.flr_data,
ihm.flr.PolyProbePosition, None)
#: Mapping from ID to :class:`ihm.flr.SampleProbeDetails` objects
self.flr_sample_probe_details = _FLRIDMapper(
'_collection_flr_sample_probe_details', None, self.flr_data,
ihm.flr.SampleProbeDetails, *(None,) * 5)
#: Mapping from ID to :class:`ihm.flr.PolyProbeConjugate` objects
self.flr_poly_probe_conjugates = _FLRIDMapper(
'_collection_flr_poly_probe_conjugate', 'poly_probe_conjugates',
self.flr_data, ihm.flr.PolyProbeConjugate, *(None,) * 4)
#: Mapping from ID to :class:`ihm.flr.FRETForsterRadius` objects
self.flr_fret_forster_radius = _FLRIDMapper(
'_collection_flr_fret_forster_radius', None, self.flr_data,
ihm.flr.FRETForsterRadius, *(None,) * 4)
#: Mapping from ID to :class:`ihm.flr.FRETCalibrationParameters`
#: objects
self.flr_fret_calibration_parameters = _FLRIDMapper(
'_collection_flr_fret_calibration_parameters', None, self.flr_data,
ihm.flr.FRETCalibrationParameters, *(None,) * 8)
#: Mapping from ID to :class:`ihm.flr.FRETAnalysis` objects
self.flr_fret_analyses = _FLRIDMapper(
'_collection_flr_fret_analysis', None, self.flr_data,
ihm.flr.FRETAnalysis, *(None,) * 9)
#: Mapping from ID to :class:`ihm.flr.LifetimeFitModel` objects
self.flr_lifetime_fit_models = _FLRIDMapper(
'_collection_flr_lifetime_fit_model', None, self.flr_data,
ihm.flr.LifetimeFitModel, *(None,) * 4)
#: Mapping from ID to :class:`ihm.flr.RefMeasurementGroup` objects
self.flr_ref_measurement_groups = _FLRIDMapper(
'_collection_flr_ref_measurement_group', None, self.flr_data,
ihm.flr.RefMeasurementGroup, *(None,))
#: Mapping from ID to :class:`ihm.flr.RefMeasurement` objects
self.flr_ref_measurements = _FLRIDMapper(
'_collection_flr_ref_measurement', None, self.flr_data,
ihm.flr.RefMeasurement, *(None,) * 3)
#: Mapping from ID to :class:`ihm.flr.RefMeasurementLifetime` objects
self.flr_ref_measurement_lifetimes = _FLRIDMapper(
'_collection_flr_ref_measurement_lifetime', None, self.flr_data,
ihm.flr.RefMeasurementLifetime, *(None,) * 3)
#: Mapping from ID to :class:`ihm.flr.PeakAssignment` objects
self.flr_peak_assignments = _FLRIDMapper(
'_collection_flr_peak_assignment', None,
self.flr_data, ihm.flr.PeakAssignment, *(None,) * 2)
#: Mapping from ID to :class:`ihm.flr.FRETDistanceRestraint` objects
self.flr_fret_distance_restraints = _FLRIDMapper(
'_collection_flr_fret_distance_restraint', None,
self.flr_data, ihm.flr.FRETDistanceRestraint, *(None,) * 10)
#: Mapping from ID to :class:`ihm.flr.FRETDistanceRestraintGroup`
#: objects
self.flr_fret_distance_restraint_groups = _FLRIDMapper(
'_collection_flr_fret_distance_restraint_group',
'distance_restraint_groups', self.flr_data,
ihm.flr.FRETDistanceRestraintGroup)
#: Mapping from ID to :class:`ihm.flr.FRETModelQuality` objects
self.flr_fret_model_qualities = _FLRIDMapper(
'_collection_flr_fret_model_quality', 'fret_model_qualities',
self.flr_data, ihm.flr.FRETModelQuality, *(None,) * 5)
#: Mapping from ID to :class:`ihm.flr.FRETModelDistance` objects
self.flr_fret_model_distances = _FLRIDMapper(
'_collection_flr_fret_model_distance', 'fret_model_distances',
self.flr_data, ihm.flr.FRETModelDistance, *(None,) * 4)
#: Mapping from ID to :class:`ihm.flr.FPSModeling` objects
self.flr_fps_modeling = _FLRIDMapper(
'_collection_flr_fps_modeling', None, self.flr_data,
ihm.flr.FPSModeling, *(None,) * 5)
#: Mapping from ID to :class:`ihm.flr.FPSGlobalParameters` objects
self.flr_fps_global_parameters = _FLRIDMapper(
'_collection_flr_fps_global_parameters', None,
self.flr_data, ihm.flr.FPSGlobalParameters, *(None,) * 20)
#: Mapping from ID to :class:`ihm.flr.FPSAVParameter` objects
self.flr_fps_av_parameters = _FLRIDMapper(
'_collection_flr_fps_av_parameter', None,
self.flr_data, ihm.flr.FPSAVParameter, *(None,) * 6)
#: Mapping from ID to :class:`ihm.flr.FPSAVModeling` objects
self.flr_fps_av_modeling = _FLRIDMapper(
'_collection_flr_fps_av_modeling', 'fps_modeling',
self.flr_data, ihm.flr.FPSAVModeling, *(None,) * 3)
#: Mapping from ID to :class:`ihm.flr.FPSMeanProbePosition` objects
self.flr_fps_mean_probe_positions = _FLRIDMapper(
'_collection_flr_fps_mean_probe_position', None,
self.flr_data, ihm.flr.FPSMeanProbePosition, *(None,) * 4)
#: Mapping from ID to :class:`ihm.flr.FPSMPPAtomPositionGroup` objects
self.flr_fps_mpp_atom_position_groups = IDMapper(
None, ihm.flr.FPSMPPAtomPositionGroup)
#: Mapping from ID to :class:`ihm.flr.FPSMPPAtomPosition` objects
self.flr_fps_mpp_atom_positions = _FLRIDMapper(
'_collection_flr_fps_mpp_atom_position', None,
self.flr_data, ihm.flr.FPSMPPAtomPosition, *(None,) * 4)
#: Mapping from ID to :class:`ihm.flr.FPSMPPModeling` objects
self.flr_fps_mpp_modeling = _FLRIDMapper(
'_collection_flr_fps_mpp_modeling', 'fps_modeling',
self.flr_data, ihm.flr.FPSMPPModeling, *(None,) * 3)
#: Mapping from ID to
#: :class:`ihm.flr.KineticRateFretAnalysisConnection` objects
self.flr_kinetic_rate_fret_analysis_connection = _FLRIDMapper(
'_collection_flr_kinetic_rate_fret_analysis_connection',
'kinetic_rate_fret_analysis_connections',
self.flr_data,
ihm.flr.KineticRateFretAnalysisConnection,
*(None,) * 3)
#: Mapping from ID to
#: :class:`ihm.flr.RelaxationTimeFretAnalysisConnection` objects
self.flr_relaxation_time_fret_analysis_connection = _FLRIDMapper(
'_collection_flr_relaxation_time_fret_analysis_connection',
'relaxation_time_fret_analysis_connections',
self.flr_data,
ihm.flr.RelaxationTimeFretAnalysisConnection,
*(None,) * 3)
def finalize(self):
# make sequence immutable (see also _make_new_entity)
for e in self.system.entities:
e.sequence = tuple(e.sequence)
class Handler:
"""Base class for all handlers of mmCIF data.
Each class handles a single category in the mmCIF or BinaryCIF file.
To add a new handler (for example to handle a custom category)
make a subclass and set the class attribute
`category` to the mmCIF category name (e.g. `_struct`). Provide
a `__call__` method. This will be called for each category (multiple
times for loop constructs) with the parameters to `__call__` filled in
with the same-named mmCIF keywords. For example the class::
class CustomHandler(Handler):
category = "_custom"
def __call__(self, key1, key2: int, key3: float):
pass
will be called with arguments `"x", 42, 1.0` when given the
mmCIF input::
_custom.key1 x
_custom.key2 42
_custom.key3 1.0
By default, the arguments will be passed as strings. Type annotations
(as above) can be used to get arguments as integers, floating-point
values, or booleans, using the annotations `int`, `float`, or `bool`
respectively (no other type annotations are permitted).
"""
#: Value passed to `__call__` for keywords not in the file
not_in_file = None
#: Value passed to `__call__` for data marked as omitted ('.') in the file
omitted = None
#: Value passed to `__call__` for data marked as unknown ('?') in the file
unknown = ihm.unknown
#: Keywords which are explicitly ignored (read() will not warn about their
#: presence in the file). These are usually things like ordinal fields
#: which we don't use.
ignored_keywords = []
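    # For example (sketch): a custom Handler subclass is typically passed
    # to ihm.reader.read() via its `handlers` argument (usage assumed):
    #     with open('input.cif') as fh:
    #         systems = ihm.reader.read(fh, handlers=[CustomHandler])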
def __init__(self, sysr):
#: Utility class to map IDs to Python objects.
self.sysr = sysr
def get_int(self, val):
"""Return int(val) or leave as is if None or ihm.unknown"""
return int(val) if val is not None and val is not ihm.unknown else val
def get_int_or_string(self, val):
"""Return val as an int or str as appropriate,
or leave as is if None or ihm.unknown"""
if val is None or val is ihm.unknown:
return val
else:
return int(val) if isinstance(val, int) or val.isdigit() else val
def get_float(self, val):
"""Return float(val) or leave as is if None or ihm.unknown"""
return (float(val) if val is not None
and val is not ihm.unknown else val)
_boolmap = {'YES': True, 'NO': False}
def get_bool(self, val):
"""Convert val to bool and return, or leave as is if None
or ihm.unknown"""
return (self._boolmap.get(val.upper(), None)
if val is not None and val is not ihm.unknown else val)
def get_lower(self, val):
"""Return lowercase string val or leave as is if None or ihm.unknown"""
return (val.lower()
if val is not None and val is not ihm.unknown else val)
def finalize(self):
"""Called at the end of each data block."""
pass
def end_save_frame(self):
"""Called at the end of each save frame."""
pass
def _get_asym_or_entity(self, asym_id, entity_id):
"""Return an :class:`AsymUnit`, or an :class:`Entity`
if asym_id is omitted"""
asym = self.sysr.asym_units.get_by_id_or_none(asym_id)
return asym if asym else self.sysr.entities.get_by_id(entity_id)
def copy_if_present(self, obj, data, keys=[], mapkeys={}):
"""Set obj.x from data['x'] for each x in keys if present in data.
The dict mapkeys is handled similarly except that its keys are
looked up in data and the corresponding value used to set obj."""
for key in keys:
d = data.get(key)
if d is not None:
setattr(obj, key, d)
for key, val in mapkeys.items():
d = data.get(key)
if d is not None:
setattr(obj, val, d)
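    # For example (sketch): given data = {'title': 'T', 'entry_id': 'E'},
    #     self.copy_if_present(obj, data, keys=('title',),
    #                          mapkeys={'entry_id': 'id'})
    # sets obj.title = 'T' and obj.id = 'E'.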
system = property(lambda self: self.sysr.system,
doc="The :class:`ihm.System` object to read into")
class _CollectionHandler(Handler):
category = '_ihm_entry_collection'
def __call__(self, id, name, details):
c = ihm.Collection(id=id, name=name, details=details)
self.system.collections.append(c)
class _StructHandler(Handler):
category = '_struct'
def __call__(self, title, entry_id, pdbx_model_details):
self.copy_if_present(self.system, locals(), keys=('title',),
mapkeys={'entry_id': 'id',
'pdbx_model_details': 'model_details'})
class _AuditConformHandler(Handler):
category = '_audit_conform'
def __call__(self, dict_name, dict_version):
# Reject old file versions if we can parse the version
if dict_name == 'ihm-extension.dic':
try:
major, minor = [int(x) for x in dict_version.split('.')]
if (major, minor) < (1, 0):
raise OldFileError(
"This version of python-ihm only supports reading "
"files that conform to version 1.0 or later of the "
"IHM extension dictionary. This file conforms to "
"version %s." % dict_version)
except ValueError:
pass
class _SoftwareHandler(Handler):
category = '_software'
def __call__(self, pdbx_ordinal, name, classification, description,
version, type, location, citation_id):
s = self.sysr.software.get_by_id(pdbx_ordinal)
self.copy_if_present(
s, locals(),
keys=('name', 'classification', 'description', 'version',
'type', 'location'))
s.citation = self.sysr.citations.get_by_id_or_none(citation_id)
class _CitationHandler(Handler):
category = '_citation'
def __call__(self, id, title, year, pdbx_database_id_pubmed,
journal_abbrev, journal_volume, pdbx_database_id_doi,
page_first, page_last):
s = self.sysr.citations.get_by_id(id)
s.is_primary = (id == 'primary')
self.copy_if_present(
s, locals(), keys=('title', 'year'),
mapkeys={'pdbx_database_id_pubmed': 'pmid',
'journal_abbrev': 'journal',
'journal_volume': 'volume',
'pdbx_database_id_doi': 'doi'})
if page_first is not None:
if page_last is not None:
s.page_range = (page_first, page_last)
else:
s.page_range = page_first
class _AuditAuthorHandler(Handler):
category = '_audit_author'
ignored_keywords = ['pdbx_ordinal']
def __call__(self, name):
self.system.authors.append(name)
class _AuditRevisionHistoryHandler(Handler):
category = '_pdbx_audit_revision_history'
def __call__(self, ordinal, data_content_type, major_revision: int,
minor_revision: int, revision_date):
r = self.sysr.revisions.get_by_id(ordinal)
r.data_content_type = data_content_type
r.major = major_revision
r.minor = minor_revision
r.date = util._get_iso_date(revision_date)
class _AuditRevisionDetailsHandler(Handler):
category = '_pdbx_audit_revision_details'
def __call__(self, revision_ordinal, provider, type, description):
r = self.sysr.revisions.get_by_id(revision_ordinal)
d = ihm.RevisionDetails(provider=provider, type=type,
description=description)
r.details.append(d)
class _AuditRevisionGroupHandler(Handler):
category = '_pdbx_audit_revision_group'
def __call__(self, revision_ordinal, group):
r = self.sysr.revisions.get_by_id(revision_ordinal)
r.groups.append(group)
class _AuditRevisionCategoryHandler(Handler):
category = '_pdbx_audit_revision_category'
def __call__(self, revision_ordinal, category):
r = self.sysr.revisions.get_by_id(revision_ordinal)
r.categories.append(category)
class _AuditRevisionItemHandler(Handler):
category = '_pdbx_audit_revision_item'
def __call__(self, revision_ordinal, item):
r = self.sysr.revisions.get_by_id(revision_ordinal)
r.items.append(item)
class _DataUsageHandler(Handler):
category = '_pdbx_data_usage'
# Map type to corresponding subclass of ihm.DataUsage
_type_map = dict((x[1].type.lower(), x[1])
for x in inspect.getmembers(ihm, inspect.isclass)
if issubclass(x[1], ihm.DataUsage))
def __call__(self, type, name, details, url):
typ = type.lower() if type else 'other'
cls = self._type_map.get(typ, ihm.DataUsage)
self.system.data_usage.append(cls(details=details, name=name, url=url))
class _GrantHandler(Handler):
category = '_pdbx_audit_support'
def __call__(self, funding_organization, country, grant_number):
g = ihm.Grant(funding_organization=funding_organization,
country=country, grant_number=grant_number)
self.system.grants.append(g)
class _CitationAuthorHandler(Handler):
category = '_citation_author'
ignored_keywords = ['ordinal']
def __call__(self, citation_id, name):
s = self.sysr.citations.get_by_id(citation_id)
if name is not None:
s.authors.append(name)
class _DatabaseHandler(Handler):
category = '_database_2'
def __call__(self, database_code, database_id, pdbx_doi,
pdbx_database_accession):
d = ihm.Database(id=database_id, code=database_code,
doi=pdbx_doi, accession=pdbx_database_accession)
self.system.databases.append(d)
class _DatabaseStatusHandler(Handler):
category = '_pdbx_database_status'
# placeholder; the reader will otherwise only return strings or None
not_in_file = 0
_keys = ['entry_id', 'sg_entry', 'author_approval_type',
'author_release_status_code', 'date_author_approval',
'date_author_release_request', 'date_begin_deposition',
'date_begin_processing', 'date_begin_release_preparation',
'date_chemical_shifts', 'date_coordinates',
'date_deposition_form', 'date_end_processing',
'date_hold_chemical_shifts', 'date_hold_coordinates',
'date_hold_nmr_constraints', 'date_hold_struct_fact',
'date_manuscript', 'date_nmr_constraints', 'date_of_pdb_release',
'date_of_cs_release', 'date_of_mr_release', 'date_of_sf_release',
'date_struct_fact', 'date_submitted',
'dep_release_code_chemical_shifts',
'dep_release_code_coordinates',
'dep_release_code_nmr_constraints', 'dep_release_code_sequence',
'dep_release_code_struct_fact', 'deposit_site',
'hold_for_publication', 'methods_development_category',
'name_depositor', 'pdb_date_of_author_approval',
'pdb_format_compatible', 'process_site', 'rcsb_annotator',
'recvd_author_approval', 'recvd_chemical_shifts',
'recvd_coordinates', 'recvd_deposit_form',
'recvd_initial_deposition_date', 'recvd_internal_approval',
'recvd_manuscript', 'recvd_nmr_constraints', 'recvd_struct_fact',
'status_code', 'status_code_cs', 'status_code_mr',
'status_code_sf']
def __call__(self, *args):
# Just pass through all data items present in the file, as a dict
self.system.database_status._map = dict(
(k, v) for (k, v) in zip(self._keys, args)
if v != self.not_in_file)
class _ChemCompHandler(Handler):
category = '_chem_comp'
def __init__(self, *args):
super().__init__(*args)
# Map _chem_comp.type to corresponding subclass of ihm.ChemComp
self.type_map = dict((x[1].type.lower(), x[1])
for x in inspect.getmembers(ihm, inspect.isclass)
if issubclass(x[1], ihm.ChemComp))
def __call__(self, type, id, name, formula):
typ = 'other' if type is None else type.lower()
s = self.sysr.chem_comps.get_by_id(
id, self.type_map.get(typ, ihm.ChemComp))
self.copy_if_present(s, locals(), keys=('name', 'formula'))
class _ChemDescriptorHandler(Handler):
category = '_ihm_chemical_component_descriptor'
def __call__(self, id, auth_name, chemical_name, common_name,
smiles, smiles_canonical, inchi, inchi_key):
d = self.sysr.chem_descriptors.get_by_id(id)
self.copy_if_present(
d, locals(),
keys=('auth_name', 'chemical_name',
'common_name', 'smiles', 'smiles_canonical', 'inchi',
'inchi_key'))
class _EntityHandler(Handler):
category = '_entity'
def __init__(self, *args):
super().__init__(*args)
self.src_map = dict(
(x[1].src_method.lower(), x[1])
for x in inspect.getmembers(ihm.source, inspect.isclass)
if issubclass(x[1], ihm.source.Source)
and x[1] is not ihm.source.Source)
def __call__(self, id, details, type, src_method, formula_weight,
pdbx_description, pdbx_number_of_molecules):
s = self.sysr.entities.get_by_id(id)
self.copy_if_present(
s, locals(), keys=('details',),
mapkeys={'pdbx_description': 'description',
'pdbx_number_of_molecules': 'number_of_molecules'})
if src_method:
source_cls = self.src_map.get(src_method.lower(), None)
if source_cls and s.source is None:
s.source = source_cls()
# Force polymer if _entity.type says so, even if it doesn't look like
# one (e.g. just a single amino acid)
if type and type.lower() == 'polymer':
s._force_polymer = True
# Encourage branched if _entity.type says so (otherwise empty entities
# are assumed to be polymer)
if type and type.lower() == 'branched':
s._hint_branched = True
class _EntitySrcNatHandler(Handler):
category = '_entity_src_nat'
def __call__(self, entity_id, pdbx_src_id, pdbx_ncbi_taxonomy_id,
pdbx_organism_scientific, common_name, strain):
e = self.sysr.entities.get_by_id(entity_id)
s = self.sysr.src_nats.get_by_id(pdbx_src_id)
s.ncbi_taxonomy_id = pdbx_ncbi_taxonomy_id
s.scientific_name = pdbx_organism_scientific
s.common_name = common_name
s.strain = strain
e.source = s
class _EntitySrcSynHandler(Handler):
category = '_pdbx_entity_src_syn'
# Note that _pdbx_entity_src_syn.strain is not used in current PDB entries
def __call__(self, entity_id, pdbx_src_id, ncbi_taxonomy_id,
organism_scientific, organism_common_name):
e = self.sysr.entities.get_by_id(entity_id)
s = self.sysr.src_syns.get_by_id(pdbx_src_id)
s.ncbi_taxonomy_id = ncbi_taxonomy_id
s.scientific_name = organism_scientific
s.common_name = organism_common_name
e.source = s
class _StructRefHandler(Handler):
category = '_struct_ref'
def __init__(self, *args):
super().__init__(*args)
# Map db_name to subclass of ihm.reference.Sequence
self.type_map = dict(
(x[1]._db_name.lower(), x[1])
for x in inspect.getmembers(ihm.reference, inspect.isclass)
if issubclass(x[1], ihm.reference.Sequence)
and x[1] is not ihm.reference.Sequence)
def __call__(self, id, entity_id, db_name, db_code, pdbx_db_accession,
pdbx_seq_one_letter_code, details):
# todo: handle things that aren't sequences
e = self.sysr.entities.get_by_id(entity_id)
typ = self.type_map.get(db_name.lower())
ref = self.sysr.references.get_by_id(id, typ)
# Strip newlines and whitespace from code
if pdbx_seq_one_letter_code not in (None, ihm.unknown):
pdbx_seq_one_letter_code \
= pdbx_seq_one_letter_code.replace('\n', '').replace(' ', '')
self.copy_if_present(
ref, locals(), keys=('db_name', 'db_code', 'details'),
mapkeys={'pdbx_db_accession': 'accession',
'pdbx_seq_one_letter_code': '_partial_sequence'})
e.references.append(ref)
def finalize(self):
# The mmCIF file only contains the subset of the sequence that
# overlaps with our entities, but we need the full sequence. Pad it
# out with gaps if necessary so that indexing works correctly.
for e in self.system.entities:
for r in e.references:
if hasattr(r, '_partial_sequence'):
if r._partial_sequence in (None, ihm.unknown):
r.sequence = r._partial_sequence
else:
db_begin = min(a.db_begin for a in r._get_alignments())
r.sequence = '-' * (db_begin - 1) + r._partial_sequence
del r._partial_sequence
class _StructRefSeqHandler(Handler):
category = '_struct_ref_seq'
def __call__(self, align_id, ref_id, seq_align_beg: int,
seq_align_end: int, db_align_beg: int, db_align_end: int):
ref = self.sysr.references.get_by_id(ref_id)
align = self.sysr.alignments.get_by_id(align_id)
align.db_begin = db_align_beg
align.db_end = db_align_end
align.entity_begin = seq_align_beg
align.entity_end = seq_align_end
ref.alignments.append(align)
class _StructRefSeqDifHandler(Handler):
category = '_struct_ref_seq_dif'
def __call__(self, align_id, seq_num: int, db_mon_id, mon_id, details):
align = self.sysr.alignments.get_by_id(align_id)
db_monomer = self.sysr.chem_comps.get_by_id_or_none(db_mon_id)
monomer = self.sysr.chem_comps.get_by_id_or_none(mon_id)
sd = ihm.reference.SeqDif(seq_num, db_monomer, monomer, details)
align.seq_dif.append(sd)
class _EntitySrcGenHandler(Handler):
category = '_entity_src_gen'
def __call__(self, entity_id, pdbx_src_id, pdbx_gene_src_ncbi_taxonomy_id,
pdbx_gene_src_scientific_name, gene_src_common_name,
gene_src_strain, pdbx_host_org_ncbi_taxonomy_id,
pdbx_host_org_scientific_name, host_org_common_name,
pdbx_host_org_strain):
e = self.sysr.entities.get_by_id(entity_id)
s = self.sysr.src_gens.get_by_id(pdbx_src_id)
s.gene = ihm.source.Details(
ncbi_taxonomy_id=pdbx_gene_src_ncbi_taxonomy_id,
scientific_name=pdbx_gene_src_scientific_name,
common_name=gene_src_common_name, strain=gene_src_strain)
s.host = ihm.source.Details(
ncbi_taxonomy_id=pdbx_host_org_ncbi_taxonomy_id,
scientific_name=pdbx_host_org_scientific_name,
common_name=host_org_common_name,
strain=pdbx_host_org_strain)
e.source = s
class _EntityPolySeqHandler(Handler):
category = '_entity_poly_seq'
def __call__(self, entity_id, num, mon_id):
s = self.sysr.entities.get_by_id(entity_id)
seq_id = int(num)
if seq_id > len(s.sequence):
s.sequence.extend([None] * (seq_id - len(s.sequence)))
s.sequence[seq_id - 1] = self.sysr.chem_comps.get_by_id(mon_id)
class _EntityPolyHandler(Handler):
category = '_entity_poly'
def __init__(self, *args):
super().__init__(*args)
self._entity_info = {}
def __call__(self, entity_id, type, pdbx_seq_one_letter_code,
pdbx_seq_one_letter_code_can):
class EntityInfo:
pass
e = EntityInfo()
e.one_letter = tuple(util._get_codes(pdbx_seq_one_letter_code))
e.one_letter_can = tuple(util._get_codes(pdbx_seq_one_letter_code_can))
e.sequence_type = type
self._entity_info[entity_id] = e
def finalize(self):
for e in self.system.entities:
ei = self._entity_info.get(e._id, None)
if ei is None:
continue
# Fill in missing information (one-letter codes) for nonstandard
# residues
# todo: also add info for residues that aren't in entity_poly_seq
# at all
for i, comp in enumerate(e.sequence):
if comp.code is None and i < len(ei.one_letter):
comp.code = ei.one_letter[i]
if (comp.code_canonical is None
and i < len(ei.one_letter_can)):
comp.code_canonical = ei.one_letter_can[i]
class _EntityPolySegmentHandler(Handler):
category = '_ihm_entity_poly_segment'
def __call__(self, id, seq_id_begin: int, seq_id_end: int):
self.sysr.ranges.set(id, seq_id_begin, seq_id_end)
class _EntityNonPolyHandler(Handler):
category = '_pdbx_entity_nonpoly'
def __call__(self, entity_id, comp_id):
s = self.sysr.entities.get_by_id(entity_id)
s.sequence = (self.sysr.chem_comps.get_by_id(comp_id),)
class _StructAsymHandler(Handler):
category = '_struct_asym'
def __call__(self, id, entity_id, details):
s = self.sysr.asym_units.get_by_id(id)
# Keep this ID (like a user-assigned ID); don't reassign it on output
s.id = id
s.entity = self.sysr.entities.get_by_id(entity_id)
self.copy_if_present(s, locals(), keys=('details',))
class _AssemblyHandler(Handler):
category = '_ihm_struct_assembly'
def __call__(self, id, name, description):
s = self.sysr.assemblies.get_by_id(id)
self.copy_if_present(s, locals(), keys=('name', 'description'))
class _AssemblyDetailsHandler(Handler):
category = '_ihm_struct_assembly_details'
ignored_keywords = ['ordinal_id', 'entity_description']
def __init__(self, *args):
super().__init__(*args)
self._read_args = []
def __call__(self, assembly_id, parent_assembly_id, entity_poly_segment_id,
asym_id, entity_id):
a_id = assembly_id
a = self.sysr.assemblies.get_by_id(a_id)
parent_id = parent_assembly_id
if parent_id and parent_id != a_id and not a.parent:
a.parent = self.sysr.assemblies.get_by_id(parent_id)
if asym_id:
obj = self.sysr.asym_units.get_by_id(asym_id)
else:
obj = self.sysr.entities.get_by_id(entity_id)
# Postpone filling in range until finalize time, as we may not have
# read segments yet
self._read_args.append((a, obj, entity_poly_segment_id))
def finalize(self):
for (a, obj, entity_poly_segment_id) in self._read_args:
a.append(self.sysr.ranges.get(obj, entity_poly_segment_id))
self.system._make_complete_assembly()
# The order of components should not matter, so put in a consistent
# order so we can compare against other assemblies
complete = sorted(self.system.complete_assembly,
key=lambda x: id(x))
for a in self.system.orphan_assemblies:
# Any EntityRange or AsymUnitRange which covers an entire entity,
# replace with Entity or AsymUnit object
a[:] = [self._handle_component(x) for x in a]
# If the input file defines the complete assembly, transfer
# user-provided info to system.complete_assembly
if sorted(a, key=lambda x: id(x)) == complete:
self.system.complete_assembly.name = a.name
self.system.complete_assembly.description = a.description
def _handle_component(self, comp):
if isinstance(comp, ihm.EntityRange) \
and comp.seq_id_range == comp.entity.seq_id_range:
return comp.entity
if isinstance(comp, ihm.AsymUnitRange) \
and comp.seq_id_range == comp.asym.seq_id_range:
return comp.asym
else:
return comp
class _LocalFiles(ihm.location.Repository):
"""Placeholder for files stored locally"""
reference_provider = None
reference_type = 'Supplementary Files'
reference = None
refers_to = 'Other'
url = None
class _ExtRefHandler(Handler):
category = '_ihm_external_reference_info'
def __init__(self, *args):
super().__init__(*args)
self.type_map = {'doi': ihm.location.Repository,
'supplementary files': _LocalFiles}
def __call__(self, reference_id, reference_type, reference, associated_url,
details):
ref_id = reference_id
typ = 'doi' if reference_type is None else reference_type.lower()
repo = self.sysr.repos.get_by_id(
ref_id, self.type_map.get(typ, ihm.location.Repository))
self.copy_if_present(
repo, locals(), keys=('details',),
mapkeys={'reference': 'doi', 'associated_url': 'url'})
def finalize(self):
# Map use of placeholder _LocalFiles repository to repo=None
for location in self.system.locations:
if hasattr(location, 'repo') \
and isinstance(location.repo, _LocalFiles):
location.repo = None
class _ExtFileHandler(Handler):
category = '_ihm_external_files'
def __init__(self, *args):
super().__init__(*args)
# Map _ihm_external_files.content_type to corresponding
# subclass of ihm.location.FileLocation
self.type_map = dict(
(x[1].content_type.lower(), x[1])
for x in inspect.getmembers(ihm.location, inspect.isclass)
if issubclass(x[1], ihm.location.FileLocation)
and x[1] is not ihm.location.FileLocation)
def __call__(self, content_type, id, reference_id, details, file_path,
file_format, file_size_bytes):
typ = None if content_type is None else content_type.lower()
f = self.sysr.external_files.get_by_id(
id, self.type_map.get(typ, ihm.location.FileLocation))
f.repo = self.sysr.repos.get_by_id(reference_id)
# IHMCIF dictionary defines file size as a float, although only int
# values make sense, so allow for either ints or floats here
try:
f.file_size = self.get_int(file_size_bytes)
except ValueError:
f.file_size = self.get_float(file_size_bytes)
self.copy_if_present(
f, locals(), keys=['details', 'file_format'],
mapkeys={'file_path': 'path'})
# Handle DOI that is itself a file
if file_path is None:
f.path = '.'
class _DatasetListHandler(Handler):
category = '_ihm_dataset_list'
def __init__(self, *args):
super().__init__(*args)
# Map data_type to corresponding
# subclass of ihm.dataset.Dataset
self.type_map = dict(
(x[1].data_type.lower(), x[1])
for x in inspect.getmembers(ihm.dataset, inspect.isclass)
if issubclass(x[1], ihm.dataset.Dataset))
# Map old 'CX-MS' data to new class
self.type_map['cx-ms data'] = ihm.dataset.CXMSDataset
def __call__(self, data_type, id, details):
typ = None if data_type is None else data_type.lower()
f = self.sysr.datasets.get_by_id(
id, self.type_map.get(typ, ihm.dataset.Dataset))
f.details = details
f._allow_duplicates = True
class _DatasetGroupHandler(Handler):
category = '_ihm_dataset_group'
ignored_keywords = ['ordinal_id']
def __call__(self, id, name, application, details):
g = self.sysr.dataset_groups.get_by_id(id)
self.copy_if_present(g, locals(),
keys=('name', 'application', 'details'))
class _DatasetGroupLinkHandler(Handler):
category = '_ihm_dataset_group_link'
def __call__(self, group_id, dataset_list_id):
g = self.sysr.dataset_groups.get_by_id(group_id)
ds = self.sysr.datasets.get_by_id(dataset_list_id)
g.append(ds)
class _DatasetExtRefHandler(Handler):
category = '_ihm_dataset_external_reference'
def __call__(self, file_id, dataset_list_id):
ds = self.sysr.datasets.get_by_id(dataset_list_id)
f = self.sysr.external_files.get_by_id(file_id)
ds._add_location(f)
class _DatasetDBRefHandler(Handler):
category = '_ihm_dataset_related_db_reference'
def __init__(self, *args):
super().__init__(*args)
# Map data_type to corresponding
# subclass of ihm.location.DatabaseLocation
# or ihm.location.DatabaseLocation itself
self.type_map = dict(
(x[1].db_name.lower(), x[1])
for x in inspect.getmembers(ihm.location, inspect.isclass)
if issubclass(x[1], ihm.location.DatabaseLocation))
def __call__(self, dataset_list_id, db_name, id, version, details,
accession_code):
ds = self.sysr.datasets.get_by_id(dataset_list_id)
typ = None if db_name is None else db_name.lower()
dbloc = self.sysr.db_locations.get_by_id(id,
self.type_map.get(typ, None))
# Preserve user-provided name for unknown databases
if dbloc.db_name == 'Other' and db_name is not None:
dbloc.db_name = db_name
ds._add_location(dbloc)
self.copy_if_present(
dbloc, locals(), keys=['version', 'details'],
mapkeys={'accession_code': 'access_code'})
class _DataTransformationHandler(Handler):
category = '_ihm_data_transformation'
def __call__(self, id, tr_vector1, tr_vector2, tr_vector3, rot_matrix11,
rot_matrix21, rot_matrix31, rot_matrix12, rot_matrix22,
rot_matrix32, rot_matrix13, rot_matrix23, rot_matrix33):
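        # _get_matrix33 and _get_vector3 assemble the rot_matrix11..33 and
        # tr_vector1..3 keywords from locals() into a 3x3 matrix and a
        # 3-vector, respectively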
t = self.sysr.data_transformations.get_by_id(id)
t.rot_matrix = _get_matrix33(locals(), 'rot_matrix')
t.tr_vector = _get_vector3(locals(), 'tr_vector')
class _RelatedDatasetsHandler(Handler):
category = '_ihm_related_datasets'
ignored_keywords = ['ordinal_id']
def __call__(self, dataset_list_id_derived, dataset_list_id_primary,
transformation_id):
derived = self.sysr.datasets.get_by_id(dataset_list_id_derived)
primary = self.sysr.datasets.get_by_id(dataset_list_id_primary)
trans = self.sysr.data_transformations.get_by_id_or_none(
transformation_id)
if trans:
primary = ihm.dataset.TransformedDataset(
dataset=primary, transform=trans)
derived.parents.append(primary)
class _ModelRepresentationHandler(Handler):
category = '_ihm_model_representation'
def __call__(self, id, name, details):
rep = self.sysr.representations.get_by_id(id)
self.copy_if_present(rep, locals(), keys=('name', 'details'))
def _make_atom_segment(asym, rigid, primitive, count, smodel, description):
return ihm.representation.AtomicSegment(
asym_unit=asym, rigid=rigid, starting_model=smodel,
description=description)
def _make_residue_segment(asym, rigid, primitive, count, smodel, description):
return ihm.representation.ResidueSegment(
asym_unit=asym, rigid=rigid, primitive=primitive,
starting_model=smodel, description=description)
def _make_multi_residue_segment(asym, rigid, primitive, count, smodel,
description):
return ihm.representation.MultiResidueSegment(
asym_unit=asym, rigid=rigid, primitive=primitive,
starting_model=smodel, description=description)
def _make_feature_segment(asym, rigid, primitive, count, smodel, description):
return ihm.representation.FeatureSegment(
asym_unit=asym, rigid=rigid, primitive=primitive,
count=count, starting_model=smodel, description=description)
class _ModelRepresentationDetailsHandler(Handler):
category = '_ihm_model_representation_details'
ignored_keywords = ['entity_description']
_rigid_map = {'rigid': True, 'flexible': False, None: None}
_segment_factory = {'by-atom': _make_atom_segment,
'by-residue': _make_residue_segment,
'multi-residue': _make_multi_residue_segment,
'by-feature': _make_feature_segment}
def __init__(self, *args):
super().__init__(*args)
self._read_args = []
def __call__(self, entity_asym_id, entity_poly_segment_id,
representation_id, starting_model_id, model_object_primitive,
model_granularity, model_object_count: int, model_mode,
description):
# Postpone until finalize time as we may not have segments yet
self._read_args.append(
(entity_asym_id, entity_poly_segment_id,
representation_id, starting_model_id, model_object_primitive,
model_granularity, model_object_count, model_mode, description))
def finalize(self):
for (entity_asym_id, entity_poly_segment_id,
representation_id, starting_model_id, model_object_primitive,
model_granularity, model_object_count, model_mode,
description) in self._read_args:
asym = self.sysr.ranges.get(
self.sysr.asym_units.get_by_id(entity_asym_id),
entity_poly_segment_id)
rep = self.sysr.representations.get_by_id(representation_id)
smodel = self.sysr.starting_models.get_by_id_or_none(
starting_model_id)
primitive = self.get_lower(model_object_primitive)
gran = self.get_lower(model_granularity)
rigid = self._rigid_map[self.get_lower(model_mode)]
segment = self._segment_factory[gran](
asym, rigid, primitive, model_object_count,
smodel, description)
rep.append(segment)
# todo: support user subclass of StartingModel, pass it coordinates, seqdif
class _StartingModelDetailsHandler(Handler):
category = '_ihm_starting_model_details'
ignored_keywords = ['entity_description']
def __call__(self, starting_model_id, asym_id, entity_poly_segment_id,
dataset_list_id, starting_model_auth_asym_id,
starting_model_sequence_offset, description):
m = self.sysr.starting_models.get_by_id(starting_model_id)
# We might not have a suitable range yet for this ID, so fill this
# in at finalize time
m.asym_unit = (asym_id, entity_poly_segment_id)
m.dataset = self.sysr.datasets.get_by_id(dataset_list_id)
self.copy_if_present(
m, locals(), keys=('description',),
mapkeys={'starting_model_auth_asym_id': 'asym_id'})
if starting_model_sequence_offset is not None:
m.offset = int(starting_model_sequence_offset)
def finalize(self):
for m in self.sysr.system.orphan_starting_models:
# Skip any auto-generated models without range info
if m.asym_unit is None:
continue
# Replace tuple with real Asym/Entity range object
(asym_id, entity_poly_segment_id) = m.asym_unit
m.asym_unit = self.sysr.ranges.get(
self.sysr.asym_units.get_by_id(asym_id),
entity_poly_segment_id)
class _StartingComputationalModelsHandler(Handler):
category = '_ihm_starting_computational_models'
def __call__(self, starting_model_id, script_file_id, software_id):
m = self.sysr.starting_models.get_by_id(starting_model_id)
if script_file_id is not None:
m.script_file = self.sysr.external_files.get_by_id(script_file_id)
if software_id is not None:
m.software = self.sysr.software.get_by_id(software_id)
class _StartingComparativeModelsHandler(Handler):
category = '_ihm_starting_comparative_models'
ignored_keywords = ['ordinal_id']
def __call__(self, starting_model_id, template_dataset_list_id,
alignment_file_id, template_auth_asym_id,
starting_model_seq_id_begin: int,
starting_model_seq_id_end: int,
template_seq_id_begin: int, template_seq_id_end: int,
template_sequence_identity: float,
template_sequence_identity_denominator: int):
m = self.sysr.starting_models.get_by_id(starting_model_id)
dataset = self.sysr.datasets.get_by_id(template_dataset_list_id)
aln = self.sysr.external_files.get_by_id_or_none(alignment_file_id)
asym_id = template_auth_asym_id
seq_id_range = (starting_model_seq_id_begin, starting_model_seq_id_end)
template_seq_id_range = (template_seq_id_begin, template_seq_id_end)
identity = ihm.startmodel.SequenceIdentity(
template_sequence_identity, template_sequence_identity_denominator)
t = ihm.startmodel.Template(
dataset, asym_id, seq_id_range, template_seq_id_range,
identity, aln)
m.templates.append(t)
class _ProtocolHandler(Handler):
category = '_ihm_modeling_protocol'
ignored_keywords = ['ordinal_id', 'struct_assembly_description']
def __call__(self, id, protocol_name, num_steps, details):
p = self.sysr.protocols.get_by_id(id)
self.copy_if_present(p, locals(), mapkeys={'protocol_name': 'name'},
keys=['details'])
class _ProtocolDetailsHandler(Handler):
category = '_ihm_modeling_protocol_details'
def __call__(self, protocol_id, step_id, num_models_begin: int,
num_models_end: int, multi_scale_flag: bool,
multi_state_flag: bool, ordered_flag: bool,
ensemble_flag: bool, struct_assembly_id,
dataset_group_id, software_id, script_file_id, step_name,
step_method, description):
p = self.sysr.protocols.get_by_id(protocol_id)
assembly = self.sysr.assemblies.get_by_id_or_none(struct_assembly_id)
dg = self.sysr.dataset_groups.get_by_id_or_none(dataset_group_id)
software = self.sysr.software.get_by_id_or_none(software_id)
script = self.sysr.external_files.get_by_id_or_none(script_file_id)
s = ihm.protocol.Step(assembly=assembly, dataset_group=dg,
method=None, num_models_begin=num_models_begin,
num_models_end=num_models_end,
multi_scale=multi_scale_flag,
multi_state=multi_state_flag,
ordered=ordered_flag, ensemble=ensemble_flag,
software=software, script_file=script,
description=description)
s._id = step_id
self.copy_if_present(
s, locals(),
mapkeys={'step_name': 'name', 'step_method': 'method'})
p.steps.append(s)
class _PostProcessHandler(Handler):
category = '_ihm_modeling_post_process'
def __init__(self, *args):
super().__init__(*args)
# Map _ihm_modeling_post_process.type to corresponding subclass
# of ihm.analysis.Step
self.type_map = dict((x[1].type.lower(), x[1])
for x in inspect.getmembers(ihm.analysis,
inspect.isclass)
if issubclass(x[1], ihm.analysis.Step)
and x[1] is not ihm.analysis.Step)
def __call__(self, protocol_id, analysis_id, type, id,
num_models_begin: int, num_models_end: int,
struct_assembly_id, dataset_group_id,
software_id, script_file_id, feature, details):
protocol = self.sysr.protocols.get_by_id(protocol_id)
analysis = self.sysr.analyses.get_by_id((protocol_id, analysis_id))
if analysis._id not in [a._id for a in protocol.analyses]:
protocol.analyses.append(analysis)
typ = type.lower() if type is not None else 'other'
step = self.sysr.analysis_steps.get_by_id(
id, self.type_map.get(typ, ihm.analysis.Step))
analysis.steps.append(step)
step.details = details
if typ == 'none':
# If this step was forward referenced, feature will have been set
# to Python None - set it to explicit 'none' instead
step.feature = 'none'
else:
step.num_models_begin = num_models_begin
step.num_models_end = num_models_end
step.assembly = self.sysr.assemblies.get_by_id_or_none(
struct_assembly_id)
step.dataset_group = self.sysr.dataset_groups.get_by_id_or_none(
dataset_group_id)
step.software = self.sysr.software.get_by_id_or_none(software_id)
step.script_file = self.sysr.external_files.get_by_id_or_none(
script_file_id)
# Default to "other" if invalid method/feature read
try:
self.copy_if_present(step, locals(), keys=['feature'])
except ValueError:
step.feature = "other"
class _ModelListHandler(Handler):
category = '_ihm_model_list'
def __call__(self, model_id, model_name,
assembly_id, representation_id, protocol_id):
model = self.sysr.models.get_by_id(model_id)
self.copy_if_present(model, locals(), mapkeys={'model_name': 'name'})
model.assembly = self.sysr.assemblies.get_by_id_or_none(assembly_id)
model.representation = self.sysr.representations.get_by_id_or_none(
representation_id)
model.protocol = self.sysr.protocols.get_by_id_or_none(protocol_id)
class _ModelGroupHandler(Handler):
category = '_ihm_model_group'
def __call__(self, id, name, details):
model_group = self.sysr.model_groups.get_by_id(id)
self.copy_if_present(model_group, locals(), keys=('name', 'details'))
def finalize(self):
# Put all model groups not assigned to a state in their own state
model_groups_in_states = set()
for sg in self.system.state_groups:
for state in sg:
for model_group in state:
model_groups_in_states.add(model_group._id)
mgs = [mg for mgid, mg in self.sysr.model_groups._obj_by_id.items()
if mgid not in model_groups_in_states]
if mgs:
s = ihm.model.State(mgs)
self.system.state_groups.append(ihm.model.StateGroup([s]))
# Put all models not in a group in their own group in its own state
# (e.g. this will catch models from a non-IHM file)
models_in_groups = set()
for mg in self.sysr.model_groups._obj_by_id.values():
for m in mg:
models_in_groups.add(m._id)
ms = [m for mid, m in self.sysr.models._obj_by_id.items()
if mid not in models_in_groups]
if ms:
mg = ihm.model.ModelGroup(ms)
s = ihm.model.State([mg])
self.system.state_groups.append(ihm.model.StateGroup([s]))
class _ModelGroupLinkHandler(Handler):
category = '_ihm_model_group_link'
def __call__(self, group_id, model_id):
model_group = self.sysr.model_groups.get_by_id(group_id)
model = self.sysr.models.get_by_id(model_id)
model_group.append(model)
class _ModelRepresentativeHandler(Handler):
category = '_ihm_model_representative'
def __call__(self, model_group_id, model_id, selection_criteria):
model_group = self.sysr.model_groups.get_by_id(model_group_id)
model = self.sysr.models.get_by_id(model_id)
# Default to "other" if invalid criteria read
try:
rep = ihm.model.ModelRepresentative(model, selection_criteria)
except ValueError:
rep = ihm.model.ModelRepresentative(model,
"other selction criteria")
model_group.representatives.append(rep)
class _MultiStateHandler(Handler):
category = '_ihm_multi_state_modeling'
def __call__(self, state_group_id, state_id, population_fraction: float,
experiment_type, details, state_name, state_type):
state_group = self.sysr.state_groups.get_by_id(state_group_id)
state = self.sysr.states.get_by_id(state_id)
state_group.append(state)
state.population_fraction = population_fraction
self.copy_if_present(
state, locals(),
keys=['experiment_type', 'details'],
mapkeys={'state_name': 'name', 'state_type': 'type'})
class _MultiStateLinkHandler(Handler):
category = '_ihm_multi_state_model_group_link'
def __call__(self, state_id, model_group_id):
state = self.sysr.states.get_by_id(state_id)
model_group = self.sysr.model_groups.get_by_id(model_group_id)
state.append(model_group)
class _EnsembleHandler(Handler):
category = '_ihm_ensemble_info'
# Map subsample type to corresponding subclass
_type_map = dict((x[1].sub_sampling_type.lower(), x[1])
for x in inspect.getmembers(ihm.model, inspect.isclass)
if issubclass(x[1], ihm.model.Subsample))
def __call__(self, ensemble_id, model_group_id, post_process_id,
ensemble_file_id, num_ensemble_models: int,
ensemble_precision_value: float, ensemble_name,
ensemble_clustering_method, ensemble_clustering_feature,
details, sub_sampling_type,
num_ensemble_models_deposited: int,
model_group_superimposed_flag: bool):
ensemble = self.sysr.ensembles.get_by_id(ensemble_id)
mg = self.sysr.model_groups.get_by_id_or_none(model_group_id)
pp = self.sysr.analysis_steps.get_by_id_or_none(post_process_id)
f = self.sysr.external_files.get_by_id_or_none(ensemble_file_id)
ensemble.model_group = mg
ensemble.num_models = num_ensemble_models
ensemble._num_deposited = num_ensemble_models_deposited
ensemble.precision = ensemble_precision_value
if sub_sampling_type:
ensemble._sub_sampling_type = sub_sampling_type.lower()
        # num_ensemble_models_deposited is stored only internally (in
        # _num_deposited); on output it should match the size of the
        # model group anyway
ensemble.post_process = pp
ensemble.file = f
ensemble.details = details
ensemble.superimposed = model_group_superimposed_flag
# Default to "other" if invalid method/feature read
try:
ensemble.clustering_method = ensemble_clustering_method
except ValueError:
ensemble.clustering_method = "Other"
try:
ensemble.clustering_feature = ensemble_clustering_feature
except ValueError:
ensemble.clustering_feature = "other"
self.copy_if_present(
ensemble, locals(),
mapkeys={'ensemble_name': 'name'})
def finalize(self):
for e in self.sysr.system.ensembles:
if hasattr(e, '_sub_sampling_type'):
t = self._type_map.get(e._sub_sampling_type,
ihm.model.Subsample)
for s in e.subsamples:
s.__class__ = t
del e._sub_sampling_type
class _NotModeledResidueRangeHandler(Handler):
category = '_ihm_residues_not_modeled'
def __call__(self, model_id, asym_id, seq_id_begin, seq_id_end,
reason):
model = self.sysr.models.get_by_id(model_id)
asym = self.sysr.asym_units.get_by_id(asym_id)
# Allow for out-of-range seq_ids for now
rr = ihm.model.NotModeledResidueRange(
asym, int(seq_id_begin), int(seq_id_end))
# Default to "Other" if invalid reason read
try:
rr.reason = reason
except ValueError:
rr.reason = "Other"
model.not_modeled_residue_ranges.append(rr)
class _SubsampleHandler(Handler):
category = '_ihm_ensemble_sub_sample'
def __call__(self, name, ensemble_id, num_models: int, model_group_id,
file_id):
ensemble = self.sysr.ensembles.get_by_id(ensemble_id)
mg = self.sysr.model_groups.get_by_id_or_none(model_group_id)
f = self.sysr.external_files.get_by_id_or_none(file_id)
        # We don't know the type yet (not until the ensemble is read); this
        # will be corrected by _EnsembleHandler.finalize()
ss = ihm.model.Subsample(
name=name, num_models=num_models, model_group=mg, file=f)
ensemble.subsamples.append(ss)
class _DensityHandler(Handler):
category = '_ihm_localization_density_files'
def __init__(self, *args):
super().__init__(*args)
self._read_args = []
def __call__(self, id, ensemble_id, file_id, asym_id,
entity_poly_segment_id):
# Postpone handling until finalize time, since we might not have
# ranges to resolve entity_poly_segment_id yet
self._read_args.append((id, ensemble_id, file_id, asym_id,
entity_poly_segment_id))
def finalize(self):
for (id, ensemble_id, file_id, asym_id,
entity_poly_segment_id) in self._read_args:
density = self.sysr.densities.get_by_id(id)
ensemble = self.sysr.ensembles.get_by_id(ensemble_id)
f = self.sysr.external_files.get_by_id(file_id)
asym = self.sysr.ranges.get(
self.sysr.asym_units.get_by_id(asym_id),
entity_poly_segment_id)
density.asym_unit = asym
density.file = f
ensemble.densities.append(density)
class _EM3DRestraintHandler(Handler):
category = '_ihm_3dem_restraint'
def __call__(self, dataset_list_id, struct_assembly_id,
fitting_method_citation_id, map_segment_flag: bool,
fitting_method, number_of_gaussians: int, model_id,
cross_correlation_coefficient: float, details):
# EM3D restraints don't have their own IDs - they use the dataset
# and assembly IDs
r = self.sysr.em3d_restraints.get_by_dataset(dataset_list_id,
struct_assembly_id)
r.assembly = self.sysr.assemblies.get_by_id_or_none(struct_assembly_id)
r.fitting_method_citation = self.sysr.citations.get_by_id_or_none(
fitting_method_citation_id)
self.copy_if_present(r, locals(), keys=('fitting_method', 'details'))
r.segment = map_segment_flag
r.number_of_gaussians = number_of_gaussians
model = self.sysr.models.get_by_id(model_id)
ccc = cross_correlation_coefficient
r.fits[model] = ihm.restraint.EM3DRestraintFit(ccc)
class _EM2DRestraintHandler(Handler):
category = '_ihm_2dem_class_average_restraint'
def __call__(self, id, dataset_list_id, number_raw_micrographs: int,
pixel_size_width: float, pixel_size_height: float,
image_resolution: float, image_segment_flag: bool,
number_of_projections: int, struct_assembly_id, details):
r = self.sysr.em2d_restraints.get_by_id(id)
r.dataset = self.sysr.datasets.get_by_id(dataset_list_id)
r.number_raw_micrographs = number_raw_micrographs
r.pixel_size_width = pixel_size_width
r.pixel_size_height = pixel_size_height
r.image_resolution = image_resolution
r.segment = image_segment_flag
r.number_of_projections = number_of_projections
r.assembly = self.sysr.assemblies.get_by_id_or_none(
struct_assembly_id)
self.copy_if_present(r, locals(), keys=('details',))
class _EM2DFittingHandler(Handler):
category = '_ihm_2dem_class_average_fitting'
def __call__(self, restraint_id, model_id,
cross_correlation_coefficient: float,
tr_vector1, tr_vector2, tr_vector3, rot_matrix11,
rot_matrix21, rot_matrix31, rot_matrix12, rot_matrix22,
rot_matrix32, rot_matrix13, rot_matrix23, rot_matrix33):
r = self.sysr.em2d_restraints.get_by_id(restraint_id)
model = self.sysr.models.get_by_id(model_id)
ccc = cross_correlation_coefficient
tr_vector = _get_vector3(locals(), 'tr_vector')
rot_matrix = _get_matrix33(locals(), 'rot_matrix')
r.fits[model] = ihm.restraint.EM2DRestraintFit(
cross_correlation_coefficient=ccc, rot_matrix=rot_matrix,
tr_vector=tr_vector)
class _SASRestraintHandler(Handler):
category = '_ihm_sas_restraint'
def __call__(self, dataset_list_id, struct_assembly_id,
profile_segment_flag: bool, fitting_atom_type, fitting_method,
details, fitting_state, radius_of_gyration: float,
number_of_gaussians: int, model_id, chi_value: float):
# SAS restraints don't have their own IDs - they use the dataset and
# assembly IDs
r = self.sysr.sas_restraints.get_by_dataset(dataset_list_id,
struct_assembly_id)
r.assembly = self.sysr.assemblies.get_by_id_or_none(
struct_assembly_id)
r.segment = profile_segment_flag
self.copy_if_present(
r, locals(),
keys=('fitting_atom_type', 'fitting_method', 'details'))
fs = (fitting_state if fitting_state not in (None, ihm.unknown)
else 'Single')
r.multi_state = fs.lower() != 'single'
r.radius_of_gyration = radius_of_gyration
r.number_of_gaussians = number_of_gaussians
model = self.sysr.models.get_by_id(model_id)
r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi_value)
class _SphereObjSiteHandler(Handler):
category = '_ihm_sphere_obj_site'
ignored_keywords = ['ordinal_id']
def __call__(self, model_id, asym_id, rmsf: float, seq_id_begin,
seq_id_end, cartn_x, cartn_y, cartn_z, object_radius):
model = self.sysr.models.get_by_id(model_id)
asym = self.sysr.asym_units.get_by_id(asym_id)
s = ihm.model.Sphere(
asym_unit=asym, seq_id_range=(int(seq_id_begin), int(seq_id_end)),
x=float(cartn_x), y=float(cartn_y), z=float(cartn_z),
radius=float(object_radius), rmsf=rmsf)
model.add_sphere(s)
class _AtomSiteHandler(Handler):
category = '_atom_site'
def __init__(self, *args):
super().__init__(*args)
self._missing_sequence = collections.defaultdict(dict)
# Mapping from asym+auth_seq_id to internal ID
self._seq_id_map = {}
def _get_seq_id_from_auth(self, auth_seq_id, pdbx_pdb_ins_code, asym):
"""Get an internal seq_id for something not a polymer (nonpolymer,
water, branched), given author-provided info"""
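        # e.g. the first water seen with auth_seq_id 101 gets seq_id 1,
        # the next (102) gets seq_id 2, and so on; the author-provided
        # numbering is preserved in asym.auth_seq_id_map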
if asym._id not in self._seq_id_map:
self._seq_id_map[asym._id] = {}
m = self._seq_id_map[asym._id]
        # Treat '?' (unknown) and '.' (missing) insertion codes equivalently
if pdbx_pdb_ins_code is ihm.unknown:
pdbx_pdb_ins_code = None
auth = (auth_seq_id, pdbx_pdb_ins_code)
if auth not in m:
# Assign a new ID starting from 1
seq_id = len(m) + 1
m[auth] = seq_id
# Add this info to the seq_id -> auth_seq_id mapping too
if asym.auth_seq_id_map == 0:
asym.auth_seq_id_map = {}
asym.auth_seq_id_map[seq_id] = (auth_seq_id, pdbx_pdb_ins_code)
return m[auth]
def __call__(self, pdbx_pdb_model_num, label_asym_id,
b_iso_or_equiv: float, label_seq_id: int, label_atom_id,
type_symbol, cartn_x: float, cartn_y: float, cartn_z: float,
occupancy: float, group_pdb, auth_seq_id, pdbx_pdb_ins_code,
auth_asym_id, label_comp_id, label_alt_id):
# seq_id can be None for non-polymers (HETATM)
seq_id = label_seq_id
# todo: handle fields other than those output by us
model = self.sysr.models.get_by_id(pdbx_pdb_model_num)
if label_asym_id is None:
# If no asym_id is provided (e.g. minimal PyMOL output) then
# use the author-provided ID instead
asym = self.sysr.asym_units.get_by_id(auth_asym_id)
# Chances are the entity_poly table is missing too, so remember
# the comp_id to help us construct missing sequence info
self._missing_sequence[asym][seq_id] = label_comp_id
else:
asym = self.sysr.asym_units.get_by_id(label_asym_id)
auth_seq_id = self.get_int_or_string(auth_seq_id)
if seq_id is None:
# Fill in our internal seq_id using author-provided info
our_seq_id = self._get_seq_id_from_auth(
auth_seq_id, pdbx_pdb_ins_code, asym)
else:
our_seq_id = seq_id
group = 'ATOM' if group_pdb is None else group_pdb
a = ihm.model.Atom(
asym_unit=asym, seq_id=our_seq_id, atom_id=label_atom_id,
type_symbol=type_symbol, x=cartn_x, y=cartn_y,
z=cartn_z, het=group != 'ATOM', biso=b_iso_or_equiv,
occupancy=occupancy, alt_id=label_alt_id)
model.add_atom(a)
# Note any residues that have different seq_id and auth_seq_id
if (auth_seq_id is not None and seq_id is not None and
(seq_id != auth_seq_id
or pdbx_pdb_ins_code not in (None, ihm.unknown))):
if asym.auth_seq_id_map == 0:
asym.auth_seq_id_map = {}
asym.auth_seq_id_map[seq_id] = auth_seq_id, pdbx_pdb_ins_code
def finalize(self):
# Fill in missing Entity information from comp_ids
entity_from_seq = {}
for asym, comp_from_seq_id in self._missing_sequence.items():
if asym.entity is None:
# Fill in gaps in seq_id with UNK residues
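                # e.g. if only seq_ids 1 and 3 were seen in atom_site,
                # position 2 becomes UNK so the sequence has no holes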
seq_len = max(comp_from_seq_id.keys())
unk = ihm.LPeptideAlphabet()['UNK']
seq = [unk] * seq_len
for seq_id, comp_id in comp_from_seq_id.items():
seq[seq_id - 1] = self.sysr.chem_comps.get_by_id(comp_id)
seq = tuple(seq) # Lists are not hashable
if seq in entity_from_seq:
asym.entity = entity_from_seq[seq]
else:
asym.entity = ihm.Entity(seq)
entity_from_seq[seq] = asym.entity
self.system.entities.append(asym.entity)
class _StartingModelCoordHandler(Handler):
category = '_ihm_starting_model_coord'
def __call__(self, starting_model_id, group_pdb, type_symbol, atom_id,
asym_id, seq_id: int, cartn_x: float, cartn_y: float,
cartn_z: float, b_iso_or_equiv: float):
model = self.sysr.starting_models.get_by_id(starting_model_id)
asym = self.sysr.asym_units.get_by_id(asym_id)
# seq_id can be None for non-polymers (HETATM)
group = 'ATOM' if group_pdb is None else group_pdb
a = ihm.model.Atom(
asym_unit=asym, seq_id=seq_id, atom_id=atom_id,
type_symbol=type_symbol, x=cartn_x, y=cartn_y,
z=cartn_z, het=group != 'ATOM', biso=b_iso_or_equiv)
model.add_atom(a)
class _StartingModelSeqDifHandler(Handler):
category = '_ihm_starting_model_seq_dif'
def __call__(self, starting_model_id, db_seq_id: int, seq_id: int,
db_comp_id, details):
model = self.sysr.starting_models.get_by_id(starting_model_id)
sd = ihm.startmodel.SeqDif(db_seq_id=db_seq_id, seq_id=seq_id,
db_comp_id=db_comp_id,
details=details)
model.add_seq_dif(sd)
class _PolyResidueFeatureHandler(Handler):
category = '_ihm_poly_residue_feature'
def __call__(self, feature_id, entity_id, asym_id, seq_id_begin,
seq_id_end):
f = self.sysr.features.get_by_id(
feature_id, ihm.restraint.ResidueFeature)
asym_or_entity = self._get_asym_or_entity(asym_id, entity_id)
r1 = int(seq_id_begin)
r2 = int(seq_id_end)
        # Allow out-of-range seq_ids for now
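        # Calling an Entity or AsymUnit with a range returns the matching
        # EntityRange/AsymUnitRange object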
f.ranges.append(asym_or_entity(r1, r2))
class _FeatureListHandler(Handler):
category = '_ihm_feature_list'
def __call__(self, feature_id, details):
if details:
f = self.sysr.features.get_by_id(feature_id)
f.details = details
class _PolyAtomFeatureHandler(Handler):
category = '_ihm_poly_atom_feature'
def __call__(self, feature_id, entity_id, asym_id, seq_id, atom_id):
f = self.sysr.features.get_by_id(
feature_id, ihm.restraint.AtomFeature)
asym_or_entity = self._get_asym_or_entity(asym_id, entity_id)
seq_id = int(seq_id)
atom = asym_or_entity.residue(seq_id).atom(atom_id)
f.atoms.append(atom)
class _NonPolyFeatureHandler(Handler):
category = '_ihm_non_poly_feature'
def __call__(self, feature_id, entity_id, asym_id, atom_id):
asym_or_entity = self._get_asym_or_entity(asym_id, entity_id)
if atom_id is None:
f = self.sysr.features.get_by_id(
feature_id, ihm.restraint.NonPolyFeature)
f.objs.append(asym_or_entity)
else:
f = self.sysr.features.get_by_id(
feature_id, ihm.restraint.AtomFeature)
# todo: handle multiple copies, e.g. waters?
atom = asym_or_entity.residue(1).atom(atom_id)
f.atoms.append(atom)
class _PseudoSiteFeatureHandler(Handler):
category = '_ihm_pseudo_site_feature'
def __call__(self, feature_id, pseudo_site_id):
f = self.sysr.features.get_by_id(feature_id,
ihm.restraint.PseudoSiteFeature)
p = self.sysr.pseudo_sites.get_by_id(pseudo_site_id)
f.site = p
class _PseudoSiteHandler(Handler):
category = '_ihm_pseudo_site'
def __call__(self, id, cartn_x: float, cartn_y: float, cartn_z: float,
radius: float, description):
p = self.sysr.pseudo_sites.get_by_id(id)
p.x = cartn_x
p.y = cartn_y
p.z = cartn_z
p.radius = radius
p.description = description
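# Factories mapping restraint_type values to the matching DistanceRestraint
# subclass. A harmonic restraint stores a single target distance, taken
# from the lower limit and falling back to the upper limit if the lower
# one is absent.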
def _make_harmonic(low, up):
return ihm.restraint.HarmonicDistanceRestraint(up if low is None else low)
def _make_upper_bound(low, up):
return ihm.restraint.UpperBoundDistanceRestraint(up)
def _make_lower_bound(low, up):
return ihm.restraint.LowerBoundDistanceRestraint(low)
def _make_lower_upper_bound(low, up):
return ihm.restraint.LowerUpperBoundDistanceRestraint(
distance_lower_limit=low, distance_upper_limit=up)
def _make_unknown_distance(low, up):
return ihm.restraint.DistanceRestraint()
_handle_distance = {'harmonic': _make_harmonic,
'upper bound': _make_upper_bound,
'lower bound': _make_lower_bound,
'lower and upper bound': _make_lower_upper_bound,
None: _make_unknown_distance}
class _DerivedDistanceRestraintHandler(Handler):
category = '_ihm_derived_distance_restraint'
_cond_map = {'ALL': True, 'ANY': False, None: None}
def __call__(self, id, group_id, dataset_list_id, feature_id_1,
feature_id_2, restraint_type, group_conditionality,
probability: float, mic_value: float,
distance_lower_limit: float, distance_upper_limit: float):
r = self.sysr.dist_restraints.get_by_id(id)
if group_id is not None:
rg = self.sysr.dist_restraint_groups.get_by_id(group_id)
rg.append(r)
r.dataset = self.sysr.datasets.get_by_id_or_none(dataset_list_id)
r.feature1 = self.sysr.features.get_by_id(feature_id_1)
r.feature2 = self.sysr.features.get_by_id(feature_id_2)
r.distance = _handle_distance[restraint_type](distance_lower_limit,
distance_upper_limit)
r.restrain_all = self._cond_map[group_conditionality]
r.probability = probability
r.mic_value = mic_value
class _HDXRestraintHandler(Handler):
category = '_ihm_hdx_restraint'
def __call__(self, id, dataset_list_id, feature_id,
protection_factor: float, details):
r = self.sysr.hdx_restraints.get_by_id(id)
r.dataset = self.sysr.datasets.get_by_id_or_none(dataset_list_id)
r.feature = self.sysr.features.get_by_id(feature_id)
r.protection_factor = protection_factor
r.details = details
class _PredictedContactRestraintHandler(Handler):
category = '_ihm_predicted_contact_restraint'
def _get_resatom(self, asym_id, seq_id, atom_id):
asym = self.sysr.asym_units.get_by_id(asym_id)
resatom = asym.residue(seq_id)
if atom_id:
resatom = resatom.atom(atom_id)
return resatom
def __call__(self, id, group_id, dataset_list_id, asym_id_1,
seq_id_1: int, rep_atom_1, asym_id_2, seq_id_2: int,
rep_atom_2, restraint_type, probability: float,
distance_lower_limit: float, distance_upper_limit: float,
model_granularity, software_id):
r = self.sysr.pred_cont_restraints.get_by_id(id)
if group_id is not None:
rg = self.sysr.pred_cont_restraint_groups.get_by_id(group_id)
rg.append(r)
r.dataset = self.sysr.datasets.get_by_id_or_none(dataset_list_id)
r.resatom1 = self._get_resatom(asym_id_1, seq_id_1, rep_atom_1)
r.resatom2 = self._get_resatom(asym_id_2, seq_id_2, rep_atom_2)
r.distance = _handle_distance[restraint_type](distance_lower_limit,
distance_upper_limit)
r.by_residue = self.get_lower(model_granularity) == 'by-residue'
r.probability = probability
r.software = self.sysr.software.get_by_id_or_none(software_id)
class _CenterHandler(Handler):
category = '_ihm_geometric_object_center'
def __call__(self, id, xcoord: float, ycoord: float, zcoord: float):
c = self.sysr.centers.get_by_id(id)
c.x = xcoord
c.y = ycoord
c.z = zcoord
class _TransformationHandler(Handler):
category = '_ihm_geometric_object_transformation'
def __call__(self, id, tr_vector1, tr_vector2, tr_vector3, rot_matrix11,
rot_matrix21, rot_matrix31, rot_matrix12, rot_matrix22,
rot_matrix32, rot_matrix13, rot_matrix23, rot_matrix33):
t = self.sysr.transformations.get_by_id(id)
t.rot_matrix = _get_matrix33(locals(), 'rot_matrix')
t.tr_vector = _get_vector3(locals(), 'tr_vector')
class _GeometricObjectHandler(Handler):
category = '_ihm_geometric_object_list'
# Map object_type to corresponding subclass (but not subsubclasses such
# as XYPlane)
_type_map = dict((x[1].type.lower(), x[1])
for x in inspect.getmembers(ihm.geometry, inspect.isclass)
if issubclass(x[1], ihm.geometry.GeometricObject)
and ihm.geometry.GeometricObject in x[1].__bases__)
def __call__(self, object_type, object_id, object_name,
object_description):
typ = object_type.lower() if object_type is not None else 'other'
g = self.sysr.geometries.get_by_id(
object_id, self._type_map.get(typ, ihm.geometry.GeometricObject))
self.copy_if_present(g, locals(),
mapkeys={'object_name': 'name',
'object_description': 'description'})
class _SphereHandler(Handler):
category = '_ihm_geometric_object_sphere'
def __call__(self, object_id, center_id, transformation_id,
radius_r: float):
s = self.sysr.geometries.get_by_id(object_id, ihm.geometry.Sphere)
s.center = self.sysr.centers.get_by_id_or_none(center_id)
s.transformation = self.sysr.transformations.get_by_id_or_none(
transformation_id)
s.radius = radius_r
class _TorusHandler(Handler):
category = '_ihm_geometric_object_torus'
def __call__(self, object_id, center_id, transformation_id,
major_radius_r: float, minor_radius_r: float):
t = self.sysr.geometries.get_by_id(object_id, ihm.geometry.Torus)
t.center = self.sysr.centers.get_by_id_or_none(center_id)
t.transformation = self.sysr.transformations.get_by_id_or_none(
transformation_id)
t.major_radius = major_radius_r
t.minor_radius = minor_radius_r
class _HalfTorusHandler(Handler):
category = '_ihm_geometric_object_half_torus'
_inner_map = {'inner half': True, 'outer half': False}
def __call__(self, object_id, thickness_th: float, section):
t = self.sysr.geometries.get_by_id(object_id,
ihm.geometry.HalfTorus)
t.thickness = thickness_th
section = section.lower() if section is not None else ''
t.inner = self._inner_map.get(section, None)
class _AxisHandler(Handler):
category = '_ihm_geometric_object_axis'
# Map axis_type to corresponding subclass
_type_map = dict((x[1].axis_type.lower(), x[1])
for x in inspect.getmembers(ihm.geometry, inspect.isclass)
if issubclass(x[1], ihm.geometry.Axis)
and x[1] is not ihm.geometry.Axis)
def __call__(self, axis_type, object_id, transformation_id):
typ = axis_type.lower() if axis_type is not None else 'other'
a = self.sysr.geometries.get_by_id(
object_id, self._type_map.get(typ, ihm.geometry.Axis))
a.transformation = self.sysr.transformations.get_by_id_or_none(
transformation_id)
class _PlaneHandler(Handler):
category = '_ihm_geometric_object_plane'
# Map plane_type to corresponding subclass
_type_map = dict((x[1].plane_type.lower(), x[1])
for x in inspect.getmembers(ihm.geometry, inspect.isclass)
if issubclass(x[1], ihm.geometry.Plane)
and x[1] is not ihm.geometry.Plane)
def __call__(self, plane_type, object_id, transformation_id):
typ = plane_type.lower() if plane_type is not None else 'other'
a = self.sysr.geometries.get_by_id(
object_id, self._type_map.get(typ, ihm.geometry.Plane))
a.transformation = self.sysr.transformations.get_by_id_or_none(
transformation_id)
class _GeometricRestraintHandler(Handler):
category = '_ihm_geometric_object_distance_restraint'
_cond_map = {'ALL': True, 'ANY': False, None: None}
# Map object_characteristic to corresponding subclass
_type_map = dict((x[1].object_characteristic.lower(), x[1])
for x in inspect.getmembers(ihm.restraint,
inspect.isclass)
if issubclass(x[1], ihm.restraint.GeometricRestraint))
def __call__(self, object_characteristic, id, dataset_list_id, object_id,
feature_id, restraint_type, harmonic_force_constant: float,
group_conditionality, distance_lower_limit: float,
distance_upper_limit: float):
typ = (object_characteristic or 'other').lower()
r = self.sysr.geom_restraints.get_by_id(
id, self._type_map.get(typ, ihm.restraint.GeometricRestraint))
r.dataset = self.sysr.datasets.get_by_id_or_none(dataset_list_id)
r.geometric_object = self.sysr.geometries.get_by_id(object_id)
r.feature = self.sysr.features.get_by_id(feature_id)
r.distance = _handle_distance[restraint_type](distance_lower_limit,
distance_upper_limit)
r.harmonic_force_constant = harmonic_force_constant
r.restrain_all = self._cond_map[group_conditionality]
class _PolySeqSchemeHandler(Handler):
category = '_pdbx_poly_seq_scheme'
if _format is not None:
_add_c_handler = _format.add_poly_seq_scheme_handler
# Note: do not change the ordering of the first 6 parameters to this
# function; the C parser expects them in this order
def __call__(self, asym_id, seq_id, pdb_seq_num, auth_seq_num,
pdb_ins_code, pdb_strand_id):
asym = self.sysr.asym_units.get_by_id(asym_id)
seq_id = self.get_int(seq_id)
if pdb_strand_id not in (None, ihm.unknown, asym_id):
asym._strand_id = pdb_strand_id
pdb_seq_num = self.get_int_or_string(pdb_seq_num)
auth_seq_num = self.get_int_or_string(auth_seq_num)
# Note any residues that have different seq_id and pdb_seq_num
if seq_id is not None and pdb_seq_num is not None \
and (seq_id != pdb_seq_num
or pdb_ins_code not in (None, ihm.unknown)):
if asym.auth_seq_id_map == 0:
asym.auth_seq_id_map = {}
asym.auth_seq_id_map[seq_id] = pdb_seq_num, pdb_ins_code
# Note any residues that have different pdb_seq_num and auth_seq_num
if (seq_id is not None and auth_seq_num is not None
and pdb_seq_num is not None and auth_seq_num != pdb_seq_num):
if asym.orig_auth_seq_id_map is None:
asym.orig_auth_seq_id_map = {}
asym.orig_auth_seq_id_map[seq_id] = auth_seq_num
def finalize(self):
for asym in self.sysr.system.asym_units:
# If every residue in auth_seq_id_map is offset by the same
# amount, and no insertion codes, replace the map with a
# simple offset
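            # e.g. {1: (5, None), 2: (6, None), 3: (7, None)} collapses
            # to the integer offset 4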
offset = self._get_auth_seq_id_offset(asym)
if offset is not None:
asym.auth_seq_id_map = offset
def _get_auth_seq_id_offset(self, asym):
"""Get the offset from seq_id to auth_seq_id. Return None if no
consistent offset exists."""
# Do nothing if the entity is not polymeric
if asym.entity is None or not asym.entity.is_polymeric():
return
# Do nothing if no map exists
if asym.auth_seq_id_map == 0:
return
rng = asym.seq_id_range
offset = None
for seq_id in range(rng[0], rng[1] + 1):
# If a residue isn't in the map, it has an effective offset of 0,
# which has to be inconsistent (since everything in the map has
# a nonzero offset by construction)
if seq_id not in asym.auth_seq_id_map:
return
auth_seq_id, ins_code = asym.auth_seq_id_map[seq_id]
# If auth_seq_id is a string, we can't use any offset
if not isinstance(auth_seq_id, int):
return
# If insertion codes are provided, we can't use any offset
if ins_code not in (None, ihm.unknown):
return
this_offset = auth_seq_id - seq_id
if offset is None:
offset = this_offset
elif offset != this_offset:
# Offset is inconsistent
return
return offset
class _NonPolySchemeHandler(Handler):
category = '_pdbx_nonpoly_scheme'
def __init__(self, *args):
super().__init__(*args)
self._scheme = {}
def __call__(self, asym_id, entity_id, pdb_seq_num, mon_id, pdb_ins_code,
pdb_strand_id, ndb_seq_num: int, auth_seq_num):
entity = self.sysr.entities.get_by_id(entity_id)
# nonpolymer entities generally have information on their chemical
# component in pdbx_entity_nonpoly, but if that's missing, at least
# get the name from mon_id here, so that we don't end up with an
# empty sequence
if len(entity.sequence) == 0 and mon_id:
if mon_id == 'HOH':
s = ihm.WaterChemComp()
else:
s = ihm.NonPolymerChemComp(
mon_id, name=entity.description)
entity.sequence.append(s)
asym = self.sysr.asym_units.get_by_id(asym_id)
if pdb_strand_id not in (None, ihm.unknown, asym_id):
asym._strand_id = pdb_strand_id
pdb_seq_num = self.get_int_or_string(pdb_seq_num)
auth_seq_num = self.get_int_or_string(auth_seq_num)
# Make mapping from author-provided numbering (*pdb_seq_num*, not
# auth_seq_num) to original and NDB numbering. We will use this at
# finalize time to map internal ID ("seq_id") to auth, orig_auth,
# and NDB numbering.
if asym_id not in self._scheme:
self._scheme[asym_id] = []
self._scheme[asym_id].append((pdb_seq_num, pdb_ins_code,
auth_seq_num, ndb_seq_num))
def finalize(self):
for asym in self.system.asym_units:
entity = asym.entity
if entity is None or entity.is_polymeric() or entity.is_branched():
continue
self._finalize_asym(asym)
def _finalize_asym(self, asym):
        # Add mapping info from scheme tables (to that already extracted
        # from atom_site); if there is a mismatch, prefer the atom_site info
scheme = self._scheme.get(asym._id)
if scheme:
if not asym.auth_seq_id_map:
asym.auth_seq_id_map = {}
if not asym.orig_auth_seq_id_map:
asym.orig_auth_seq_id_map = {}
# Make reverse mapping from author-provided info to internal ID
auth_map = {}
for key, val in asym.auth_seq_id_map.items():
auth_map[val] = key
for pdb_seq_num, pdb_ins_code, auth_seq_num, ndb_seq_num in scheme:
auth = (pdb_seq_num, pdb_ins_code)
seq_id = auth_map.get(auth)
if seq_id is None:
seq_id = len(asym.auth_seq_id_map) + 1
asym.auth_seq_id_map[seq_id] = auth
if pdb_seq_num != auth_seq_num:
asym.orig_auth_seq_id_map[seq_id] = auth_seq_num
if not asym.orig_auth_seq_id_map:
asym.orig_auth_seq_id_map = None
if asym.entity.type == 'water':
# Replace AsymUnit with WaterAsymUnit if necessary
if not isinstance(asym, ihm.WaterAsymUnit):
asym.__class__ = ihm.WaterAsymUnit
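                # (swapping __class__ in place keeps existing references
                # to this asym valid)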
asym.number = len(asym.auth_seq_id_map)
asym._water_sequence = [asym.entity.sequence[0]] * asym.number
# todo: add mapping from seq_id to ndb numbering?
class _BranchSchemeHandler(Handler):
category = '_pdbx_branch_scheme'
def __init__(self, *args):
super().__init__(*args)
self._scheme = {}
def __call__(self, asym_id, num: int, pdb_seq_num, auth_seq_num,
pdb_asym_id, pdb_ins_code):
asym = self.sysr.asym_units.get_by_id(asym_id)
if pdb_asym_id not in (None, ihm.unknown, asym_id):
asym._strand_id = pdb_asym_id
pdb_seq_num = self.get_int_or_string(pdb_seq_num)
auth_seq_num = self.get_int_or_string(auth_seq_num)
# Make mapping from author-provided numbering (*pdb_seq_num*, not
# auth_seq_num) to original and "num" numbering. We will use this at
# finalize time to map internal ID ("seq_id") to auth, orig_auth,
# and "num" numbering.
if asym_id not in self._scheme:
self._scheme[asym_id] = []
self._scheme[asym_id].append((pdb_seq_num, pdb_ins_code,
auth_seq_num, num))
def finalize(self):
need_map_num = False
for asym in self.system.asym_units:
entity = asym.entity
if entity is None or not entity.is_branched():
continue
self._finalize_asym(asym)
if asym.num_map:
need_map_num = True
if need_map_num:
self._reassign_seq_ids()
def _reassign_seq_ids(self):
"""Change provisional seq_ids so that they match
_pdbx_branch_scheme.num"""
for m in self.sysr.models.get_all():
for atom in m._atoms:
if atom.asym_unit.num_map:
atom.seq_id = atom.asym_unit.num_map[atom.seq_id]
def _finalize_asym(self, asym):
# Populate auth_seq_id mapping from scheme tables, and correct
# any incorrect seq_ids assigned in atom_site to use num
scheme = self._scheme.get(asym._id, [])
# Make reverse mapping from atom_site author-provided info
# to internal ID
auth_map = {}
if asym.auth_seq_id_map:
for key, val in asym.auth_seq_id_map.items():
auth_map[val] = key
asym.auth_seq_id_map = {}
asym.orig_auth_seq_id_map = {}
asym.num_map = {}
for pdb_seq_num, pdb_ins_code, auth_seq_num, num in scheme:
asym.auth_seq_id_map[num] = (pdb_seq_num, pdb_ins_code)
if pdb_seq_num != auth_seq_num:
asym.orig_auth_seq_id_map[num] = auth_seq_num
as_seq_id = auth_map.get((pdb_seq_num, pdb_ins_code))
if as_seq_id is not None:
if as_seq_id != num:
asym.num_map[as_seq_id] = num
del auth_map[(pdb_seq_num, pdb_ins_code)]
if not asym.orig_auth_seq_id_map:
asym.orig_auth_seq_id_map = None
if not asym.num_map:
asym.num_map = None
# If any residues from atom_site are left, we can't assign a num
# for them, so raise an error
if auth_map:
raise ValueError(
"For branched asym %s, the following author-provided "
"residue numbers (atom_site.auth_seq_id) are not present in "
"the pdbx_branch_scheme table: %s"
% (asym._id, ", ".join(repr(x[0]) for x in auth_map.keys())))
class _EntityBranchListHandler(Handler):
category = '_pdbx_entity_branch_list'
def __call__(self, entity_id, comp_id, num):
s = self.sysr.entities.get_by_id(entity_id)
# Assume num is 1-based (appears to be)
seq_id = int(num)
if seq_id > len(s.sequence):
s.sequence.extend([None] * (seq_id - len(s.sequence)))
s.sequence[seq_id - 1] = self.sysr.chem_comps.get_by_id(comp_id)
class _BranchDescriptorHandler(Handler):
category = '_pdbx_entity_branch_descriptor'
def __call__(self, entity_id, descriptor, type, program, program_version):
e = self.sysr.entities.get_by_id(entity_id)
d = ihm.BranchDescriptor(text=descriptor, type=type, program=program,
program_version=program_version)
e.branch_descriptors.append(d)
class _BranchLinkHandler(Handler):
category = '_pdbx_entity_branch_link'
def __call__(self, entity_id, entity_branch_list_num_1: int, atom_id_1,
leaving_atom_id_1, entity_branch_list_num_2: int, atom_id_2,
leaving_atom_id_2, value_order, details):
e = self.sysr.entities.get_by_id(entity_id)
lnk = ihm.BranchLink(num1=entity_branch_list_num_1, atom_id1=atom_id_1,
leaving_atom_id1=leaving_atom_id_1,
num2=entity_branch_list_num_2, atom_id2=atom_id_2,
leaving_atom_id2=leaving_atom_id_2,
order=value_order, details=details)
e.branch_links.append(lnk)
class _CrossLinkListHandler(Handler):
category = '_ihm_cross_link_list'
ignored_keywords = ['entity_description_1', 'entity_description_2',
'comp_id_1', 'comp_id_2']
_linkers_by_name = None
def __init__(self, *args):
super().__init__(*args)
self._seen_group_ids = set()
self._linker_type = {}
def _get_linker_by_name(self, name):
"""Look up old-style linker, by name rather than descriptor"""
if self._linkers_by_name is None:
self._linkers_by_name \
= dict((x[1].auth_name, x[1])
for x in inspect.getmembers(ihm.cross_linkers)
if isinstance(x[1], ihm.ChemDescriptor))
if name not in self._linkers_by_name:
self._linkers_by_name[name] = ihm.ChemDescriptor(name)
return self._linkers_by_name[name]
def __call__(self, dataset_list_id, linker_chem_comp_descriptor_id,
group_id, id, entity_id_1, entity_id_2, seq_id_1, seq_id_2,
linker_type, details):
dataset = self.sysr.datasets.get_by_id_or_none(dataset_list_id)
if linker_chem_comp_descriptor_id is None and linker_type is not None:
linker = self._get_linker_by_name(linker_type)
else:
linker = self.sysr.chem_descriptors.get_by_id(
linker_chem_comp_descriptor_id)
if linker_type:
self._linker_type[linker] = linker_type
# Group all crosslinks with same dataset and linker in one
# CrossLinkRestraint object
r = self.sysr.xl_restraints.get_by_attrs(dataset, linker)
xl_group = self.sysr.experimental_xl_groups.get_by_id(group_id)
xl = self.sysr.experimental_xls.get_by_id(id)
if group_id not in self._seen_group_ids:
self._seen_group_ids.add(group_id)
r.experimental_cross_links.append(xl_group)
xl_group.append(xl)
xl.residue1 = self._get_entity_residue(entity_id_1, seq_id_1)
xl.residue2 = self._get_entity_residue(entity_id_2, seq_id_2)
xl.details = details
def _get_entity_residue(self, entity_id, seq_id):
entity = self.sysr.entities.get_by_id(entity_id)
return entity.residue(int(seq_id))
def finalize(self):
# If any ChemDescriptor has an empty name, fill it in using linker_type
for d in self.system.orphan_chem_descriptors:
if d.auth_name is None:
d.auth_name = self._linker_type.get(d)
class _CrossLinkRestraintHandler(Handler):
category = '_ihm_cross_link_restraint'
_cond_map = {'ALL': True, 'ANY': False, None: None}
_distance_map = {'harmonic': ihm.restraint.HarmonicDistanceRestraint,
'lower bound': ihm.restraint.LowerBoundDistanceRestraint,
'upper bound': ihm.restraint.UpperBoundDistanceRestraint}
# Map granularity to corresponding subclass
_type_map = dict((x[1].granularity.lower(), x[1])
for x in inspect.getmembers(ihm.restraint,
inspect.isclass)
if issubclass(x[1], ihm.restraint.CrossLink)
and x[1] is not ihm.restraint.CrossLink)
def __call__(self, model_granularity, id, group_id, asym_id_1, asym_id_2,
restraint_type, distance_threshold: float,
conditional_crosslink_flag, atom_id_1, atom_id_2, psi: float,
sigma_1: float, sigma_2: float):
typ = (model_granularity or 'other').lower()
xl = self.sysr.cross_links.get_by_id(
id, self._type_map.get(typ, ihm.restraint.ResidueCrossLink))
ex_xl = self.sysr.experimental_xls.get_by_id(group_id)
xl.experimental_cross_link = ex_xl
xl.asym1 = self.sysr.asym_units.get_by_id(asym_id_1)
xl.asym2 = self.sysr.asym_units.get_by_id(asym_id_2)
# todo: handle unknown restraint type
_distcls = self._distance_map[restraint_type.lower()]
xl.distance = _distcls(distance_threshold)
xl.restrain_all = self._cond_map[conditional_crosslink_flag]
if isinstance(xl, ihm.restraint.AtomCrossLink):
xl.atom1 = atom_id_1
xl.atom2 = atom_id_2
xl.psi = psi
xl.sigma1 = sigma_1
xl.sigma2 = sigma_2
def finalize(self):
# Put each cross link in the restraint that owns its experimental xl
rsr_for_ex_xl = {}
for r in self.sysr.xl_restraints.get_all():
for ex_xl_group in r.experimental_cross_links:
for ex_xl in ex_xl_group:
rsr_for_ex_xl[ex_xl] = r
for xl in self.sysr.cross_links.get_all():
r = rsr_for_ex_xl[xl.experimental_cross_link]
r.cross_links.append(xl)
class _CrossLinkPseudoSiteHandler(Handler):
category = '_ihm_cross_link_pseudo_site'
def __call__(self, id, restraint_id, cross_link_partner: int,
pseudo_site_id, model_id):
xlps = self.sysr.cross_link_pseudo_sites.get_by_id(id)
xlps.site = self.sysr.pseudo_sites.get_by_id(pseudo_site_id)
xlps.model = self.sysr.models.get_by_id_or_none(model_id)
xl = self.sysr.cross_links.get_by_id(restraint_id)
if cross_link_partner == 2:
if getattr(xl, 'pseudo2', None) is None:
xl.pseudo2 = []
xl.pseudo2.append(xlps)
else:
if getattr(xl, 'pseudo1', None) is None:
xl.pseudo1 = []
xl.pseudo1.append(xlps)
class _CrossLinkResultHandler(Handler):
category = '_ihm_cross_link_result'
def __call__(self, restraint_id, ensemble_id, model_group_id,
num_models: int, median_distance: float, details):
if ensemble_id:
g = self.sysr.ensembles.get_by_id(ensemble_id)
else:
g = self.sysr.model_groups.get_by_id(model_group_id)
xl = self.sysr.cross_links.get_by_id(restraint_id)
xl.fits[g] = ihm.restraint.CrossLinkGroupFit(
num_models=num_models, median_distance=median_distance,
details=details)
class _CrossLinkResultParametersHandler(Handler):
category = '_ihm_cross_link_result_parameters'
ignored_keywords = ['ordinal_id']
def __call__(self, restraint_id, model_id, psi: float, sigma_1: float,
sigma_2: float):
xl = self.sysr.cross_links.get_by_id(restraint_id)
model = self.sysr.models.get_by_id(model_id)
xl.fits[model] = ihm.restraint.CrossLinkFit(
psi=psi, sigma1=sigma_1, sigma2=sigma_2)
class _OrderedModelHandler(Handler):
category = '_ihm_ordered_model'
def __call__(self, process_id, step_id, model_group_id_begin,
model_group_id_end, edge_description, ordered_by,
process_description, step_description):
proc = self.sysr.ordered_procs.get_by_id(process_id)
# todo: will this work with multiple processes?
step = self.sysr.ordered_steps.get_by_id(step_id)
edge = ihm.model.ProcessEdge(
self.sysr.model_groups.get_by_id(model_group_id_begin),
self.sysr.model_groups.get_by_id(model_group_id_end))
self.copy_if_present(
edge, locals(), mapkeys={'edge_description': 'description'})
step.append(edge)
if step_id not in [s._id for s in proc.steps]:
proc.steps.append(step)
self.copy_if_present(
proc, locals(), keys=('ordered_by',),
mapkeys={'process_description': 'description'})
self.copy_if_present(
step, locals(), mapkeys={'step_description': 'description'})
# Handle the old name for the ihm_ordered_model category. This is a separate
# class, so it relies on _OrderedModelHandler not storing any state.
class _OrderedEnsembleHandler(_OrderedModelHandler):
category = '_ihm_ordered_ensemble'
class UnknownCategoryWarning(Warning):
"""Warning for unknown categories encountered in the file
by :func:`read`"""
pass
class UnknownKeywordWarning(Warning):
"""Warning for unknown keywords encountered in the file by :func:`read`"""
pass
class _UnknownCategoryHandler:
def __init__(self):
self.reset()
def reset(self):
self._seen_categories = set()
def __call__(self, catname, line):
# Only warn about a given category once
if catname in self._seen_categories:
return
self._seen_categories.add(catname)
warnings.warn("Unknown category %s encountered%s - will be ignored"
% (catname, " on line %d" % line if line else ""),
UnknownCategoryWarning, stacklevel=2)
class _UnknownKeywordHandler:
def add_category_handlers(self, handlers):
self._ignored_keywords = dict((h.category,
frozenset(h.ignored_keywords))
for h in handlers)
def __call__(self, catname, keyname, line):
if keyname in self._ignored_keywords[catname]:
return
warnings.warn("Unknown keyword %s.%s encountered%s - will be ignored"
% (catname, keyname,
" on line %d" % line if line else ""),
UnknownKeywordWarning, stacklevel=2)
class _MultiStateSchemeHandler(Handler):
category = '_ihm_multi_state_scheme'
def __call__(self, id, name, details):
# Get the object or create the object
cur_mss = self.sysr.multi_state_schemes.get_by_id(id)
# Set the variables
self.copy_if_present(cur_mss, locals(), keys=('name', 'details'))
class _MultiStateSchemeConnectivityHandler(Handler):
category = '_ihm_multi_state_scheme_connectivity'
def __call__(self, id, scheme_id, begin_state_id, end_state_id,
dataset_group_id, details):
# Get the object or create the object
mssc = self.sysr.multi_state_scheme_connectivities.get_by_id(id)
# Add the content
mssc.begin_state = self.sysr.states.get_by_id(begin_state_id)
mssc.end_state = self.sysr.states.get_by_id_or_none(end_state_id)
mssc.dataset_group = \
self.sysr.dataset_groups.get_by_id_or_none(dataset_group_id)
mssc.details = details
# Get the MultiStateScheme
mss = self.sysr.multi_state_schemes.get_by_id(scheme_id)
# Add the connectivity to the scheme
mss.add_connectivity(mssc)
class _KineticRateHandler(Handler):
category = '_ihm_kinetic_rate'
def __call__(self, id,
transition_rate_constant,
equilibrium_constant,
equilibrium_constant_determination_method,
equilibrium_constant_unit,
details,
scheme_connectivity_id,
dataset_group_id,
external_file_id):
# Get the object or create the object
k = self.sysr.kinetic_rates.get_by_id(id)
        # If information for an equilibrium constant is given, create the
        # appropriate object
        eq_const = None
        if (equilibrium_constant is not None
                and equilibrium_constant_determination_method is not None):
            method = equilibrium_constant_determination_method
            if method == ('equilibrium constant is determined '
                          'from population'):
                eq_const = \
                    ihm.multi_state_scheme.PopulationEquilibriumConstant(
                        value=equilibrium_constant,
                        unit=equilibrium_constant_unit)
            elif method == ('equilibrium constant is determined '
                            'from kinetic rates, kAB/kBA'):
                eq_const = \
                    ihm.multi_state_scheme.KineticRateEquilibriumConstant(
                        value=equilibrium_constant,
                        unit=equilibrium_constant_unit)
            else:
                eq_const = ihm.multi_state_scheme.EquilibriumConstant(
                    value=equilibrium_constant,
                    unit=equilibrium_constant_unit)
# Add the content
k.transition_rate_constant = transition_rate_constant
k.equilibrium_constant = eq_const
k.details = details
k.dataset_group = \
self.sysr.dataset_groups.get_by_id_or_none(dataset_group_id)
k.external_file = \
self.sysr.external_files.get_by_id_or_none(external_file_id)
tmp_connectivities = self.sysr.multi_state_scheme_connectivities
mssc = tmp_connectivities.get_by_id(scheme_connectivity_id)
# Add the kinetic rate to the connectivity
mssc.kinetic_rate = k
class _RelaxationTimeHandler(Handler):
category = '_ihm_relaxation_time'
def __call__(self, id, value, unit, amplitude,
dataset_group_id, external_file_id, details):
# Get the object or create the object
r = self.sysr.relaxation_times.get_by_id(id)
# Add the content
r.value = value
r.unit = unit
r.amplitude = amplitude
r.dataset_group = \
self.sysr.dataset_groups.get_by_id_or_none(dataset_group_id)
r.external_file = \
self.sysr.external_files.get_by_id_or_none(external_file_id)
r.details = details
class _RelaxationTimeMultiStateSchemeHandler(Handler):
category = '_ihm_relaxation_time_multi_state_scheme'
def __init__(self, *args):
super().__init__(*args)
self._read_args = []
def __call__(self, id, relaxation_time_id,
scheme_id, scheme_connectivity_id,
details):
r = self.sysr.relaxation_times.get_by_id(relaxation_time_id)
mss = self.sysr.multi_state_schemes.get_by_id(scheme_id)
self._read_args.append((r, mss, scheme_connectivity_id, details))
def finalize(self):
for (r, mss, scheme_connectivity_id, details) in self._read_args:
tmp_connectivities = self.sysr.multi_state_scheme_connectivities
mssc = tmp_connectivities.get_by_id_or_none(scheme_connectivity_id)
# If the relaxation time is assigned to a connectivity,
# add it there
if mssc is not None:
mssc.relaxation_time = r
# Otherwise, add it to the multi-state scheme
else:
mss.add_relaxation_time(r)
# FLR part
# Note: this handler exists only because the category is officially
# still part of the FLR dictionary.
class _FLRChemDescriptorHandler(_ChemDescriptorHandler):
category = '_flr_chemical_descriptor'
class _FLRExperimentHandler(Handler):
category = '_flr_experiment'
def __call__(self, ordinal_id, id, instrument_id,
inst_setting_id, exp_condition_id,
sample_id, details):
# Get the object or create the object
experiment = self.sysr.flr_experiments.get_by_id(id)
# Fill the object
instrument = self.sysr.flr_instruments.get_by_id(instrument_id)
inst_setting = self.sysr.flr_inst_settings.get_by_id(inst_setting_id)
exp_condition = self.sysr.flr_exp_conditions.get_by_id(
exp_condition_id)
sample = self.sysr.flr_samples.get_by_id(sample_id)
experiment.add_entry(instrument=instrument, inst_setting=inst_setting,
exp_condition=exp_condition, sample=sample,
details=details)
class _FLRInstSettingHandler(Handler):
category = '_flr_inst_setting'
def __call__(self, id, details):
# Get the object or create the object
cur_inst_setting = self.sysr.flr_inst_settings.get_by_id(id)
# Set the variables
self.copy_if_present(cur_inst_setting, locals(), keys=('details',))
class _FLRExpConditionHandler(Handler):
category = '_flr_exp_condition'
def __call__(self, id, details):
# Get the object or create the object
cur_exp_condition = self.sysr.flr_exp_conditions.get_by_id(id)
# Set the variables
self.copy_if_present(cur_exp_condition, locals(), keys=('details',))
class _FLRInstrumentHandler(Handler):
category = '_flr_instrument'
def __call__(self, id, details):
# Get the object or create the object
cur_instrument = self.sysr.flr_instruments.get_by_id(id)
# Set the variables
self.copy_if_present(cur_instrument, locals(), keys=('details',))
class _FLREntityAssemblyHandler(Handler):
category = '_flr_entity_assembly'
def __call__(self, ordinal_id, assembly_id, entity_id, num_copies: int):
# Get the object or create the object
a = self.sysr.flr_entity_assemblies.get_by_id(assembly_id)
# Get the entity
entity = self.sysr.entities.get_by_id(entity_id)
# Add the entity to the entity assembly
a.add_entity(entity=entity, num_copies=num_copies)
class _FLRSampleConditionHandler(Handler):
category = '_flr_sample_condition'
def __call__(self, id, details):
# Get the object or create the object
cur_sample_condition = self.sysr.flr_sample_conditions.get_by_id(id)
# Set the variables
self.copy_if_present(cur_sample_condition, locals(), keys=('details',))
class _FLRSampleHandler(Handler):
category = '_flr_sample'
def __call__(self, id, entity_assembly_id, num_of_probes: int,
sample_condition_id, sample_description, sample_details,
solvent_phase):
sample = self.sysr.flr_samples.get_by_id(id)
sample.entity_assembly \
= self.sysr.flr_entity_assemblies.get_by_id(entity_assembly_id)
sample.num_of_probes = num_of_probes
        sample.condition \
            = self.sysr.flr_sample_conditions.get_by_id(sample_condition_id)
self.copy_if_present(sample, locals(), keys=('solvent_phase',),
mapkeys={'sample_description': 'description',
'sample_details': 'details'})
class _FLRProbeListHandler(Handler):
category = '_flr_probe_list'
def __call__(self, probe_id, chromophore_name, reactive_probe_flag: bool,
reactive_probe_name, probe_origin, probe_link_type):
cur_probe = self.sysr.flr_probes.get_by_id(probe_id)
cur_probe.probe_list_entry = ihm.flr.ProbeList(
chromophore_name=chromophore_name,
reactive_probe_flag=reactive_probe_flag,
reactive_probe_name=reactive_probe_name,
probe_origin=probe_origin, probe_link_type=probe_link_type)
class _FLRSampleProbeDetailsHandler(Handler):
category = '_flr_sample_probe_details'
def __call__(self, sample_probe_id, sample_id, probe_id, fluorophore_type,
description, poly_probe_position_id):
spd = self.sysr.flr_sample_probe_details.get_by_id(sample_probe_id)
spd.sample = self.sysr.flr_samples.get_by_id(sample_id)
spd.probe = self.sysr.flr_probes.get_by_id(probe_id)
spd.poly_probe_position = self.sysr.flr_poly_probe_positions.get_by_id(
poly_probe_position_id)
spd.fluorophore_type = fluorophore_type
spd.description = description
class _FLRProbeDescriptorHandler(Handler):
category = '_flr_probe_descriptor'
def __call__(self, probe_id, reactive_probe_chem_descriptor_id,
chromophore_chem_descriptor_id, chromophore_center_atom):
react_cd = self.sysr.chem_descriptors.get_by_id_or_none(
reactive_probe_chem_descriptor_id)
chrom_cd = self.sysr.chem_descriptors.get_by_id_or_none(
chromophore_chem_descriptor_id)
cur_probe = self.sysr.flr_probes.get_by_id(probe_id)
cur_probe.probe_descriptor = ihm.flr.ProbeDescriptor(
reactive_probe_chem_descriptor=react_cd,
chromophore_chem_descriptor=chrom_cd,
chromophore_center_atom=chromophore_center_atom)
class _FLRPolyProbePositionHandler(Handler):
category = '_flr_poly_probe_position'
def _get_resatom(self, entity_id, asym_id, seq_id, atom_id):
entity = self.sysr.entities.get_by_id(entity_id)
asym = self.sysr.asym_units.get_by_id_or_none(asym_id)
if asym is not None:
asym.entity = entity
asym.id = asym_id
resatom = entity.residue(seq_id)
if asym is not None:
resatom.asym = asym
if atom_id:
resatom = resatom.atom(atom_id)
return resatom
def __call__(self, id, entity_id, asym_id, seq_id: int, atom_id,
mutation_flag: bool, modification_flag: bool, auth_name):
ppos = self.sysr.flr_poly_probe_positions.get_by_id(id)
ppos.resatom = self._get_resatom(entity_id, asym_id, seq_id, atom_id)
ppos.mutation_flag = mutation_flag
ppos.modification_flag = modification_flag
ppos.auth_name = auth_name
class _FLRPolyProbePositionModifiedHandler(Handler):
category = '_flr_poly_probe_position_modified'
def __call__(self, id, chem_descriptor_id):
ppos = self.sysr.flr_poly_probe_positions.get_by_id(id)
ppos.modified_chem_descriptor = \
self.sysr.chem_descriptors.get_by_id_or_none(chem_descriptor_id)
class _FLRPolyProbePositionMutatedHandler(Handler):
category = '_flr_poly_probe_position_mutated'
def __call__(self, id, chem_comp_id, atom_id):
ppos = self.sysr.flr_poly_probe_positions.get_by_id(id)
ppos.mutated_chem_comp_id = \
self.sysr.chem_comps.get_by_id(chem_comp_id)
class _FLRPolyProbeConjugateHandler(Handler):
category = '_flr_poly_probe_conjugate'
def __call__(self, id, sample_probe_id, chem_descriptor_id,
ambiguous_stoichiometry_flag: bool,
probe_stoichiometry: float):
ppc = self.sysr.flr_poly_probe_conjugates.get_by_id(id)
ppc.sample_probe = self.sysr.flr_sample_probe_details.get_by_id(
sample_probe_id)
ppc.chem_descriptor = self.sysr.chem_descriptors.get_by_id(
chem_descriptor_id)
ppc.ambiguous_stoichiometry = ambiguous_stoichiometry_flag
ppc.probe_stoichiometry = probe_stoichiometry
class _FLRFretForsterRadiusHandler(Handler):
category = '_flr_fret_forster_radius'
def __call__(self, id, donor_probe_id, acceptor_probe_id,
forster_radius: float, reduced_forster_radius: float):
ffr = self.sysr.flr_fret_forster_radius.get_by_id(id)
ffr.donor_probe = self.sysr.flr_probes.get_by_id(donor_probe_id)
ffr.acceptor_probe = self.sysr.flr_probes.get_by_id(acceptor_probe_id)
ffr.forster_radius = forster_radius
ffr.reduced_forster_radius = reduced_forster_radius
class _FLRFretCalibrationParametersHandler(Handler):
category = '_flr_fret_calibration_parameters'
def __call__(self, id, phi_acceptor: float, alpha: float, alpha_sd: float,
gg_gr_ratio: float, beta: float, gamma: float, delta: float,
a_b: float):
p = self.sysr.flr_fret_calibration_parameters.get_by_id(id)
p.phi_acceptor = phi_acceptor
p.alpha = alpha
p.alpha_sd = alpha_sd
p.gg_gr_ratio = gg_gr_ratio
p.beta = beta
p.gamma = gamma
p.delta = delta
p.a_b = a_b
class _FLRFretAnalysisHandler(Handler):
category = '_flr_fret_analysis'
def __call__(self, id, experiment_id, type,
sample_probe_id_1, sample_probe_id_2,
forster_radius_id, dataset_list_id,
external_file_id, software_id):
f = self.sysr.flr_fret_analyses.get_by_id(id)
f.experiment = self.sysr.flr_experiments.get_by_id(experiment_id)
f.type = type
f.sample_probe_1 = self.sysr.flr_sample_probe_details.get_by_id(
sample_probe_id_1)
f.sample_probe_2 = self.sysr.flr_sample_probe_details.get_by_id(
sample_probe_id_2)
f.forster_radius = self.sysr.flr_fret_forster_radius.get_by_id(
forster_radius_id)
f.dataset = self.sysr.datasets.get_by_id(dataset_list_id)
f.external_file = \
self.sysr.external_files.get_by_id_or_none(external_file_id)
f.software = self.sysr.software.get_by_id_or_none(software_id)
class _FLRFretAnalysisIntensityHandler(Handler):
category = '_flr_fret_analysis_intensity'
def __call__(self, ordinal_id, analysis_id,
calibration_parameters_id, donor_only_fraction: float,
chi_square_reduced: float, method_name, details):
f = self.sysr.flr_fret_analyses.get_by_id(analysis_id)
f.type = 'intensity-based'
f.calibration_parameters = \
self.sysr.flr_fret_calibration_parameters.get_by_id(
calibration_parameters_id)
f.donor_only_fraction = donor_only_fraction
f.chi_square_reduced = chi_square_reduced
f.method_name = method_name
f.details = details
class _FLRFretAnalysisLifetimeHandler(Handler):
category = '_flr_fret_analysis_lifetime'
def __call__(self, ordinal_id, analysis_id,
reference_measurement_group_id, lifetime_fit_model_id,
donor_only_fraction: float, chi_square_reduced: float,
method_name, details):
f = self.sysr.flr_fret_analyses.get_by_id(analysis_id)
f.type = 'lifetime-based'
f.ref_measurement_group \
= self.sysr.flr_ref_measurement_groups.get_by_id(
reference_measurement_group_id)
f.lifetime_fit_model = self.sysr.flr_lifetime_fit_models.get_by_id(
lifetime_fit_model_id)
f.donor_only_fraction = donor_only_fraction
f.chi_square_reduced = chi_square_reduced
f.method_name = method_name
f.details = details
class _FLRLifetimeFitModelHandler(Handler):
category = '_flr_lifetime_fit_model'
def __call__(self, id, name, description,
external_file_id, citation_id):
f = self.sysr.flr_lifetime_fit_models.get_by_id(id)
f.name = name
f.description = description
f.external_file = \
self.sysr.external_files.get_by_id_or_none(external_file_id)
f.citation = \
self.sysr.citations.get_by_id_or_none(citation_id)
class _FLRRefMeasurementHandler(Handler):
category = '_flr_reference_measurement'
def __call__(self, id, reference_sample_probe_id,
num_species, details):
r = self.sysr.flr_ref_measurements.get_by_id(id)
r.ref_sample_probe = self.sysr.flr_sample_probe_details.get_by_id(
reference_sample_probe_id)
r.details = details
class _FLRRefMeasurementGroupHandler(Handler):
category = '_flr_reference_measurement_group'
def __call__(self, id, num_measurements, details):
g = self.sysr.flr_ref_measurement_groups.get_by_id(id)
g.details = details
class _FLRRefMeasurementGroupLinkHandler(Handler):
category = '_flr_reference_measurement_group_link'
def __call__(self, group_id, reference_measurement_id):
g = self.sysr.flr_ref_measurement_groups.get_by_id(group_id)
r = self.sysr.flr_ref_measurements.get_by_id(reference_measurement_id)
g.add_ref_measurement(r)
class _FLRRefMeasurementLifetimeHandler(Handler):
category = '_flr_reference_measurement_lifetime'
def __call__(self, ordinal_id, reference_measurement_id,
species_name, species_fraction: float, lifetime: float):
lf = self.sysr.flr_ref_measurement_lifetimes.get_by_id(ordinal_id)
lf.species_name = species_name
lf.species_fraction = species_fraction
lf.lifetime = lifetime
# Add the lifetime to the reference measurement
r = self.sysr.flr_ref_measurements.get_by_id(reference_measurement_id)
r.add_lifetime(lf)
class _FLRPeakAssignmentHandler(Handler):
category = '_flr_peak_assignment'
def __call__(self, id, method_name, details):
p = self.sysr.flr_peak_assignments.get_by_id(id)
self.copy_if_present(p, locals(), keys=('method_name', 'details'))
class _FLRFretDistanceRestraintHandler(Handler):
category = '_flr_fret_distance_restraint'
def __call__(self, ordinal_id, id, group_id, sample_probe_id_1,
sample_probe_id_2, state_id, analysis_id, distance: float,
distance_error_plus: float, distance_error_minus: float,
distance_type, population_fraction: float,
peak_assignment_id):
r = self.sysr.flr_fret_distance_restraints.get_by_id(id)
r.sample_probe_1 = self.sysr.flr_sample_probe_details.get_by_id(
sample_probe_id_1)
r.sample_probe_2 = self.sysr.flr_sample_probe_details.get_by_id(
sample_probe_id_2)
r.state = self.sysr.states.get_by_id_or_none(state_id)
r.analysis = self.sysr.flr_fret_analyses.get_by_id(analysis_id)
r.peak_assignment = self.sysr.flr_peak_assignments.get_by_id(
peak_assignment_id)
r.distance = distance
r.distance_error_plus = distance_error_plus
r.distance_error_minus = distance_error_minus
r.distance_type = distance_type
r.population_fraction = population_fraction
# also create the fret_distance_restraint_group
rg = self.sysr.flr_fret_distance_restraint_groups.get_by_id(group_id)
rg.add_distance_restraint(r)
class _FLRFretModelQualityHandler(Handler):
category = '_flr_fret_model_quality'
def __call__(self, model_id, chi_square_reduced: float, dataset_group_id,
method, details):
q = self.sysr.flr_fret_model_qualities.get_by_id(model_id)
q.model = self.sysr.models.get_by_id(model_id)
q.chi_square_reduced = chi_square_reduced
q.dataset_group = self.sysr.dataset_groups.get_by_id(dataset_group_id)
self.copy_if_present(q, locals(), keys=('method', 'details'))
class _FLRFretModelDistanceHandler(Handler):
category = '_flr_fret_model_distance'
def __init__(self, *args):
super().__init__(*args)
self._read_args = []
def __call__(self, id, restraint_id, model_id, distance: float,
distance_deviation: float):
md = self.sysr.flr_fret_model_distances.get_by_id(id)
md.restraint = self.sysr.flr_fret_distance_restraints.get_by_id(
restraint_id)
md.model = self.sysr.models.get_by_id(model_id)
md.distance = distance
md.distance_deviation = distance_deviation
self._read_args.append(md)
def finalize(self):
for md in self._read_args:
md.calculate_deviation()
class _FLRFPSGlobalParameterHandler(Handler):
category = '_flr_fps_global_parameter'
def __call__(self, id, forster_radius_value: float,
conversion_function_polynom_order: int, repetition: int,
av_grid_rel: float, av_min_grid_a: float,
av_allowed_sphere: float, av_search_nodes: int,
av_e_samples_k: float, sim_viscosity_adjustment: float,
sim_dt_adjustment: float, sim_max_iter_k: float,
sim_max_force: float, sim_clash_tolerance_a: float,
sim_reciprocal_kt: float, sim_clash_potential,
convergence_e: float, convergence_k: float,
convergence_f: float, convergence_t: float):
p = self.sysr.flr_fps_global_parameters.get_by_id(id)
p.forster_radius = forster_radius_value
p.conversion_function_polynom_order = conversion_function_polynom_order
p.repetition = repetition
p.av_grid_rel = av_grid_rel
p.av_min_grid_a = av_min_grid_a
p.av_allowed_sphere = av_allowed_sphere
p.av_search_nodes = av_search_nodes
p.av_e_samples_k = av_e_samples_k
p.sim_viscosity_adjustment = sim_viscosity_adjustment
p.sim_dt_adjustment = sim_dt_adjustment
p.sim_max_iter_k = sim_max_iter_k
p.sim_max_force = sim_max_force
p.sim_clash_tolerance_a = sim_clash_tolerance_a
p.sim_reciprocal_kt = sim_reciprocal_kt
p.sim_clash_potential = sim_clash_potential
p.convergence_e = convergence_e
p.convergence_k = convergence_k
p.convergence_f = convergence_f
p.convergence_t = convergence_t
class _FLRFPSModelingHandler(Handler):
category = '_flr_fps_modeling'
def __call__(self, id, ihm_modeling_protocol_ordinal_id,
restraint_group_id, global_parameter_id,
probe_modeling_method, details):
m = self.sysr.flr_fps_modeling.get_by_id(id)
m.protocol = self.sysr.protocols.get_by_id(
ihm_modeling_protocol_ordinal_id)
m.restraint_group = \
self.sysr.flr_fret_distance_restraint_groups.get_by_id(
restraint_group_id)
m.global_parameter = self.sysr.flr_fps_global_parameters.get_by_id(
global_parameter_id)
self.copy_if_present(m, locals(),
keys=('probe_modeling_method', 'details'))
class _FLRFPSAVParameterHandler(Handler):
category = '_flr_fps_av_parameter'
def __call__(self, id, num_linker_atoms: int, linker_length: float,
linker_width: float, probe_radius_1: float,
probe_radius_2: float, probe_radius_3: float):
p = self.sysr.flr_fps_av_parameters.get_by_id(id)
p.num_linker_atoms = num_linker_atoms
p.linker_length = linker_length
p.linker_width = linker_width
p.probe_radius_1 = probe_radius_1
p.probe_radius_2 = probe_radius_2
p.probe_radius_3 = probe_radius_3
class _FLRFPSAVModelingHandler(Handler):
category = '_flr_fps_av_modeling'
def __call__(self, id, sample_probe_id, fps_modeling_id, parameter_id):
m = self.sysr.flr_fps_av_modeling.get_by_id(id)
m.fps_modeling = self.sysr.flr_fps_modeling.get_by_id(fps_modeling_id)
m.sample_probe = self.sysr.flr_sample_probe_details.get_by_id(
sample_probe_id)
m.parameter = self.sysr.flr_fps_av_parameters.get_by_id(parameter_id)
class _FLRFPSMPPHandler(Handler):
category = '_flr_fps_mean_probe_position'
def __call__(self, id, sample_probe_id, mpp_xcoord: float,
mpp_ycoord: float, mpp_zcoord: float):
p = self.sysr.flr_fps_mean_probe_positions.get_by_id(id)
p.sample_probe = self.sysr.flr_sample_probe_details.get_by_id(
sample_probe_id)
p.x = mpp_xcoord
p.y = mpp_ycoord
p.z = mpp_zcoord
class _FLRFPSMPPAtomPositionHandler(Handler):
category = '_flr_fps_mpp_atom_position'
def __call__(self, id, group_id, seq_id: int, atom_id, asym_id,
xcoord: float, ycoord: float, zcoord: float):
asym = self.sysr.asym_units.get_by_id(asym_id)
p = self.sysr.flr_fps_mpp_atom_positions.get_by_id(id)
p.atom = asym.residue(seq_id).atom(atom_id)
p.x = xcoord
p.y = ycoord
p.z = zcoord
g = self.sysr.flr_fps_mpp_atom_position_groups.get_by_id(group_id)
g.add_atom_position(p)
class _FLRFPSMPPModelingHandler(Handler):
category = '_flr_fps_mpp_modeling'
def __call__(self, ordinal_id, fps_modeling_id, mpp_id,
mpp_atom_position_group_id):
m = self.sysr.flr_fps_mpp_modeling.get_by_id(ordinal_id)
m.fps_modeling = self.sysr.flr_fps_modeling.get_by_id(fps_modeling_id)
m.mpp = self.sysr.flr_fps_mean_probe_positions.get_by_id(mpp_id)
m.mpp_atom_position_group = \
self.sysr.flr_fps_mpp_atom_position_groups.get_by_id(
mpp_atom_position_group_id)
class _FLRKineticRateFretAnalysisConnectionHandler(Handler):
category = '_flr_kinetic_rate_analysis'
def __call__(self, id, fret_analysis_id, kinetic_rate_id, details):
f = self.sysr.flr_fret_analyses.get_by_id(fret_analysis_id)
k = self.sysr.kinetic_rates.get_by_id(kinetic_rate_id)
c = self.sysr.flr_kinetic_rate_fret_analysis_connection.get_by_id(id)
c.fret_analysis = f
c.kinetic_rate = k
c.details = details
class _FLRRelaxationTimeFretAnalysisConnectionHandler(Handler):
category = '_flr_relaxation_time_analysis'
def __init__(self, *args):
super().__init__(*args)
self._read_args = []
def __call__(self, id, fret_analysis_id, relaxation_time_id, details):
f = self.sysr.flr_fret_analyses.get_by_id(fret_analysis_id)
r = self.sysr.relaxation_times.get_by_id(relaxation_time_id)
self._read_args.append((id, f, r, details))
def finalize(self):
for (id, f, r, details) in self._read_args:
tmp_connection = \
self.sysr.flr_relaxation_time_fret_analysis_connection
c = tmp_connection.get_by_id(id)
c.fret_analysis = f
c.relaxation_time = r
c.details = details
_flr_handlers = [_FLRChemDescriptorHandler, _FLRInstSettingHandler,
_FLRExpConditionHandler, _FLRInstrumentHandler,
_FLRSampleConditionHandler, _FLREntityAssemblyHandler,
_FLRSampleHandler, _FLRExperimentHandler,
_FLRProbeListHandler, _FLRProbeDescriptorHandler,
_FLRPolyProbePositionHandler,
_FLRPolyProbePositionModifiedHandler,
_FLRPolyProbePositionMutatedHandler,
_FLRSampleProbeDetailsHandler, _FLRPolyProbeConjugateHandler,
_FLRFretForsterRadiusHandler,
_FLRFretCalibrationParametersHandler, _FLRFretAnalysisHandler,
_FLRFretAnalysisIntensityHandler,
_FLRFretAnalysisLifetimeHandler, _FLRLifetimeFitModelHandler,
_FLRRefMeasurementHandler, _FLRRefMeasurementGroupHandler,
_FLRRefMeasurementGroupLinkHandler,
_FLRRefMeasurementLifetimeHandler, _FLRPeakAssignmentHandler,
_FLRFretDistanceRestraintHandler, _FLRFretModelQualityHandler,
_FLRFretModelDistanceHandler, _FLRFPSGlobalParameterHandler,
_FLRFPSModelingHandler, _FLRFPSAVParameterHandler,
_FLRFPSAVModelingHandler, _FLRFPSMPPHandler,
_FLRFPSMPPAtomPositionHandler, _FLRFPSMPPModelingHandler,
_FLRKineticRateFretAnalysisConnectionHandler,
_FLRRelaxationTimeFretAnalysisConnectionHandler]
class Variant:
"""Utility class to select the type of file to read with :func:`read`."""
#: Class to track global file information, e.g. :class:`SystemReader`
system_reader = None
def get_handlers(self, sysr):
"""Get the :class:`Handler` objects to use to parse input.
:param sysr: class to track global file information.
:type sysr: :class:`SystemReader`
:return: a list of :class:`Handler` objects.
"""
pass
def get_audit_conform_handler(self, sysr):
"""Get a :class:`Handler` to check the audit_conform table.
If :func:`read` is called with ``reject_old_file=True``, this
handler is used to check the audit_conform table and reject the
file if it is deemed to be too old.
:param sysr: class to track global file information.
:type sysr: :class:`SystemReader`
:return: a suitable handler.
:rtype: :class:`Handler`
"""
pass
class IHMVariant(Variant):
"""Used to select typical PDBx/IHM file input. See :func:`read`."""
system_reader = SystemReader
_handlers = [
_CollectionHandler, _StructHandler, _SoftwareHandler, _CitationHandler,
_DatabaseHandler, _DatabaseStatusHandler,
_AuditAuthorHandler, _AuditRevisionHistoryHandler,
_AuditRevisionDetailsHandler, _AuditRevisionGroupHandler,
_AuditRevisionCategoryHandler, _AuditRevisionItemHandler,
_DataUsageHandler, _GrantHandler, _CitationAuthorHandler,
_ChemCompHandler, _ChemDescriptorHandler, _EntityHandler,
_EntitySrcNatHandler, _EntitySrcGenHandler, _EntitySrcSynHandler,
_StructRefHandler, _StructRefSeqHandler, _StructRefSeqDifHandler,
_EntityPolyHandler, _EntityPolySeqHandler, _EntityNonPolyHandler,
_EntityPolySegmentHandler, _StructAsymHandler, _AssemblyDetailsHandler,
_AssemblyHandler, _ExtRefHandler, _ExtFileHandler, _DatasetListHandler,
_DatasetGroupHandler, _DatasetGroupLinkHandler, _DatasetExtRefHandler,
_DatasetDBRefHandler, _DataTransformationHandler,
_RelatedDatasetsHandler, _ModelRepresentationHandler,
_ModelRepresentationDetailsHandler, _StartingModelDetailsHandler,
_StartingComputationalModelsHandler, _StartingComparativeModelsHandler,
_ProtocolHandler, _ProtocolDetailsHandler, _PostProcessHandler,
_ModelListHandler, _ModelGroupHandler, _ModelGroupLinkHandler,
_ModelRepresentativeHandler,
_MultiStateHandler, _MultiStateLinkHandler, _EnsembleHandler,
_NotModeledResidueRangeHandler,
_DensityHandler, _SubsampleHandler, _EM3DRestraintHandler,
_EM2DRestraintHandler, _EM2DFittingHandler, _SASRestraintHandler,
_SphereObjSiteHandler, _AtomSiteHandler, _FeatureListHandler,
_PolyResidueFeatureHandler, _PolyAtomFeatureHandler,
_NonPolyFeatureHandler, _PseudoSiteFeatureHandler, _PseudoSiteHandler,
_DerivedDistanceRestraintHandler, _HDXRestraintHandler,
_PredictedContactRestraintHandler,
_CenterHandler, _TransformationHandler, _GeometricObjectHandler,
_SphereHandler, _TorusHandler, _HalfTorusHandler, _AxisHandler,
_PlaneHandler, _GeometricRestraintHandler, _PolySeqSchemeHandler,
_NonPolySchemeHandler, _BranchSchemeHandler, _EntityBranchListHandler,
_BranchDescriptorHandler, _BranchLinkHandler, _CrossLinkListHandler,
_CrossLinkRestraintHandler, _CrossLinkPseudoSiteHandler,
_CrossLinkResultParametersHandler,
_CrossLinkResultHandler, _StartingModelSeqDifHandler,
_OrderedModelHandler, _OrderedEnsembleHandler,
_MultiStateSchemeHandler, _MultiStateSchemeConnectivityHandler,
_KineticRateHandler,
_RelaxationTimeHandler, _RelaxationTimeMultiStateSchemeHandler
]
def get_handlers(self, sysr):
return [h(sysr) for h in self._handlers + _flr_handlers]
def get_audit_conform_handler(self, sysr):
return _AuditConformHandler(sysr)
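# A minimal sketch of extending the set of categories that are parsed by
# subclassing IHMVariant; here _MyCategoryHandler is a hypothetical Handler
# subclass for some custom category:
#
#     class MyVariant(IHMVariant):
#         _handlers = IHMVariant._handlers + [_MyCategoryHandler]
#
#     systems = read(fh, variant=MyVariant)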
def read(fh, model_class=ihm.model.Model, format='mmCIF', handlers=[],
warn_unknown_category=False, warn_unknown_keyword=False,
read_starting_model_coord=True,
starting_model_class=ihm.startmodel.StartingModel,
reject_old_file=False, variant=IHMVariant,
add_to_system=None):
"""Read data from the file handle `fh`.
Note that the reader currently expects to see a file compliant
with the PDBx and/or IHM dictionaries. It is not particularly tolerant
of noncompliant or incomplete files, and will probably throw an
exception rather than warning about and trying to handle such files.
       Please `open an issue <https://github.com/ihmwg/python-ihm/issues>`_
if you encounter such a problem.
Files can be read in either the text-based mmCIF format or the BinaryCIF
format. The mmCIF reader works by breaking the file into tokens, and
using this stream of tokens to populate Python data structures.
Two tokenizers are available: a pure Python implementation and a
C-accelerated version. The C-accelerated version is much faster and
so is used if built. The BinaryCIF reader needs the msgpack Python
module to function.
The file handle should be opened in text mode for mmCIF files.
       Traditionally, mmCIF files used ASCII encoding. Increasingly, recent
       files are UTF-8 encoded instead, but some use other encodings such as
       latin-1. To handle most current files, use something like::
try:
with open('input.cif', encoding='utf-8') as fh:
systems = ihm.reader.read(fh)
except UnicodeDecodeError:
with open('input.cif', encoding='latin-1') as fh:
systems = ihm.reader.read(fh)
The file handle should be opened in binary mode for BinaryCIF files::
with open('input.bcif', 'rb') as fh:
systems = ihm.reader.read(fh, format='BCIF')
:param file fh: The file handle to read from. (For BinaryCIF files,
the file should be opened in binary mode. For mmCIF files,
files opened in binary mode with Python 3 will be treated as
if they are Latin-1-encoded.)
:param model_class: The class to use to store model information (such
as coordinates). For use with other software, it is recommended
to subclass :class:`ihm.model.Model` and override
:meth:`~ihm.model.Model.add_sphere` and/or
:meth:`~ihm.model.Model.add_atom`, and provide that subclass
here. See :meth:`ihm.model.Model.get_spheres` for more
information.
:param str format: The format of the file. This can be 'mmCIF' (the
default) for the (text-based) mmCIF format or 'BCIF' for
BinaryCIF.
:param list handlers: A list of :class:`Handler` classes (not objects).
These can be used to read extra categories from the file.
:param bool warn_unknown_category: if set, emit an
:exc:`UnknownCategoryWarning` for each unknown category
encountered in the file.
:param bool warn_unknown_keyword: if set, emit an
:exc:`UnknownKeywordWarning` for each unknown keyword
(within an otherwise-handled category) encountered in the file.
:param bool read_starting_model_coord: if set, read coordinates for
starting models, if provided in the file.
:param starting_model_class: The class to use to store starting model
information. If `read_starting_model_coord` is also set, it
is recommended to subclass :class:`ihm.startmodel.StartingModel`
and override :meth:`~ihm.startmodel.StartingModel.add_atom`
and/or :meth:`~ihm.startmodel.StartingModel.add_seq_dif`.
:param bool reject_old_file: If True, raise an
:exc:`ihm.reader.OldFileError` if the file conforms to an
older version of the dictionary than this library supports
(by default the library will read what it can from the file).
:param variant: A class or object that selects the type of file to
read. This primarily controls the set of tables that are
read from the file. In most cases the default
:class:`IHMVariant` should be used.
:type variant: :class:`Variant`
:param add_to_system: If provided, all data read from the file are added
to the existing System, rather than being placed in new System
objects. This System must itself have previously been read from
a file (so that objects have IDs, which can be used to map data
in the new file to the existing System). Note however that this
will not handle duplicate IDs (it is intended for depositions
where the data are split between multiple files) so cannot be
used to combine two disparate mmCIF files into one.
:type add_to_system: :class:`ihm.System`
:return: A list of :class:`ihm.System` objects.
"""
if isinstance(variant, type):
variant = variant()
systems = []
reader_map = {'mmCIF': ihm.format.CifReader,
'BCIF': ihm.format_bcif.BinaryCifReader}
uchandler = _UnknownCategoryHandler() if warn_unknown_category else None
ukhandler = _UnknownKeywordHandler() if warn_unknown_keyword else None
r = reader_map[format](fh, {}, unknown_category_handler=uchandler,
unknown_keyword_handler=ukhandler)
while True:
if add_to_system:
s = variant.system_reader(model_class, starting_model_class,
system=add_to_system)
else:
# e.g. older ModelCIF's SystemReader doesn't support add_to_system
s = variant.system_reader(model_class, starting_model_class)
hs = variant.get_handlers(s) + [h(s) for h in handlers]
if reject_old_file:
hs.append(variant.get_audit_conform_handler(s))
if read_starting_model_coord:
hs.append(_StartingModelCoordHandler(s))
if uchandler:
uchandler.reset()
if ukhandler:
ukhandler.add_category_handlers(hs)
r.category_handler = dict((h.category, h) for h in hs)
more_data = r.read_file()
for h in hs:
h.finalize()
s.finalize()
_finalize_entities(s.system)
systems.append(s.system)
if not more_data:
break
return systems
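# A minimal sketch of reading an extra (hypothetical) category with a custom
# handler passed via read()'s `handlers` argument; each keyword in the
# category becomes a keyword argument to __call__:
#
#     class _MyHandler(Handler):
#         category = '_my_custom_category'
#
#         def __call__(self, key1, key2):
#             print("got", key1, key2)
#
#     with open('input.cif') as fh:
#         systems = read(fh, handlers=[_MyHandler])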
python-ihm-2.7/ihm/reference.py 0000664 0000000 0000000 00000014273 15035733372 0016562 0 ustar 00root root 0000000 0000000 """Classes for providing extra information about an :class:`ihm.Entity`"""
import urllib.request
class Reference:
"""Base class for extra information about an :class:`ihm.Entity`.
This class is not used directly; instead, use a subclass such as
:class:`Sequence` or :class:`UniProtSequence`. These objects are
then typically passed to the :class:`ihm.Entity` constructor."""
pass
class Sequence(Reference):
"""Point to the sequence of an :class:`ihm.Entity` in a sequence database;
convenience subclasses are provided for common sequence databases such
as :class:`UniProtSequence`.
These objects are typically passed to the :class:`ihm.Entity`
constructor.
See also :attr:`alignments` to describe the correspondence between
the database and entity sequences.
:param str db_name: The name of the database.
:param str db_code: The name of the sequence in the database.
:param str accession: The database accession.
:param str sequence: The complete sequence, as a string of
one-letter codes.
:param str details: Longer text describing the sequence.
"""
def __init__(self, db_name, db_code, accession, sequence, details=None):
self.db_name, self.db_code = db_name, db_code
self.accession = accession
self.sequence, self.details = sequence, details
#: All alignments between the reference and entity sequences, as
#: :class:`Alignment` objects. If none are provided, a simple 1:1
#: alignment is assumed.
self.alignments = []
def _signature(self):
# Ignore "details"
return ((self.db_name, self.db_code, self.accession, self.sequence)
+ tuple(a._signature() for a in self.alignments))
def _get_alignments(self):
if self.alignments:
return self.alignments
elif not hasattr(self, '_default_alignment'):
self._default_alignment = Alignment()
return [self._default_alignment]
class UniProtSequence(Sequence):
"""Point to the sequence of an :class:`ihm.Entity` in UniProt.
These objects are typically passed to the :class:`ihm.Entity`
constructor.
:param str db_code: The UniProt name (e.g. NUP84_YEAST)
:param str accession: The UniProt accession (e.g. P52891)
See :class:`Sequence` for a description of the remaining parameters.
"""
_db_name = 'UNP'
def __init__(self, db_code, accession, sequence, details=None):
super().__init__(self._db_name, db_code, accession, sequence, details)
def __str__(self):
return "" % self.accession
@classmethod
def from_accession(cls, accession):
"""Create :class:`UniProtSequence` from just an accession.
This is done by querying the UniProt web API, so requires network
access.
:param str accession: The UniProt accession (e.g. P52891)
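
           A short usage sketch (requires network access)::

               ref = UniProtSequence.from_accession('P52891')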
"""
# urlopen returns bytes
def decode(t):
return t.decode('ascii')
url = 'https://www.uniprot.org/uniprot/%s.fasta' % accession
with urllib.request.urlopen(url) as fh:
header = decode(fh.readline())
spl = header.split('|')
if len(spl) < 3 or spl[0] not in ('>sp', '>tr'):
raise ValueError("Cannot parse UniProt header %s" % header)
cd = spl[2].split(None, 1)
code = cd[0]
details = cd[1].rstrip('\r\n') if len(cd) > 1 else None
seq = decode(fh.read()).replace('\n', '')
return cls(code, accession, seq, details)
class Alignment:
"""A sequence range that aligns between the database and the entity.
This describes part of the sequence in the sequence database
(:class:`Sequence`) and in the :class:`ihm.Entity`. The two ranges
must be the same length and have the same primary sequence (any
differences must be described with :class:`SeqDif` objects).
       :param int db_begin: The first residue in the database sequence
              that is used (defaults to 1, the start of the sequence).
:param int db_end: The last residue in the database sequence
that is used (or None, the default, to use the entire sequence).
       :param int entity_begin: The first residue in the :class:`~ihm.Entity`
              sequence that is taken from the reference (defaults to 1, the
              start of the entity sequence).
:param int entity_end: The last residue in the :class:`~ihm.Entity`
sequence that is taken from the reference (or None, the default,
to use the entire sequence).
:param seq_dif: Single-point mutations made to the sequence.
:type seq_dif: Sequence of :class:`SeqDif` objects.
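
       For example, if residues 10-30 of the database sequence correspond to
       residues 1-21 of the entity sequence, a suitable alignment (a sketch
       assuming an exact sequence match over that range) would be::

           align = Alignment(db_begin=10, db_end=30,
                             entity_begin=1, entity_end=21)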
"""
def __init__(self, db_begin=1, db_end=None, entity_begin=1,
entity_end=None, seq_dif=[]):
self.db_begin, self.db_end = db_begin, db_end
self.entity_begin, self.entity_end = entity_begin, entity_end
self.seq_dif = []
self.seq_dif.extend(seq_dif)
def _signature(self):
return ((self.db_begin, self.db_end, self.entity_begin,
self.entity_end)
+ tuple(s._signature() for s in self.seq_dif))
class SeqDif:
"""Annotate a sequence difference between a reference and entity sequence.
See :class:`Alignment`.
:param int seq_id: The residue index in the entity sequence.
:param db_monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
in the reference sequence.
:type db_monomer: :class:`ihm.ChemComp`
:param monomer: The monomer type (as a :class:`~ihm.ChemComp` object)
in the entity sequence.
:type monomer: :class:`ihm.ChemComp`
:param str details: Descriptive text for the sequence difference.
"""
def __init__(self, seq_id, db_monomer, monomer, details=None):
self.seq_id, self.db_monomer = seq_id, db_monomer
self.monomer, self.details = monomer, details
def _signature(self):
# Don't ignore "details", as these distinguish insertions from
# deletions
return (self.seq_id, self.db_monomer, self.monomer, self.details)
python-ihm-2.7/ihm/report.py 0000664 0000000 0000000 00000015003 15035733372 0016127 0 ustar 00root root 0000000 0000000 """Helper classes to provide a summary report of an :class:`ihm.System`"""
import ihm
import sys
import warnings
import collections
class MissingDataWarning(UserWarning):
pass
class LocalFilesWarning(UserWarning):
pass
class MissingFileWarning(UserWarning):
pass
def _get_name(name):
if name:
return repr(name)
else:
return "(unnamed)"
class _SectionReporter:
def __init__(self, title, fh):
self.fh = fh
print("\n\n# " + title, file=self.fh)
def report(self, txt):
print(" " + str(txt), file=self.fh)
class Reporter:
def __init__(self, system, fh=sys.stdout):
self.system = system
self.fh = fh
def report(self):
print("Title: %s" % self.system.title, file=self.fh)
self.report_entities()
self.report_asyms()
self.report_representations()
self.report_databases()
self.report_files()
self.report_citations()
self.report_software()
self.report_protocols()
self.report_restraints()
self.report_models()
self.report_ensembles()
def _section(self, title):
return _SectionReporter(title, self.fh)
def report_entities(self):
r = self._section("Entities (unique sequences)")
asyms_for_entity = collections.defaultdict(list)
for a in self.system.asym_units:
asyms_for_entity[a.entity].append(a)
for e in self.system.entities:
asyms = asyms_for_entity[e]
r.report("- %s (length %d, %d instances, chain IDs %s)"
% (e.description, len(e.sequence), len(asyms),
", ".join(a.id for a in asyms)))
if len(e.references) == 0:
warnings.warn(
"No reference sequence (e.g. from UniProt) provided "
"for %s" % e, MissingDataWarning)
for ref in e.references:
r.report(" - from %s" % str(ref))
def report_asyms(self):
r = self._section("Asyms/chains")
for a in self.system.asym_units:
r.report("- %s (chain ID %s)" % (a.details, a.id))
def report_citations(self):
r = self._section("Publications cited")
for c in self.system._all_citations():
r.report('- "%s", %s, %s' % (c.title, c.journal, c.year))
def report_software(self):
r = self._section("Software used")
for s in ihm._remove_identical(self.system._all_software()):
if s.version is None:
r.report("- %s (no version given)" % s.name)
else:
r.report("- %s (version %s)" % (s.name, s.version))
if not s.citation:
warnings.warn(
"No citation provided for %s" % s, MissingDataWarning)
def report_databases(self):
r = self._section("External databases referenced")
for loc in ihm._remove_identical(self.system._all_locations()):
if isinstance(loc, ihm.location.DatabaseLocation):
r.report(" - %s accession %s"
% (loc.db_name, loc.access_code))
def report_files(self):
r = self._section("Additional files referenced")
locs_by_repo = collections.defaultdict(list)
for loc in ihm._remove_identical(self.system._all_locations()):
if not isinstance(loc, ihm.location.DatabaseLocation):
locs_by_repo[loc.repo].append(loc)
for repo, locs in locs_by_repo.items():
r.report("- %s" % ("DOI: " + repo.doi if repo else "Local files"))
for loc in locs:
r.report(" - %r, %s" % (loc.path, loc.details))
if None in locs_by_repo:
warnings.warn(
"The following local files are referenced (they will need to "
"be deposited in a database or with a DOI): %s"
% [loc.path for loc in locs_by_repo[None]], LocalFilesWarning)
def report_representations(self):
r = self._section("Model representation")
for rep in self.system._all_representations():
if hasattr(rep, '_id'):
r.report("- Representation %s" % rep._id)
else:
r.report("- Representation")
for segment in rep:
r.report(" - " + segment._get_report())
def report_protocols(self):
r = self._section("Modeling protocols")
for prot in self.system._all_protocols():
r.report("- " + (prot.name if prot.name else "Unnamed protocol"))
for step in prot.steps:
r.report(" - " + step._get_report())
for analysis in prot.analyses:
r.report(" - Analysis")
for step in analysis.steps:
r.report(" - " + step._get_report())
def report_restraints(self):
r = self._section("Restraints")
for rsr in ihm._remove_identical(self.system._all_restraints()):
r.report("- " + rsr._get_report())
def report_models(self):
r = self._section("Models")
for sg in self.system.state_groups:
r.report("- State group")
for state in sg:
r.report(" - State %s" % _get_name(state.name))
for mg in state:
r.report(" - Model group %s containing %d models"
% (_get_name(mg.name), len(mg)))
def report_ensembles(self):
r = self._section("Ensembles")
for e in self.system.ensembles:
r.report("- Ensemble %s containing %d models"
% (_get_name(e.name), e.num_models))
if e.model_group is not None:
r.report(" - From model group %s"
% _get_name(e.model_group.name))
if e.precision is not None:
r.report(" - Precision %.1f" % e.precision)
if e.file:
r.report(" - In external file %s" % e.file)
if (e.model_group is not None and not e.file
and e.num_models > len(e.model_group)):
warnings.warn(
"%s references more models (%d) than are deposited in "
"its model group, but does not reference an external file"
% (e, e.num_models), MissingFileWarning)
for d in e.densities:
asym = d.asym_unit
r.report(" - Localization density for %s %d-%d"
% (asym.details, asym.seq_id_range[0],
asym.seq_id_range[1]))
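# A minimal usage sketch, assuming `system` is an ihm.System (e.g. the first
# system returned by ihm.reader.read):
#
#     Reporter(system).report()
#
# or, to write the summary to a file rather than standard output:
#
#     with open('report.txt', 'w') as fh:
#         Reporter(system, fh).report()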
python-ihm-2.7/ihm/representation.py 0000664 0000000 0000000 00000015642 15035733372 0017667 0 ustar 00root root 0000000 0000000 """Classes for handling representation of the system during modeling.
"""
def _starting_model_report(seg):
if seg.starting_model:
if hasattr(seg.starting_model, '_id'):
return " (from starting model %s)" % seg.starting_model._id
else:
return " (from starting model)"
else:
return ""
class Segment:
"""Base class for part of a :class:`Representation`.
See :class:`AtomicSegment`, :class:`ResidueSegment`,
:class:`MultiResidueSegment`, and :class:`FeatureSegment`.
"""
def _get_report(self):
"""Return a textual description of the object, used by
:meth:`ihm.System.report`"""
return str(self)
class AtomicSegment(Segment):
"""Part of the system modeled atomistically, stored in
a :class:`Representation`.
:param asym_unit: The asymmetric unit (or part of one) that
this segment represents.
:type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
:param bool rigid: Whether internal coordinates of the segment were
fixed during modeling.
:param starting_model: initial coordinates used for the segment
(or None).
:type starting_model: :class:`~ihm.startmodel.StartingModel`
:param str description: Additional text describing this segment.
"""
primitive = 'atomistic'
count = None
granularity = 'by-atom'
def _get_report(self):
asym = self.asym_unit
return ("%s %d-%d as %s atoms%s"
% (asym.details, asym.seq_id_range[0], asym.seq_id_range[1],
"rigid" if self.rigid else "flexible",
_starting_model_report(self)))
def __init__(self, asym_unit, rigid, starting_model=None,
description=None):
self.asym_unit = asym_unit
self.starting_model, self.rigid = starting_model, rigid
self.description = description
class ResidueSegment(Segment):
"""Part of the system modeled as a set of residues, stored in
a :class:`Representation`.
:param asym_unit: The asymmetric unit (or part of one) that
this segment represents.
:type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
:param bool rigid: Whether internal coordinates of the segment were
fixed during modeling.
:param str primitive: The type of object used to represent this segment
(sphere/gaussian/other).
:param starting_model: initial coordinates used for the segment
(or None).
:type starting_model: :class:`~ihm.startmodel.StartingModel`
:param str description: Additional text describing this segment.
"""
count = None
granularity = 'by-residue'
def _get_report(self):
asym = self.asym_unit
return ("%s %d-%d as %s residues%s"
% (asym.details, asym.seq_id_range[0], asym.seq_id_range[1],
"rigid" if self.rigid else "flexible",
_starting_model_report(self)))
def __init__(self, asym_unit, rigid, primitive, starting_model=None,
description=None):
self.asym_unit = asym_unit
self.primitive = primitive
self.starting_model, self.rigid = starting_model, rigid
self.description = description
class MultiResidueSegment(Segment):
"""Part of the system modeled as a single object representing a
range of residues, stored in a :class:`Representation`.
:param asym_unit: The asymmetric unit (or part of one) that
this segment represents.
:type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
:param bool rigid: Whether internal coordinates of the segment were
fixed during modeling.
:param str primitive: The type of object used to represent this segment
(sphere/gaussian/other).
:param starting_model: initial coordinates used for the segment
(or None).
:type starting_model: :class:`~ihm.startmodel.StartingModel`
:param str description: Additional text describing this segment.
"""
count = None
granularity = 'multi-residue'
def __init__(self, asym_unit, rigid, primitive, starting_model=None,
description=None):
self.asym_unit = asym_unit
self.primitive = primitive
self.starting_model, self.rigid = starting_model, rigid
self.description = description
class FeatureSegment(Segment):
"""Part of the system modeled as a number of geometric features,
stored in a :class:`Representation`.
:param asym_unit: The asymmetric unit (or part of one) that
this segment represents.
:type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
:param bool rigid: Whether internal coordinates of the segment were
fixed during modeling.
:param str primitive: The type of object used to represent this segment
(sphere/gaussian/other).
:param int count: The number of objects used to represent this segment.
:param starting_model: initial coordinates used for the segment
(or None).
:type starting_model: :class:`~ihm.startmodel.StartingModel`
:param str description: Additional text describing this segment.
"""
granularity = 'by-feature'
def _get_report(self):
asym = self.asym_unit
return ("%s %d-%d as %d %s feature%s (%s)%s"
% (asym.details, asym.seq_id_range[0], asym.seq_id_range[1],
self.count, "rigid" if self.rigid else "flexible",
"" if self.count == 1 else "s", self.primitive,
_starting_model_report(self)))
def __init__(self, asym_unit, rigid, primitive, count, starting_model=None,
description=None):
self.asym_unit = asym_unit
self.primitive, self.count = primitive, count
self.starting_model, self.rigid = starting_model, rigid
self.description = description
class Representation(list):
"""Part of the system modeled as a set of geometric objects, such as
spheres or atoms. This is implemented as a simple list of
:class:`Segment` objects.
:param sequence elements: Initial set of segments.
:param str name: A short descriptive name.
:param str details: A longer description of the representation.
Typically a Representation is assigned to a
:class:`~ihm.model.Model`. See also
:attr:`ihm.System.orphan_representations`.
Multiple representations of the same system are possible (multi-scale).
"""
# For backwards compatibility with earlier versions of this class which
# didn't specify name/details
name = details = None
# todo: use set rather than list?
def __init__(self, elements=(), name=None, details=None):
super().__init__(elements)
self.name, self.details = name, details
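# A minimal sketch, assuming `asym` is an ihm.AsymUnit with at least 140
# residues: represent residues 1-100 atomistically as a rigid body, and
# residues 101-140 as flexible one-sphere-per-residue beads:
#
#     rep = Representation(
#         [AtomicSegment(asym(1, 100), rigid=True),
#          ResidueSegment(asym(101, 140), rigid=False, primitive='sphere')],
#         name='Default representation')
#     system.orphan_representations.append(rep)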
python-ihm-2.7/ihm/restraint.py 0000664 0000000 0000000 00000110441 15035733372 0016631 0 ustar 00root root 0000000 0000000 """Classes for handling restraints on the system.
"""
import ihm
class PseudoSite:
"""Selection of a pseudo position in the system.
Pseudo positions are typically used to reference a point or sphere
that is not explicitly represented, in a :class:`PseudoSiteFeature`
or :class:`CrossLinkPseudoSite`.
:param float x: Cartesian X coordinate of this site.
:param float y: Cartesian Y coordinate of this site.
:param float z: Cartesian Z coordinate of this site.
:param float radius: Radius of the site, if applicable.
:param str description: Additional text describing this feature.
"""
def __init__(self, x, y, z, radius=None, description=None):
self.x, self.y, self.z = x, y, z
self.radius = radius
self.description = description
def _signature(self):
return tuple("%.3f" % v if v else None
for v in (self.x, self.y, self.z, self.radius))
class Restraint:
"""Base class for all restraints.
See :attr:`ihm.System.restraints`.
"""
def _get_report(self):
return str(self)
class RestraintGroup(list):
"""A set of related :class:`Restraint` objects.
This is implemented as a simple list.
Note that due to limitations of the underlying dictionary, only
certain combinations of restraints can be placed in groups.
In particular, all objects in a group must be of the same type, and
only certain types (currently only :class:`DerivedDistanceRestraint`
and :class:`PredictedContactRestraint`) can be grouped.
Empty groups can be created, but will be ignored on output as the
dictionary does not support them.
Restraint groups should be stored in the system by adding them to
:attr:`ihm.System.restraint_groups`.
"""
pass
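# A minimal sketch, assuming r1 and r2 are DerivedDistanceRestraint objects
# that should be grouped together:
#
#     system.restraint_groups.append(RestraintGroup([r1, r2]))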
class EM3DRestraint(Restraint):
"""Restrain part of the system to match an electron microscopy density map.
:param dataset: Reference to the density map data (usually
an :class:`~ihm.dataset.EMDensityDataset`).
:type dataset: :class:`~ihm.dataset.Dataset`
:param assembly: The part of the system that is fit into the map.
:type assembly: :class:`~ihm.Assembly`
:param bool segment: True iff the map has been segmented.
:param str fitting_method: The method used to fit the model
into the map.
:param fitting_method_citation: The publication describing the fitting
method.
:type fitting_method_citation: :class:`~ihm.Citation`
:param int number_of_gaussians: Number of Gaussians used to represent
the map as a Gaussian Mixture Model (GMM), if applicable.
:param str details: Additional details regarding the fitting.
"""
def _get_report(self):
ret = "Fit to 3D electron microscopy density map"
if self.fitting_method:
ret += " using " + self.fitting_method
return ret
def __init__(self, dataset, assembly, segment=None, fitting_method=None,
fitting_method_citation=None, number_of_gaussians=None,
details=None):
self.dataset, self.assembly = dataset, assembly
self.segment, self.fitting_method = segment, fitting_method
self.fitting_method_citation = fitting_method_citation
self.number_of_gaussians = number_of_gaussians
self.details = details
#: Information about the fit of each model to this restraint's data.
#: This is a Python dict where keys are :class:`~ihm.model.Model`
#: objects and values are :class:`EM3DRestraintFit` objects.
self.fits = {}
class EM3DRestraintFit:
"""Information on the fit of a model to an :class:`EM3DRestraint`.
       See :attr:`EM3DRestraint.fits`.
:param float cross_correlation_coefficient: The fit between the model
and the map.
"""
__slots__ = ["cross_correlation_coefficient"] # Reduce memory usage
def __init__(self, cross_correlation_coefficient=None):
self.cross_correlation_coefficient = cross_correlation_coefficient
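# A minimal sketch of recording the fit of one model against the map,
# assuming `rsr` is an EM3DRestraint and `model` is an ihm.model.Model:
#
#     rsr.fits[model] = EM3DRestraintFit(cross_correlation_coefficient=0.9)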
class SASRestraint(Restraint):
"""Restrain part of the system to match small angle scattering (SAS) data.
:param dataset: Reference to the SAS data (usually
an :class:`~ihm.dataset.SASDataset`).
:type dataset: :class:`~ihm.dataset.Dataset`
:param assembly: The part of the system that is fit against SAS data.
:type assembly: :class:`~ihm.Assembly`
:param bool segment: True iff the SAS profile has been segmented.
:param str fitting_method: The method used to fit the model against the
SAS data (e.g. FoXS, DAMMIF).
:param str fitting_atom_type: The set of atoms fit against the data
(e.g. "Heavy atoms", "All atoms").
:param bool multi_state: Whether multiple state fitting was done.
:param float radius_of_gyration: Radius of gyration obtained from the
SAS profile, if used as part of the restraint.
:param str details: Additional details regarding the fitting.
"""
def _get_report(self):
state_map = {True: "Multi-state ", False: "Single-state "}
ret = "%sSAS restraint" % state_map.get(self.multi_state, "")
if self.fitting_atom_type:
ret += " on " + self.fitting_atom_type
return ret
def __init__(self, dataset, assembly, segment=None, fitting_method=None,
fitting_atom_type=None, multi_state=None,
radius_of_gyration=None, details=None):
self.dataset, self.assembly = dataset, assembly
self.segment, self.fitting_method = segment, fitting_method
self.fitting_atom_type = fitting_atom_type
self.multi_state = multi_state
self.radius_of_gyration = radius_of_gyration
self.details = details
#: Information about the fit of each model to this restraint's data.
#: This is a Python dict where keys are :class:`~ihm.model.Model`
#: objects and values are :class:`SASRestraintFit` objects.
self.fits = {}
class SASRestraintFit:
"""Information on the fit of a model to a :class:`SASRestraint`.
       See :attr:`SASRestraint.fits`.
:param float chi_value: The fit between the model and the SAS data.
"""
__slots__ = ["chi_value"] # Reduce memory usage
def __init__(self, chi_value=None):
self.chi_value = chi_value
class EM2DRestraint(Restraint):
"""Restrain part of the system to match an electron microscopy class
average.
:param dataset: Reference to the class average data (usually
an :class:`~ihm.dataset.EM2DClassDataset`).
:type dataset: :class:`~ihm.dataset.Dataset`
:param assembly: The part of the system that is fit against the class.
:type assembly: :class:`~ihm.Assembly`
:param bool segment: True iff the image has been segmented.
:param int number_raw_micrographs: The number of particles picked from
the original raw micrographs that were used to create the
class average.
:param float pixel_size_width: Width of each pixel in the image, in
angstroms.
:param float pixel_size_height: Height of each pixel in the image, in
angstroms.
:param float image_resolution: Resolution of the image, in angstroms.
:param int number_of_projections: Number of projections of the assembly
used to fit against the image, if applicable.
:param str details: Additional details regarding the fitting.
"""
def _get_report(self):
return "Fit to 2D electron microscopy class average"
def __init__(self, dataset, assembly, segment=None,
number_raw_micrographs=None, pixel_size_width=None,
pixel_size_height=None, image_resolution=None,
number_of_projections=None, details=None):
self.dataset, self.assembly = dataset, assembly
self.segment = segment
self.number_raw_micrographs = number_raw_micrographs
self.pixel_size_width = pixel_size_width
self.pixel_size_height = pixel_size_height
self.image_resolution = image_resolution
self.number_of_projections = number_of_projections
self.details = details
#: Information about the fit of each model to this restraint's data.
#: This is a Python dict where keys are :class:`~ihm.model.Model`
#: objects and values are :class:`EM2DRestraintFit` objects.
self.fits = {}
class EM2DRestraintFit:
"""Information on the fit of a model to an :class:`EM2DRestraint`.
       See :attr:`EM2DRestraint.fits`.
:param float cross_correlation_coefficient: The fit between the model
and the class average.
:param rot_matrix: Rotation matrix (as a 3x3 array of floats) that
places the model on the image.
:param tr_vector: Translation vector (as a 3-element float list) that
places the model on the image.
"""
__slots__ = ["cross_correlation_coefficient",
"rot_matrix", "tr_vector"] # Reduce memory usage
def __init__(self, cross_correlation_coefficient=None,
rot_matrix=None, tr_vector=None):
self.cross_correlation_coefficient = cross_correlation_coefficient
self.rot_matrix, self.tr_vector = rot_matrix, tr_vector
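# Illustrative usage sketch (not part of the library API): record how an
# assumed `model` fits a class average, including the (here, identity)
# orientation that places the model on the image. `em2d_dataset` and
# `assembly` are assumed to exist.
#
#     restraint = EM2DRestraint(em2d_dataset, assembly,
#                               pixel_size_width=2.2, pixel_size_height=2.2,
#                               image_resolution=20.0)
#     restraint.fits[model] = EM2DRestraintFit(
#         cross_correlation_coefficient=0.89,
#         rot_matrix=[[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]],
#         tr_vector=[0., 0., 0.])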
class CrossLinkRestraint(Restraint):
"""Restrain part of the system to match a set of cross-links.
:param dataset: Reference to the cross-link data (usually
a :class:`~ihm.dataset.CXMSDataset`).
:type dataset: :class:`~ihm.dataset.Dataset`
:param linker: The type of chemical linker used.
:type linker: :class:`ihm.ChemDescriptor`
"""
assembly = None # no struct_assembly_id for XL restraints
def _get_report(self):
return ("%d %s cross-links from %d experimental identifications"
% (len(self.cross_links), self.linker.auth_name,
sum(len(x) for x in self.experimental_cross_links)))
def __init__(self, dataset, linker):
self.dataset, self.linker = dataset, linker
#: All cross-links identified in the experiment, as a simple list
#: of lists of :class:`ExperimentalCrossLink` objects. All cross-links
#: in the same sublist are treated as experimentally ambiguous. For
#: example, xl2 and xl3 here are considered ambiguous::
#:
#: restraint.experimental_cross_links.append([xl1])
#: restraint.experimental_cross_links.append([xl2, xl3])
self.experimental_cross_links = []
#: All cross-links used in the modeling, as a list of
#: :class:`CrossLink` objects.
self.cross_links = []
class ExperimentalCrossLink:
"""A cross-link identified in the experiment.
These objects, once created, should be added to
the :attr:`CrossLinkRestraint.experimental_cross_links` list.
:param residue1: The first residue linked by the cross-link.
:type residue1: :class:`ihm.Residue`
:param residue2: The second residue linked by the cross-link.
:type residue2: :class:`ihm.Residue`
:param str details: Additional text describing the cross-link.
"""
def __init__(self, residue1, residue2, details=None):
self.residue1, self.residue2 = residue1, residue2
self.details = details
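# Illustrative usage sketch (not part of the library API): populate the
# experimental cross-link list described in CrossLinkRestraint above.
# `cxms_dataset`, `entity_a`, and `entity_b` are assumed to exist;
# ihm.cross_linkers.dss is the DSS chemical descriptor bundled with ihm.
#
#     rsr = CrossLinkRestraint(cxms_dataset, linker=ihm.cross_linkers.dss)
#     xl1 = ExperimentalCrossLink(entity_a.residue(12), entity_b.residue(34))
#     xl2 = ExperimentalCrossLink(entity_a.residue(20), entity_b.residue(35))
#     xl3 = ExperimentalCrossLink(entity_a.residue(21), entity_b.residue(35))
#     rsr.experimental_cross_links.append([xl1])       # unambiguous
#     rsr.experimental_cross_links.append([xl2, xl3])  # ambiguous pair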
class DistanceRestraint:
"""Base class for all distance restraints. These are typically
used in a :class:`DerivedDistanceRestraint`.
Do not use this class directly but instead use a derived class
such as :class:`HarmonicDistanceRestraint`,
:class:`UpperBoundDistanceRestraint`,
:class:`LowerBoundDistanceRestraint`,
or :class:`LowerUpperBoundDistanceRestraint`.
"""
restraint_type = None
#: The minimum distance allowed for this restraint,
#: or None if unconstrained
distance_lower_limit = None
#: The maximum distance allowed for this restraint,
#: or None if unconstrained
distance_upper_limit = None
class HarmonicDistanceRestraint(DistanceRestraint):
"""Harmonically restrain two objects to be close to a given distance apart.
These objects are typically used in a :class:`DerivedDistanceRestraint`.
:param float distance: Equilibrium distance
"""
restraint_type = 'harmonic'
def __init__(self, distance):
self.distance = distance
#: The equilibrium distance
distance_lower_limit = property(lambda self: self.distance)
#: The equilibrium distance
distance_upper_limit = distance_lower_limit
class UpperBoundDistanceRestraint(DistanceRestraint):
"""Harmonically restrain two objects to be below a given distance apart.
These objects are typically used in a :class:`DerivedDistanceRestraint`.
:param float distance: Distance threshold
"""
restraint_type = 'upper bound'
def __init__(self, distance):
self.distance = distance
#: The maximum distance allowed by this restraint
distance_upper_limit = property(lambda self: self.distance)
#: Minimum distance (unconstrained, so always None)
distance_lower_limit = None
class LowerBoundDistanceRestraint(DistanceRestraint):
"""Harmonically restrain two objects to be above a given distance apart.
These objects are typically used in a :class:`DerivedDistanceRestraint`.
:param float distance: Distance threshold
"""
restraint_type = 'lower bound'
def __init__(self, distance):
self.distance = distance
#: The minimum distance allowed by this restraint
distance_lower_limit = property(lambda self: self.distance)
#: Maximum distance (unconstrained, so always None)
distance_upper_limit = None
class LowerUpperBoundDistanceRestraint(DistanceRestraint):
"""Harmonically restrain two objects to be above a given distance
and below another distance apart.
These objects are typically used in a :class:`DerivedDistanceRestraint`.
:param float distance_lower_limit: Lower bound on the distance.
:param float distance_upper_limit: Upper bound on the distance.
"""
restraint_type = 'lower and upper bound'
def __init__(self, distance_lower_limit, distance_upper_limit):
#: The minimum distance allowed by this restraint
self.distance_lower_limit = distance_lower_limit
#: The maximum distance allowed by this restraint
self.distance_upper_limit = distance_upper_limit
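# Quick sketch (illustrative values) of how the four concrete subclasses
# expose their limits:
#
#     HarmonicDistanceRestraint(10.).distance_upper_limit             # 10.0
#     UpperBoundDistanceRestraint(25.).distance_lower_limit           # None
#     LowerUpperBoundDistanceRestraint(5., 25.).distance_upper_limit  # 25.0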
class CrossLink:
"""Base class for all cross-links used in the modeling.
Do not use this class directly, but instead use a subclass:
:class:`ResidueCrossLink`, :class:`AtomCrossLink`,
or :class:`FeatureCrossLink`."""
pass
class CrossLinkPseudoSite:
"""Pseudo site corresponding to one end of a cross-link.
These objects are used when the end of a cross-link is not represented
in the model but its position is known (e.g. it may have been
approximated given the position of nearby residues). They are passed
as the ``pseudo1`` or ``pseudo2`` arguments to :class:`CrossLink`
subclasses.
:param site: The pseudo site coordinates
:type site: :class:`PseudoSite`
:param model: The model in whose coordinate system the pseudo site
is active (if not specified, the coordinates are assumed to
be valid for all models using this cross-link).
:type model: :class:`ihm.model.Model`
"""
def __init__(self, site, model=None):
self.site, self.model = site, model
class ResidueCrossLink(CrossLink):
"""A cross-link used in the modeling, applied to residue
alpha carbon atoms.
These objects, once created, should be added to
the :attr:`CrossLinkRestraint.cross_links` list.
:param experimental_cross_link: The corresponding cross-link identified
by experiment. Multiple cross-links can map to a single
experimental identification.
:type experimental_cross_link: :class:`ExperimentalCrossLink`
:param asym1: The asymmetric unit containing the first linked residue.
:type asym1: :class:`ihm.AsymUnit`
:param asym2: The asymmetric unit containing the second linked residue.
:type asym2: :class:`ihm.AsymUnit`
:param distance: Restraint on the distance.
:type distance: :class:`DistanceRestraint`
:param float psi: Initial uncertainty in the experimental data.
:param float sigma1: Initial uncertainty in the position of the first
residue.
:param float sigma2: Initial uncertainty in the position of the second
residue.
:param bool restrain_all: If True, all cross-links are restrained.
:param pseudo1: List of pseudo sites representing the position of the
first residue (if applicable).
:type pseudo1: List of :class:`CrossLinkPseudoSite`
:param pseudo2: List of pseudo sites representing the position of the
second residue (if applicable).
:type pseudo2: List of :class:`CrossLinkPseudoSite`
"""
granularity = 'by-residue'
atom1 = atom2 = None
def __init__(self, experimental_cross_link, asym1, asym2, distance,
psi=None, sigma1=None, sigma2=None, restrain_all=None,
pseudo1=None, pseudo2=None):
self.experimental_cross_link = experimental_cross_link
self.asym1, self.asym2 = asym1, asym2
self.psi, self.sigma1, self.sigma2 = psi, sigma1, sigma2
self.distance, self.restrain_all = distance, restrain_all
self.pseudo1, self.pseudo2 = pseudo1, pseudo2
#: Information about the fit of models or groups to this cross-link.
#: This is a Python dict where keys can be :class:`~ihm.model.Model`
#: objects (with corresponding values as :class:`CrossLinkFit` objects)
#: or :class:`~ihm.model.ModelGroup` or :class:`~ihm.model.Ensemble`
#: objects (with corresponding values as :class:`CrossLinkGroupFit`
#: objects).
self.fits = {}
def _get_residue1(self):
seq_id = self.experimental_cross_link.residue1.seq_id
return self.asym1.residue(seq_id)
residue1 = property(_get_residue1,
doc="Residue object representing one end "
"of the cross-link")
def _get_residue2(self):
seq_id = self.experimental_cross_link.residue2.seq_id
return self.asym2.residue(seq_id)
residue2 = property(_get_residue2,
doc="Residue object representing one end "
"of the cross-link")
class FeatureCrossLink(CrossLink):
"""A cross-link used in the modeling, applied to the closest primitive
object with the highest resolution.
These objects, once created, should be added to
the :attr:`CrossLinkRestraint.cross_links` list.
:param experimental_cross_link: The corresponding cross-link identified
by experiment. Multiple cross-links can map to a single
experimental identification.
:type experimental_cross_link: :class:`ExperimentalCrossLink`
:param asym1: The asymmetric unit containing the first linked residue.
:type asym1: :class:`ihm.AsymUnit`
:param asym2: The asymmetric unit containing the second linked residue.
:type asym2: :class:`ihm.AsymUnit`
:param distance: Restraint on the distance.
:type distance: :class:`DistanceRestraint`
:param float psi: Initial uncertainty in the experimental data.
:param float sigma1: Initial uncertainty in the position of the first
residue.
:param float sigma2: Initial uncertainty in the position of the second
residue.
:param bool restrain_all: If True, all cross-links are restrained.
:param pseudo1: List of pseudo sites representing the position of the
first residue (if applicable).
:type pseudo1: List of :class:`CrossLinkPseudoSite`
:param pseudo2: List of pseudo sites representing the position of the
second residue (if applicable).
:type pseudo2: List of :class:`CrossLinkPseudoSite`
"""
granularity = 'by-feature'
atom1 = atom2 = None
def __init__(self, experimental_cross_link, asym1, asym2, distance,
psi=None, sigma1=None, sigma2=None, restrain_all=None,
pseudo1=None, pseudo2=None):
self.experimental_cross_link = experimental_cross_link
self.asym1, self.asym2 = asym1, asym2
self.psi, self.sigma1, self.sigma2 = psi, sigma1, sigma2
self.distance, self.restrain_all = distance, restrain_all
self.pseudo1, self.pseudo2 = pseudo1, pseudo2
#: Information about the fit of models or groups to this cross-link.
#: This is a Python dict where keys can be :class:`~ihm.model.Model`
#: objects (with corresponding values as :class:`CrossLinkFit` objects)
#: or :class:`~ihm.model.ModelGroup` or :class:`~ihm.model.Ensemble`
#: objects (with corresponding values as :class:`CrossLinkGroupFit`
#: objects).
self.fits = {}
class AtomCrossLink(CrossLink):
"""A cross-link used in the modeling, applied to the specified atoms.
These objects, once created, should be added to
the :attr:`CrossLinkRestraint.cross_links` list.
:param experimental_cross_link: The corresponding cross-link identified
by experiment. Multiple cross-links can map to a single
experimental identification.
:type experimental_cross_link: :class:`ExperimentalCrossLink`
:param asym1: The asymmetric unit containing the first linked residue.
:type asym1: :class:`ihm.AsymUnit`
:param asym2: The asymmetric unit containing the second linked residue.
:type asym2: :class:`ihm.AsymUnit`
:param str atom1: The name of the first linked atom.
:param str atom2: The name of the second linked atom.
:param distance: Restraint on the distance.
:type distance: :class:`DistanceRestraint`
:param float psi: Initial uncertainty in the experimental data.
:param float sigma1: Initial uncertainty in the position of the first
residue.
:param float sigma2: Initial uncertainty in the position of the second
residue.
:param bool restrain_all: If True, all cross-links are restrained.
:param pseudo1: List of pseudo sites representing the position of the
first residue (if applicable).
:type pseudo1: List of :class:`CrossLinkPseudoSite`
:param pseudo2: List of pseudo sites representing the position of the
second residue (if applicable).
:type pseudo2: List of :class:`CrossLinkPseudoSite`
"""
granularity = 'by-atom'
def __init__(self, experimental_cross_link, asym1, asym2, atom1, atom2,
distance, psi=None, sigma1=None, sigma2=None,
restrain_all=None, pseudo1=None, pseudo2=None):
self.experimental_cross_link = experimental_cross_link
self.asym1, self.asym2 = asym1, asym2
self.atom1, self.atom2 = atom1, atom2
self.psi, self.sigma1, self.sigma2 = psi, sigma1, sigma2
self.distance, self.restrain_all = distance, restrain_all
self.pseudo1, self.pseudo2 = pseudo1, pseudo2
#: Information about the fit of models or groups to this cross-link.
#: This is a Python dict where keys can be :class:`~ihm.model.Model`
#: objects (with corresponding values as :class:`CrossLinkFit` objects)
#: or :class:`~ihm.model.ModelGroup` or :class:`~ihm.model.Ensemble`
#: objects (with corresponding values as :class:`CrossLinkGroupFit`
#: objects).
self.fits = {}
class CrossLinkFit:
"""Information on the fit of a single model to a :class:`CrossLink`.
See :attr:`ResidueCrossLink.fits`, :attr:`AtomCrossLink.fits`, or
:attr:`FeatureCrossLink.fits`.
See also :class:`CrossLinkGroupFit` for information on the fit of
a model group or ensemble in aggregate to the cross-link.
:param float psi: Uncertainty in the experimental data.
:param float sigma1: Uncertainty in the position of the first residue.
:param float sigma2: Uncertainty in the position of the second residue.
"""
__slots__ = ["psi", "sigma1", "sigma2"] # Reduce memory usage
def __init__(self, psi=None, sigma1=None, sigma2=None):
self.psi, self.sigma1, self.sigma2 = psi, sigma1, sigma2
class CrossLinkGroupFit:
"""Information on the fit of a :class:`~ihm.model.ModelGroup` or
:class:`~ihm.model.Ensemble` in aggregate to a :class:`CrossLink`.
See :attr:`ResidueCrossLink.fits`, :attr:`AtomCrossLink.fits`, or
:attr:`FeatureCrossLink.fits`.
See also :class:`CrossLinkFit` for information on the fit of
a single model to the cross-link.
:param float median_distance: Actual median cross-link distance in the
sampled models.
:param int num_models: Number of models sampled, for which the
median distance is provided.
:param str details: More information on the fit.
"""
__slots__ = ["median_distance", "num_models",
"details"] # Reduce memory usage
def __init__(self, median_distance, num_models=None, details=None):
self.median_distance = median_distance
self.num_models, self.details = num_models, details
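# Illustrative usage sketch (not part of the library API): the same `fits`
# dict accepts both an assumed single `model` and an assumed `ensemble`,
# with the value class matching the key type.
#
#     link.fits[model] = CrossLinkFit(psi=0.05, sigma1=0.01, sigma2=0.01)
#     link.fits[ensemble] = CrossLinkGroupFit(median_distance=19.2,
#                                             num_models=200)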
class Feature:
"""Base class for selecting parts of the system that a restraint acts on.
This class should not be used itself; instead,
see :class:`ResidueFeature`, :class:`AtomFeature`,
:class:`NonPolyFeature`, and :class:`PseudoSiteFeature`.
Features are typically assigned to one or more
:class:`~ihm.restraint.GeometricRestraint` or
:class:`~ihm.restraint.DerivedDistanceRestraint` objects.
"""
details = None
type = ihm.unknown
def _all_entities_or_asyms(self):
# Get all Entities or AsymUnits referenced by this object
return []
def _get_entity_type(self):
return ihm.unknown
class ResidueFeature(Feature):
"""Selection of one or more residues from the system.
Residues can be selected from both :class:`ihm.AsymUnit` and
:class:`ihm.Entity` (the latter implies that it selects residues
in all instances of that entity). Individual residues can
also be selected by passing :class:`ihm.Residue` objects.
:param sequence ranges: A list of :class:`ihm.AsymUnitRange`,
:class:`ihm.AsymUnit`, :class:`ihm.EntityRange`,
:class:`ihm.Residue`, and/or :class:`ihm.Entity` objects.
:param str details: Additional text describing this feature.
"""
# Type is 'residue' if each range selects a single residue, otherwise
# it is 'residue range'
def __get_type(self):
for r in self.ranges:
if r.seq_id_range[0] != r.seq_id_range[1]:
return 'residue range'
return 'residue'
type = property(__get_type)
def __init__(self, ranges, details=None):
self.ranges, self.details = ranges, details
_ = self._get_entity_type()
def _signature(self):
return tuple(self.ranges)
def _all_entities_or_asyms(self):
return self.ranges
def _get_entity_type(self):
def _get_entity(x):
if isinstance(x, ihm.Entity):
return x
return x.entity if x.entity else x.asym.entity
if any(not _get_entity(r).is_polymeric() for r in self.ranges):
raise ValueError("%s cannot select non-polymeric entities" % self)
else:
return _get_entity(self.ranges[0]).type if self.ranges else None
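# Illustrative sketch (assumes a polymeric AsymUnit `asym_a`): the `type`
# property reports 'residue' only if every range selects a single residue;
# calling an AsymUnit with a range yields an AsymUnitRange.
#
#     ResidueFeature([asym_a.residue(5)]).type   # 'residue'
#     ResidueFeature([asym_a(5, 10)]).type       # 'residue range'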
class AtomFeature(Feature):
"""Selection of one or more atoms from the system.
Atoms can be selected from polymers or non-polymers (but not both).
Atoms can also be selected from both :class:`ihm.AsymUnit` and
:class:`ihm.Entity` (the latter implies that it selects atoms
in all instances of that entity).
For selecting an entire polymer or residue(s),
see :class:`ResidueFeature`. For selecting an entire non-polymer,
see :class:`NonPolyFeature`.
:param sequence atoms: A list of :class:`ihm.Atom` objects.
:param str details: Additional text describing this feature.
"""
type = 'atom'
def __init__(self, atoms, details=None):
self.atoms, self.details = atoms, details
_ = self._get_entity_type()
def _get_entity_type(self):
def _get_entity(residue):
return residue.entity if residue.entity else residue.asym.entity
types = frozenset(_get_entity(a.residue).type for a in self.atoms)
if len(types) > 1:
raise ValueError("%s cannot span both polymeric and "
"non-polymeric entities" % self)
elif types:
return tuple(types)[0]
class NonPolyFeature(Feature):
"""Selection of one or more non-polymers from the system.
To select individual atoms from a non-polymer, see :class:`AtomFeature`.
Features can include both :class:`ihm.AsymUnit` and
:class:`ihm.Entity` (the latter implies that it selects non-polymers
in all instances of that entity).
:param sequence objs: A list of :class:`ihm.AsymUnit` and/or
:class:`ihm.Entity` objects.
:param str details: Additional text describing this feature.
"""
type = 'ligand'
def __init__(self, objs, details=None):
self.objs, self.details = objs, details
_ = self._get_entity_type()
def _all_entities_or_asyms(self):
return self.objs
def _get_entity_type(self):
def _get_entity(x):
return x if isinstance(x, ihm.Entity) else x.entity
if any(_get_entity(r).is_polymeric() for r in self.objs):
raise ValueError(
"%s can only select non-polymeric entities" % self)
else:
return _get_entity(self.objs[0]).type if self.objs else None
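# Illustrative sketch (assumes a non-polymeric AsymUnit `ligand_asym`):
# select an entire bound ligand. Passing a polymeric asym here would raise
# ValueError, per the check in _get_entity_type above.
#
#     ligand_feature = NonPolyFeature([ligand_asym], details='bound GTP')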
class PseudoSiteFeature(Feature):
"""Selection of a pseudo position in the system.
:param site: The pseudo site to use for the feature.
:type site: :class:`PseudoSite`
"""
type = 'pseudo site'
def __init__(self, site):
self.site = site
def _get_entity_type(self):
return 'other'
def _signature(self):
return self.site._signature()
class GeometricRestraint(Restraint):
"""A restraint between part of the system and some part of a
geometric object. See :class:`CenterGeometricRestraint`,
:class:`InnerSurfaceGeometricRestraint`,
:class:`OuterSurfaceGeometricRestraint`.
:param dataset: Reference to the data from which the restraint is
derived.
:type dataset: :class:`~ihm.dataset.Dataset`
:param geometric_object: The geometric object to restrain against.
:type geometric_object: :class:`ihm.geometry.GeometricObject`
:param feature: The part of the system to restrain.
:type feature: :class:`Feature`
:param distance: Restraint on the distance.
:type distance: :class:`DistanceRestraint`
:param float harmonic_force_constant: Force constant, if applicable.
:param bool restrain_all: If True, all distances are restrained.
"""
object_characteristic = 'other'
assembly = None # no struct_assembly_id for geometric restraints
def _get_report(self):
return ("Distance (%s) to %s"
% (self.distance.restraint_type, self.geometric_object.type))
def __init__(self, dataset, geometric_object, feature, distance,
harmonic_force_constant=None, restrain_all=None):
self.dataset = dataset
self.geometric_object, self.feature = geometric_object, feature
self.distance, self.restrain_all = distance, restrain_all
self.harmonic_force_constant = harmonic_force_constant
_all_features = property(lambda self: (self.feature,))
class CenterGeometricRestraint(GeometricRestraint):
"""A restraint between part of the system and the center of a
geometric object. See :class:`GeometricRestraint` for a description
of the parameters.
"""
object_characteristic = 'center'
class InnerSurfaceGeometricRestraint(GeometricRestraint):
"""A restraint between part of the system and the inner surface of a
geometric object. See :class:`GeometricRestraint` for a description
of the parameters.
"""
object_characteristic = 'inner surface'
class OuterSurfaceGeometricRestraint(GeometricRestraint):
"""A restraint between part of the system and the outer surface of a
geometric object. See :class:`GeometricRestraint` for a description
of the parameters.
"""
object_characteristic = 'outer surface'
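# Illustrative usage sketch (not part of the library API): keep an assumed
# residue range inside an assumed ihm.geometry.Sphere object, derived from
# an assumed `em_dataset`.
#
#     rsr = InnerSurfaceGeometricRestraint(
#         em_dataset, sphere_object, ResidueFeature([asym_a(1, 10)]),
#         distance=LowerBoundDistanceRestraint(0.),
#         harmonic_force_constant=2.0)
#     system.restraints.append(rsr)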
class DerivedDistanceRestraint(Restraint):
"""A restraint between two parts of the system, derived from experimental
data.
:param dataset: Reference to the data from which the restraint is
derived.
:type dataset: :class:`~ihm.dataset.Dataset`
:param feature1: The first part of the system to restrain.
:type feature1: :class:`Feature`
:param feature2: The second part of the system to restrain.
:type feature2: :class:`Feature`
:param distance: Restraint on the distance.
:type distance: :class:`DistanceRestraint`
:param float probability: Likelihood that restraint is correct (0. - 1.)
:param bool restrain_all: If True, all distances are restrained.
:param float mic_value: Value of the Maximal Information Coefficient
(MIC) for this interaction, if applicable.
"""
assembly = None # no struct_assembly_id for derived distance restraints
def __init__(self, dataset, feature1, feature2, distance,
probability=None, restrain_all=None, mic_value=None):
self.dataset = dataset
self.feature1, self.feature2 = feature1, feature2
self.distance, self.restrain_all = distance, restrain_all
self.probability = probability
self.mic_value = mic_value
_all_features = property(lambda self: (self.feature1, self.feature2))
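# Illustrative usage sketch (not part of the library API): restrain two
# assumed residue ranges to lie within 8 angstroms of each other with 80%
# confidence; `coevolution_dataset`, `asym_a`, `asym_b`, and `system` are
# assumed to exist.
#
#     f1 = ResidueFeature([asym_a(1, 10)])
#     f2 = ResidueFeature([asym_b(40, 50)])
#     system.restraints.append(DerivedDistanceRestraint(
#         coevolution_dataset, f1, f2,
#         distance=UpperBoundDistanceRestraint(8.0), probability=0.8))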
class PredictedContactRestraint(Restraint):
"""A predicted contact between two parts of the system, derived from
various computational tools.
:param dataset: Reference to the data from which the restraint is
derived.
:type dataset: :class:`~ihm.dataset.Dataset`
:param resatom1: The first residue or atom to restrain.
:type resatom1: :class:`ihm.Residue` or :class:`ihm.Atom`
:param resatom2: The second residue or atom to restrain.
:type resatom2: :class:`ihm.Residue` or :class:`ihm.Atom`
:param distance: Restraint on the distance.
:type distance: :class:`DistanceRestraint`
:param bool by_residue: If True, the restraint is applied to specific
residues; otherwise, it is applied to the closest primitive
object with the highest resolution.
:param float probability: Likelihood that restraint is correct (0. - 1.)
:param software: The software used to generate the contact.
:type software: :class:`~ihm.Software`
"""
assembly = None # no struct_assembly_id for predicted contact restraints
def __init__(self, dataset, resatom1, resatom2, distance,
by_residue, probability=None, software=None):
self.dataset = dataset
self.resatom1, self.resatom2 = resatom1, resatom2
self.distance, self.by_residue = distance, by_residue
self.probability, self.software = probability, software
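# Illustrative usage sketch (not part of the library API): an assumed
# contact predicted between two residues, restrained at the residue level;
# `prediction_dataset`, `asym_a`, and `asym_b` are assumed to exist.
#
#     rsr = PredictedContactRestraint(
#         prediction_dataset, asym_a.residue(15), asym_b.residue(62),
#         distance=UpperBoundDistanceRestraint(8.0), by_residue=True,
#         probability=0.9)
#     system.restraints.append(rsr)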
class HDXRestraint(Restraint):
"""A restraint derived from Hydrogen-Deuterium Exchange experiments.
:param dataset: Reference to the data from which the restraint is
derived.
:type dataset: :class:`~ihm.dataset.Dataset`
:param feature: The part of the system to restrain.
:type feature: :class:`Feature`
:param float protection_factor: Unitless scaling factor.
:param str details: Additional details regarding the restraint.
"""
assembly = None # no struct_assembly_id for HDX restraints
def __init__(self, dataset, feature, protection_factor=None, details=None):
self.dataset, self.feature = dataset, feature
self.protection_factor = protection_factor
self.details = details
_all_features = property(lambda self: (self.feature,))
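# Illustrative usage sketch (not part of the library API): annotate an
# assumed peptide with a protection factor from an assumed `hdx_dataset`.
#
#     system.restraints.append(HDXRestraint(
#         hdx_dataset, ResidueFeature([asym_a(90, 110)]),
#         protection_factor=4.5, details='slow-exchanging core'))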
python-ihm-2.7/ihm/source.py 0000664 0000000 0000000 00000003277 15035733372 0016126 0 ustar 00root root 0000000 0000000 """Classes for describing the source of an entity.
"""
class Source:
"""Base class to describe the source of an :class:`ihm.Entity`.
See :class:`Manipulated`, :class:`Natural` and :class:`Synthetic`.
"""
src_method = None
class Details:
"""Identifying information for an entity source.
See :class:`Manipulated`, :class:`Natural` or :class:`Synthetic`.
:param ncbi_taxonomy_id: NCBI taxonomy identifier, e.g. "469008"
:param scientific_name: Scientific name, e.g. "Escherichia coli"
:param common_name: Common name
:param strain: Strain, e.g. "BL21(DE3)PLYSS"
"""
def __init__(self, ncbi_taxonomy_id=None, scientific_name=None,
common_name=None, strain=None):
self.ncbi_taxonomy_id = ncbi_taxonomy_id
self.scientific_name = scientific_name
self.common_name = common_name
self.strain = strain
class Manipulated(Source):
"""An entity isolated from a genetically manipulated source.
See :class:`ihm.Entity`.
:param gene: Details about the gene source.
:type gene: :class:`Details`
:param host: Details about the host organism.
:type host: :class:`Details`
"""
src_method = 'man'
def __init__(self, gene=None, host=None):
self.gene, self.host = gene, host
class Natural(Source, Details):
"""An entity isolated from a natural source. See :class:`ihm.Entity`.
See :class:`Details` for a description of the parameters."""
src_method = 'nat'
class Synthetic(Source, Details):
"""An entity obtained synthetically. See :class:`ihm.Entity`.
See :class:`Details` for a description of the parameters."""
src_method = 'syn'
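# Illustrative usage sketch (not part of the library API): a recombinantly
# expressed entity with assumed taxonomy values, attached via ihm.Entity's
# `source` argument.
#
#     src = Manipulated(
#         gene=Details(ncbi_taxonomy_id='9606',
#                      scientific_name='Homo sapiens'),
#         host=Details(ncbi_taxonomy_id='562',
#                      scientific_name='Escherichia coli',
#                      strain='BL21(DE3)PLYSS'))
#     entity = ihm.Entity('MELK', description='Subunit A', source=src)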
python-ihm-2.7/ihm/startmodel.py 0000664 0000000 0000000 00000020442 15035733372 0016775 0 ustar 00root root 0000000 0000000 """Classes to handle starting models."""
import enum
class SequenceIdentityDenominator(enum.IntEnum):
"""The denominator used while calculating the sequence identity.
One of these constants can be passed to :class:`SequenceIdentity`."""
#: Length of the shorter sequence
SHORTER_LENGTH = 1
#: Number of aligned positions (including gaps)
NUM_ALIGNED_WITH_GAPS = 2
#: Number of aligned residue pairs (not including the gaps)
NUM_ALIGNED_WITHOUT_GAPS = 3
#: Arithmetic mean sequence length
MEAN_LENGTH = 4
#: Another method not covered here
OTHER = 5
class SequenceIdentity:
"""Describe the identity between template and target sequences.
See :class:`Template`.
:param value: Percentage sequence identity.
:param denominator: Way in which sequence identity was calculated -
see :class:`SequenceIdentityDenominator`.
"""
def __init__(self, value,
denominator=SequenceIdentityDenominator.SHORTER_LENGTH):
self.value = value
self.denominator = denominator
class Template:
"""A PDB file used as a comparative modeling template for part of a
starting model.
See :class:`StartingModel`.
:param dataset: Pointer to where this template is stored.
:type dataset: :class:`~ihm.dataset.Dataset`
:param str asym_id: The author-provided asymmetric unit (chain) to use
from the template dataset (not necessarily the same as the
starting model's asym_id or the ID of the asym_unit in the
final IHM model).
:param tuple seq_id_range: The sequence range in the dataset that
is modeled by this template. Note that this numbering may differ
from the IHM numbering. See `offset` in :class:`StartingModel`.
:param tuple template_seq_id_range: The sequence range of the template
that is used in comparative modeling.
:param sequence_identity: Sequence identity between template and
the target sequence.
:type sequence_identity: :class:`SequenceIdentity` or `float`
:param alignment_file: Reference to the external file containing the
template-target alignment.
:type alignment_file: :class:`~ihm.location.Location`
"""
# todo: handle sequence_identity_denominator as an enum, not int
def __init__(self, dataset, asym_id, seq_id_range, template_seq_id_range,
sequence_identity, alignment_file=None):
self.dataset, self.asym_id = dataset, asym_id
self.seq_id_range = seq_id_range
self.template_seq_id_range = template_seq_id_range
if isinstance(sequence_identity, float):
sequence_identity = SequenceIdentity(sequence_identity)
self.sequence_identity = sequence_identity
self.alignment_file = alignment_file
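# Illustrative usage sketch (not part of the library API): a template for
# an assumed comparative model, covering target residues 1-100 and 30%
# identical to the target over the shorter sequence; `pdb_dataset` is
# assumed to exist.
#
#     template = Template(
#         pdb_dataset, asym_id='A', seq_id_range=(1, 100),
#         template_seq_id_range=(30, 129),
#         sequence_identity=SequenceIdentity(
#             30.0, SequenceIdentityDenominator.SHORTER_LENGTH))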
class StartingModel:
"""A starting guess for modeling of an asymmetric unit
See :class:`ihm.representation.Segment` and
:attr:`ihm.System.orphan_starting_models`.
:param asym_unit: The asymmetric unit (or part of one) this starting
model represents.
:type asym_unit: :class:`~ihm.AsymUnit` or :class:`~ihm.AsymUnitRange`
:param dataset: Pointer to where this model is stored.
:type dataset: :class:`~ihm.dataset.Dataset`
:param str asym_id: The asymmetric unit (chain) to use from the starting
model's dataset (not necessarily the same as the ID of the
asym_unit in the final model).
:param list templates: A list of :class:`Template` objects, if this is
a comparative model.
:param int offset: Offset between the residue numbering in the dataset
and the IHM model (the offset is added to the starting model
numbering to give the IHM model numbering).
:param list metadata: List of PDB metadata, such as HELIX records.
:param software: The software used to generate the starting model.
:type software: :class:`~ihm.Software`
:param script_file: Reference to the external file containing the
script used to generate the starting model (usually a
:class:`~ihm.location.WorkflowFileLocation`).
:type script_file: :class:`~ihm.location.Location`
:param str description: Additional text describing the starting model.
"""
def __init__(self, asym_unit, dataset, asym_id, templates=None, offset=0,
metadata=None, software=None, script_file=None,
description=None):
self.templates = templates if templates is not None else []
self.metadata = metadata if metadata is not None else []
self.asym_unit = asym_unit
self.dataset, self.asym_id, self.offset = dataset, asym_id, offset
self.software, self.script_file = software, script_file
self.description = description
self._atoms = []
self._seq_difs = []
def get_atoms(self):
"""Yield :class:`~ihm.model.Atom` objects that represent this
starting model. This allows the starting model coordinates to
be embedded in the mmCIF file, which is useful if the starting
model is not available elsewhere (or it has been modified).
The default implementation returns an internal list of atoms;
it is usually necessary to subclass and override this method.
See :meth:`ihm.model.Model.get_spheres` for more details.
Note that the returned atoms should be those used in modeling,
not those stored in the file. In particular, the numbering scheme
should be that used in the IHM model (add `offset` to the dataset
numbering). If any residues were changed (for example it is common
to mutate MSE in the dataset to MET in the modeling) the final
mutated name should be used (MET in this case) and
:meth:`get_seq_dif` overridden to note the change.
"""
return self._atoms
def get_seq_dif(self):
"""Yield :class:`SeqDif` objects for any sequence changes between
the dataset and the starting model. See :meth:`get_atoms`.
The default implementation returns an internal list of objects;
it is usually necessary to subclass and override this method.
Note that this is always called *after* :meth:`get_atoms`.
"""
return self._seq_difs
def add_atom(self, atom):
"""Add to the model's set of :class:`~ihm.model.Atom` objects.
See :meth:`get_atoms` for more details.
"""
self._atoms.append(atom)
def add_seq_dif(self, seq_dif):
"""Add to the model's set of :class:`SeqDif` objects.
See :meth:`get_atoms` for more details.
"""
self._seq_difs.append(seq_dif)
class PDBHelix:
"""Represent a HELIX record from a PDB file."""
def __init__(self, line):
self.helix_id = line[11:14].strip()
self.start_resnam = line[15:18].strip()
self.start_asym = line[19]
self.start_resnum = int(line[21:25])
self.end_resnam = line[27:30].strip()
self.end_asym = line[31]
self.end_resnum = int(line[33:37])
self.helix_class = int(line[38:40])
self.length = int(line[71:76])
class SeqDif:
"""Annotate a sequence difference between a dataset and starting model.
See :meth:`StartingModel.get_seq_dif` and :class:`MSESeqDif`.
:param int db_seq_id: The residue index in the dataset.
:param int seq_id: The residue index in the starting model. This should
normally be `db_seq_id + offset`.
:param str db_comp_id: The name of the residue in the dataset.
:param str details: Descriptive text for the sequence difference.
"""
def __init__(self, db_seq_id, seq_id, db_comp_id, details=None):
self.db_seq_id, self.seq_id = db_seq_id, seq_id
self.db_comp_id, self.details = db_comp_id, details
class MSESeqDif:
"""Denote that a residue was mutated from MSE to MET.
See :class:`SeqDif` for a description of the parameters.
"""
def __init__(self, db_seq_id, seq_id,
details="Conversion of modified residue MSE to MET"):
self.db_seq_id, self.seq_id = db_seq_id, seq_id
self.db_comp_id, self.details = 'MSE', details
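# Illustrative usage sketch (not part of the library API): embed starting
# model coordinates and note an MSE->MET change at an assumed position.
# `asym_a` and `pdb_dataset` are assumed to exist; ihm.model.Atom is the
# coordinate class used elsewhere in this package.
#
#     sm = StartingModel(asym_a(1, 100), pdb_dataset, asym_id='A', offset=0)
#     sm.add_atom(ihm.model.Atom(asym_unit=asym_a, seq_id=1, atom_id='CA',
#                                type_symbol='C', x=1.0, y=2.0, z=3.0))
#     sm.add_seq_dif(MSESeqDif(db_seq_id=54, seq_id=54))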
python-ihm-2.7/ihm/test.py 0000664 0000000 0000000 00000002153 15035733372 0015575 0 ustar 00root root 0000000 0000000 import ihm
import ihm.dumper
import ihm.reader
import os
import unittest
try:
import msgpack
except ImportError:
msgpack = None
class Tests(unittest.TestCase):
def test_basic(self):
"""Basic install test"""
system = ihm.System(title='test system')
entity_a = ihm.Entity('AAA', description='Subunit A')
entity_b = ihm.Entity('AAAAAA', description='Subunit B')
system.entities.extend((entity_a, entity_b))
with open('output.cif', 'w') as fh:
ihm.dumper.write(fh, [system])
with open('output.cif') as fh:
sys2, = ihm.reader.read(fh)
self.assertEqual(sys2.title, 'test system')
os.unlink('output.cif')
# Also test with BinaryCIF
if msgpack:
with open('output.bcif', 'wb') as fh:
ihm.dumper.write(fh, [system], format='BCIF')
with open('output.bcif', 'rb') as fh:
sys2, = ihm.reader.read(fh, format='BCIF')
self.assertEqual(sys2.title, 'test system')
os.unlink('output.bcif')
if __name__ == '__main__':
unittest.main()
python-ihm-2.7/ihm/util/ 0000775 0000000 0000000 00000000000 15035733372 0015220 5 ustar 00root root 0000000 0000000 python-ihm-2.7/ihm/util/__init__.py 0000664 0000000 0000000 00000013526 15035733372 0017340 0 ustar 00root root 0000000 0000000 """Utility classes"""
import string
import os
import ihm
import datetime
class _AsymIDs:
"""Map indices to multi-character asym (chain) IDs.
We label the first 26 chains A-Z, then we move to two-letter
chain IDs: AA through AZ, then BA through BZ, through to ZZ.
This continues with longer chain IDs."""
def __getitem__(self, ind):
chars = string.ascii_uppercase
lc = len(chars)
ids = []
while ind >= lc:
ids.append(chars[ind % lc])
ind = ind // lc - 1
ids.append(chars[ind])
return "".join(reversed(ids))
def _remove_id(obj, attr='_id'):
"""Remove any unique ID from obj"""
if hasattr(obj, attr):
delattr(obj, attr)
def _assign_id(obj, seen_objs, obj_by_id, attr='_id', seen_obj=None,
by_id_obj=None):
"""Assign a unique ID to obj, and track all ids in obj_by_id."""
if seen_obj is None:
seen_obj = obj
if by_id_obj is None:
by_id_obj = obj
if seen_obj not in seen_objs:
if not hasattr(obj, attr):
obj_by_id.append(by_id_obj)
setattr(obj, attr, len(obj_by_id))
seen_objs[seen_obj] = getattr(obj, attr)
else:
setattr(obj, attr, seen_objs[seen_obj])
def _get_relative_path(reference, path):
"""Return `path` interpreted relative to `reference`"""
if os.path.isabs(path):
return path
else:
return os.path.join(os.path.dirname(reference), path)
def _text_choice_property(attr, choices, doc=None):
"""Like `property` but requires that the value be one of the set choices"""
schoices = frozenset(choices)
def getfunc(obj):
return getattr(obj, "_" + attr)
def setfunc(obj, val):
if val is not None and val is not ihm.unknown and val not in schoices:
raise ValueError(
"Invalid choice %s for %s; valid values are %s, "
"None, ihm.unknown"
% (repr(val), attr, ", ".join(repr(x) for x in choices)))
setattr(obj, "_" + attr, val)
return property(getfunc, setfunc, doc=doc)
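# For example, a hypothetical class could restrict an attribute to a fixed
# vocabulary (the value is stored internally in `self._kind`):
#
#     class Thing:
#         kind = _text_choice_property('kind', ('big', 'small'),
#                                      doc="Size category")
#
#     t = Thing()
#     t.kind = 'big'   # OK
#     t.kind = 'huge'  # raises ValueError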
def _check_residue_range(seq_id_range, entity):
"""Make sure that a residue range is not out of range of its Entity"""
if not entity or not entity._range_check:
return
if seq_id_range[1] < seq_id_range[0]:
raise ValueError("Range %d-%d is invalid; end is before start"
% seq_id_range)
if (seq_id_range[1] > len(entity.sequence)
or seq_id_range[0] < 1):
raise IndexError("Range %d-%d out of range for %s (1-%d)"
% (seq_id_range[0], seq_id_range[1],
entity, len(entity.sequence)))
def _check_residue(r):
"""Make sure that a residue is not out of range of its Entity"""
if not r.entity or not r.entity._range_check:
return
if r.seq_id > len(r.entity.sequence) or r.seq_id < 1:
raise IndexError("Residue %d out of range for %s (1-%d)"
% (r.seq_id, r.entity, len(r.entity.sequence)))
def _check_transform(t):
if t.rot_matrix in (None, ihm.unknown):
raise ValueError("Transformation %s is missing rotation" % t)
if t.tr_vector in (None, ihm.unknown):
raise ValueError("Transformation %s is missing translation" % t)
def _invert_ranges(ranges, end, start=1):
"""Given a sorted list of non-overlapping ranges, yield a new list which
contains every range in the range start-end which was not in the
original list. For example, if end=4,
[(2, 3)] -> [(1, 1), (4, 4)]"""
for r in ranges:
if r[0] > start:
yield (start, r[0] - 1)
start = r[1] + 1
if end >= start:
yield (start, end)
def _pred_ranges(ranges, end):
"""Given a sorted list of non-overlapping ranges, yield a new list which
covers the range 1-end. Each element in the new list contains a new
third bool member which is True iff the element was in the original
list. For example, if end=4,
[(2, 3)] -> [(1, 1, False), (2, 3, True), (4, 4, False)]"""
start = 1
for r in ranges:
if r[0] > start:
yield (start, r[0] - 1, False)
yield (r[0], r[1], True)
start = r[1] + 1
if end >= start:
yield (start, end, False)
def _combine_ranges(ranges):
"""Sort the input ranges and remove any overlaps; yield the result.
For example, [(8, 10), (1, 2), (3, 4)] -> [(1, 4), (8, 10)]"""
ranges = sorted(ranges)
if not ranges:
return
current = ranges[0]
for r in ranges[1:]:
if current[1] + 1 >= r[0]:
current = (current[0], max(r[1], current[1]))
else:
yield current
current = r
yield current
def _make_range_from_list(rr):
"""Yield a list of ranges given a sorted list of values.
For example, [1, 2, 5, 6] -> [[1, 2], [5, 6]]"""
if not rr:
return
current = [rr[0], rr[0]]
for r in rr[1:]:
if current[1] + 1 == r:
current[1] = r
else:
yield current
current = [r, r]
yield current
def _get_codes(codestr):
"""Convert a one-letter-code string into a sequence of individual
codes"""
if codestr is None or codestr is ihm.unknown:
return
i = 0
while i < len(codestr):
# Strip out linebreaks
if codestr[i] == '\n':
pass
elif codestr[i] == '(':
end = codestr.index(')', i)
yield codestr[i + 1:end]
i = end
else:
yield codestr[i]
i += 1
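# For example, parenthesized multi-character codes are returned whole:
#     list(_get_codes('(MSE)AC')) -> ['MSE', 'A', 'C']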
def _get_iso_date(iso_date_str):
"""Get a datetime.date obj for a string in isoformat."""
if not iso_date_str:
return iso_date_str
return datetime.date(int(iso_date_str[0:4]),
int(iso_date_str[5:7]),
int(iso_date_str[8:10]))
python-ihm-2.7/ihm/util/make_mmcif.py 0000664 0000000 0000000 00000047354 15035733372 0017677 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python3
"""
Add minimal IHM-related tables to an mmCIF file.
Given any mmCIF file as input, this script will add any missing
IHM-related tables and write out a new file that is minimally compliant
with the IHM dictionary.
This is done by simply reading in the original file with python-ihm and
then writing it out again, so
a) any data in the input file that is not understood by python-ihm
will be lost on output; and
b) input files that aren't compliant with the PDBx dictionary, or that
contain syntax errors or other problems, may crash or otherwise confuse
python-ihm.
The --add option can also be used to combine multiple input mmCIF files into
one. This is typically used when the mmCIF files contain models with
differing composition. Only model (coordinate) information is combined, not
other IHM information such as starting models or restraints.
"""
import ihm.reader
import ihm.dumper
import ihm.model
import ihm.protocol
import ihm.util
import ihm.format
import urllib.request
import os
import argparse
import collections
import operator
import warnings
# All canonical atom names for each standard residue type, as per CCD.
# This is generated using the util/get_ccd_atoms.py script.
KNOWN_ATOM_NAMES = {
'A': {"C4'", "C2'", 'C2', "C1'", 'N7', 'H62', 'OP2', 'N3', 'C5', 'P',
"H5''", 'H2', "C5'", 'H61', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8',
'N9', 'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'N6',
'HOP2', "O5'", "O2'", "HO3'", "H5'", "C3'", 'C6', "H4'"},
'ALA': {'H2', 'HB1', 'HB3', 'HB2', 'N', 'HXT', 'O', 'CB', 'C', 'HA', 'CA',
'H', 'OXT'},
'ARG': {'HB2', 'CG', 'NE', 'H', 'H2', 'HH22', 'N', 'HG2', 'CA', 'NH2',
'HH11', 'HG3', 'HH21', 'CZ', 'HB3', 'HXT', 'O', 'C', 'HD3', 'HH12',
'CB', 'NH1', 'CD', 'HA', 'HD2', 'HE', 'OXT'},
'ASN': {'H2', 'HB3', 'HD22', 'HB2', 'N', 'CG', 'O', 'CB', 'ND2', 'HXT',
'C', 'HA', 'HD21', 'CA', 'OD1', 'H', 'OXT'},
'ASP': {'H2', 'HB3', 'HB2', 'N', 'CG', 'O', 'CB', 'HXT', 'C', 'HA', 'OD2',
'CA', 'OD1', 'HD2', 'H', 'OXT'},
'C': {"C4'", "C2'", 'C2', 'O2', 'H42', 'H5', "C1'", 'OP2', 'N3', 'C5',
'P', "H5''", 'H41', 'H6', "C5'", "H3'", 'C4', 'N1', 'N4', "H1'",
'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'",
"O2'", "HO3'", "H5'", "C3'", 'C6', "H4'"},
'CYS': {'H2', 'HB3', 'HB2', 'N', 'SG', 'O', 'CB', 'HXT', 'C', 'HA', 'HG',
'CA', 'H', 'OXT'},
'DA': {"C4'", "C2'", 'C2', "C1'", 'N7', 'H62', 'OP2', 'N3', 'C5', 'P',
"H5''", 'H2', "C5'", 'H61', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8',
'N9', 'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'N6', 'HOP2',
"O5'", "H2''", "HO3'", "H5'", "C3'", 'C6', "H4'"},
'DC': {"C4'", "C2'", 'C2', 'O2', 'H42', 'H5', "C1'", 'OP2', 'N3', 'C5',
'P', "H5''", 'H41', 'H6', "C5'", "H3'", 'C4', 'N1', 'N4', "H1'",
'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
"HO3'", "H5'", "C3'", 'C6', "H4'"},
'DG': {"C4'", "C2'", 'C2', "C1'", 'N7', 'OP2', 'N3', 'C5', 'P', "H5''",
"C5'", 'O6', 'H1', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8', 'N9',
'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
'H21', 'H22', "HO3'", "H5'", "C3'", 'N2', 'C6', "H4'"},
'DT': {"C4'", "C2'", 'C2', 'O2', 'O4', "C1'", 'OP2', 'N3', 'C5', 'P',
"H5''", 'H6', "C5'", "H3'", 'C4', 'N1', 'C7', "H1'", 'H73', 'HOP3',
'H3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
'H71', "HO3'", "H5'", "C3'", 'H72', 'C6', "H4'"},
'G': {"C4'", "C2'", 'C2', "C1'", 'N7', 'OP2', 'N3', 'C5', 'P', "H5''",
"C5'", 'O6', 'H1', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8', 'N9',
'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'",
"O2'", 'H21', 'H22', "HO3'", "H5'", "C3'", 'N2', 'C6', "H4'"},
'GLN': {'HB2', 'CG', 'H', 'H2', 'N', 'HG2', 'HE22', 'CA', 'HG3', 'HE21',
'HB3', 'HXT', 'O', 'NE2', 'C', 'OE1', 'CB', 'CD', 'HA', 'OXT'},
'GLU': {'HB2', 'CG', 'H', 'H2', 'N', 'HG2', 'CA', 'HG3', 'HB3', 'HXT',
'O', 'HE2', 'C', 'OE2', 'OE1', 'CB', 'CD', 'HA', 'OXT'},
'GLY': {'HA3', 'HXT', 'CA', 'O', 'HA2', 'H', 'N', 'C', 'H2', 'OXT'},
'HIS': {'HB2', 'CG', 'CE1', 'HE1', 'H', 'ND1', 'H2', 'N', 'CA', 'HD1',
'HB3', 'HXT', 'O', 'HE2', 'NE2', 'C', 'CD2', 'CB', 'HA', 'HD2',
'OXT'},
'ILE': {'HD11', 'CG1', 'H', 'HD12', 'H2', 'N', 'CA', 'HD13', 'HG13',
'HXT', 'O', 'HB', 'C', 'CD1', 'HG23', 'HG22', 'HG21', 'HG12',
'CB', 'CG2', 'HA', 'OXT'},
'LEU': {'HD11', 'HB2', 'HD22', 'CG', 'HD21', 'H', 'HD12', 'H2', 'N',
'HD23', 'CA', 'HD13', 'HB3', 'HXT', 'O', 'C', 'CD2', 'CD1', 'CB',
'HA', 'HG', 'OXT'},
'LYS': {'HB2', 'CG', 'CE', 'H', 'H2', 'N', 'HG2', 'HE3', 'CA', 'HG3',
'HB3', 'HXT', 'O', 'HE2', 'HZ1', 'HZ3', 'C', 'HD3', 'CB', 'CD',
'HA', 'HZ2', 'HD2', 'NZ', 'OXT'},
'MET': {'HB2', 'CG', 'HE1', 'CE', 'H', 'H2', 'N', 'HG2', 'HE3', 'CA',
'HG3', 'SD', 'HB3', 'HXT', 'O', 'HE2', 'C', 'CB', 'HA', 'OXT'},
'PHE': {'HB2', 'CG', 'CE1', 'HE1', 'H', 'H2', 'N', 'HZ', 'CA', 'HD1',
'CZ', 'HB3', 'HXT', 'O', 'HE2', 'C', 'CD2', 'CD1', 'CB', 'CE2',
'HA', 'HD2', 'OXT'},
'PRO': {'HB3', 'HB2', 'N', 'CG', 'O', 'CB', 'HG2', 'HXT', 'CD', 'C', 'HA',
'CA', 'HD2', 'H', 'HG3', 'HD3', 'OXT'},
'SER': {'H2', 'HB3', 'HB2', 'N', 'HXT', 'O', 'CB', 'C', 'HA', 'HG', 'CA',
'H', 'OG', 'OXT'},
'THR': {'H2', 'HXT', 'N', 'HG23', 'O', 'CB', 'CG2', 'OG1', 'HB', 'C',
'HA', 'CA', 'HG22', 'H', 'HG1', 'HG21', 'OXT'},
'TRP': {'HB2', 'CG', 'CE3', 'CZ3', 'HE1', 'H', 'H2', 'N', 'HE3', 'CA',
'CZ2', 'HD1', 'HB3', 'HXT', 'O', 'HZ3', 'C', 'CD2', 'CD1', 'NE1',
'CB', 'HH2', 'CE2', 'HA', 'CH2', 'HZ2', 'OXT'},
'U': {"C4'", "C2'", 'C2', 'O2', 'H5', 'O4', "C1'", 'OP2', 'N3', 'C5', 'P',
"H5''", 'H6', "C5'", "H3'", 'C4', 'N1', "H1'", 'HOP3', 'H3', 'OP1',
"O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'", "O2'", "HO3'",
"H5'", "C3'", 'C6', "H4'"},
'VAL': {'CG1', 'H', 'H2', 'N', 'CA', 'HG13', 'HXT', 'O', 'HB', 'C',
'HG23', 'HG22', 'HG21', 'HG12', 'CB', 'CG2', 'HA', 'OXT', 'HG11'}
}
def add_ihm_info(s, fix_histidines, check_atom_names):
# Non-standard histidine names (protonation states)
histidines = frozenset(('HIP', 'HID', 'HIE'))
if not s.title:
s.title = 'Auto-generated system'
# Simple default assembly containing all chains
default_assembly = ihm.Assembly(s.asym_units, name='Modeled assembly')
# Simple default atomic representation for everything
default_representation = ihm.representation.Representation(
[ihm.representation.AtomicSegment(asym, rigid=False)
for asym in s.asym_units])
# Simple default modeling protocol
default_protocol = ihm.protocol.Protocol(name='modeling')
for state_group in s.state_groups:
for state in state_group:
for model_group in state:
for model in model_group:
if not model.assembly:
model.assembly = default_assembly
if not model.representation:
model.representation = default_representation
if not model.protocol:
model.protocol = default_protocol
model.not_modeled_residue_ranges.extend(
_get_not_modeled_residues(model))
if fix_histidines:
_fix_histidine_het_atoms(model, histidines)
if check_atom_names != 'no':
_check_atom_names(model, check_atom_names == 'all')
if fix_histidines:
_fix_histidine_chem_comps(s, histidines)
return s
def _fix_histidine_het_atoms(model, histidines):
"""Fix any non-standard histidine atoms in atom_site that are marked
HETATM to instead use ATOM"""
for atom in model._atoms:
if atom.seq_id is None or not atom.het:
continue
comp = atom.asym_unit.sequence[atom.seq_id - 1]
if comp.id in histidines:
atom.het = False
class _ChemCompAtomHandler:
not_in_file = omitted = unknown = None
def __init__(self):
super().__init__()
self.atoms = collections.defaultdict(set)
def __call__(self, comp_id, atom_id):
self.atoms[comp_id].add(atom_id)
def _get_non_std_restyp(restyp):
"""Return CCD info for the given residue type"""
url_top = 'https://files.rcsb.org'
url_pattern = url_top + '/pub/pdb/refdata/chem_comp/%s/%s/%s.cif'
url = url_pattern % (restyp[-1], restyp, restyp)
cca = _ChemCompAtomHandler()
try:
with urllib.request.urlopen(url) as fh:
c = ihm.format.CifReader(fh,
category_handler={'_chem_comp_atom': cca})
c.read_file()
except urllib.error.URLError as exc:
warnings.warn(
"Component %s could not be found in CCD: %s" % (restyp, exc))
return cca.atoms
def _get_non_canon(seen_atom_names, check_all):
"""Get all non-canonical atom names for each residue type"""
for restyp, atoms in seen_atom_names.items():
if check_all and restyp not in KNOWN_ATOM_NAMES:
KNOWN_ATOM_NAMES.update(_get_non_std_restyp(restyp))
if restyp in KNOWN_ATOM_NAMES:
non_canon_atoms = atoms - KNOWN_ATOM_NAMES[restyp]
if non_canon_atoms:
yield restyp, non_canon_atoms
def _check_atom_names(model, check_all):
"""Check that only standard atom names are used for known
residue types"""
seen_atom_names = collections.defaultdict(set)
for atom in model._atoms:
seq_id = 1 if atom.seq_id is None else atom.seq_id
comp = atom.asym_unit.sequence[seq_id - 1]
seen_atom_names[comp.id].add(atom.atom_id)
non_canon = sorted(_get_non_canon(seen_atom_names, check_all),
key=operator.itemgetter(0))
if non_canon:
raise ValueError(
"Non-canonical atom names found in the following residues: "
+ "; ".join("%s: %r" % (restyp, sorted(atoms))
for (restyp, atoms) in non_canon))
def _fix_histidine_chem_comps(s, histidines):
"""Change any non-standard histidine chemical components to normal HIS"""
his = ihm.LPeptideAlphabet()['H']
for e in s.entities:
for c in e.sequence:
if c.id in histidines:
# Change the ChemComp to HIS in place, as there may be
# references to this ChemComp elsewhere. Duplicate HIS
# components will be combined into one at output time.
c.id = his.id
c.code = his.code
c.code_canonical = his.code_canonical
c.name = his.name
c.formula = his.formula
c.__class__ = his.__class__
def _get_not_modeled_residues(model):
"""Yield NotModeledResidueRange objects for all residue ranges in the
Model that are not referenced by Atom, Sphere, or pre-existing
NotModeledResidueRange objects"""
for assem in model.assembly:
asym = assem.asym if hasattr(assem, 'asym') else assem
if not asym.entity.is_polymeric():
continue
# Make a set of all residue indices of this asym "handled" either
# by being modeled (with Atom or Sphere objects) or by being
# explicitly marked as not-modeled
handled_residues = set()
for rr in model.not_modeled_residue_ranges:
if rr.asym_unit is asym:
for seq_id in range(rr.seq_id_begin, rr.seq_id_end + 1):
handled_residues.add(seq_id)
for atom in model._atoms:
if atom.asym_unit is asym:
handled_residues.add(atom.seq_id)
for sphere in model._spheres:
if sphere.asym_unit is asym:
for seq_id in range(sphere.seq_id_range[0],
sphere.seq_id_range[1] + 1):
handled_residues.add(seq_id)
# Convert set to a list of residue ranges
handled_residues = ihm.util._make_range_from_list(
sorted(handled_residues))
# Return not-modeled for each non-handled range
for r in ihm.util._invert_ranges(handled_residues,
end=assem.seq_id_range[1],
start=assem.seq_id_range[0]):
yield ihm.model.NotModeledResidueRange(asym, r[0], r[1])
def add_ihm_info_one_system(fname, fix_histidines, check_atom_names):
"""Read mmCIF file `fname`, which must contain a single System, and
return it with any missing IHM data added."""
with open(fname) as fh:
systems = ihm.reader.read(fh)
if len(systems) != 1:
raise ValueError("mmCIF file %s must contain exactly 1 data block "
"(%d found)" % (fname, len(systems)))
return add_ihm_info(systems[0], fix_histidines, check_atom_names)
def combine(s, other_s):
"""Add models from the System `other_s` into the System `s`.
After running this function, `s` will contain all Models from both
systems. The models are added to new StateGroup(s) in `s`.
Note that this function also modifies `other_s` in place, so that
System should no longer be used after calling this function."""
# First map all Entity and AsymUnit objects in `other_s` to equivalent
# objects in `s`
entity_map = combine_entities(s, other_s)
asym_map = combine_asyms(s, other_s, entity_map)
# Now handle the Models themselves
combine_atoms(s, other_s, asym_map)
def combine_entities(s, other_s):
"""Add `other_s` entities into `s`. Returns a dict that maps Entities
in `other_s` to equivalent objects in `s`."""
entity_map = {}
sequences = dict((e.sequence, e) for e in s.entities)
for e in other_s.entities:
if e.sequence in sequences:
# If the `other_s` Entity already exists in `s`, map to it
entity_map[e] = sequences[e.sequence]
else:
# Otherwise, add the `other_s` Entity to `s`
s.entities.append(e)
entity_map[e] = e
return entity_map
def combine_asyms(s, other_s, entity_map):
"""Add `other_s` asyms into `s`. Returns a dict that maps AsymUnits
in `other_s` to equivalent objects in `s`."""
asym_map = {}
# Collect author-provided information for existing asyms. For polymers,
# we use the author-provided chain ID; for non-polymers, we also use
# the author-provided residue number of the first (only) residue
poly_asyms = dict(((a.entity, a.strand_id), a)
for a in s.asym_units if a.entity.is_polymeric())
nonpoly_asyms = dict(((a.entity, a.strand_id, a.auth_seq_id_map[1]), a)
for a in s.asym_units
if a.entity.type == 'non-polymer')
def map_asym(asym, orig_asym):
if orig_asym:
# If an equivalent asym already exists, use it (and its asym_id)
asym_map[asym] = orig_asym
else:
# Otherwise, add a new asym
asym_map[asym] = asym
asym.id = None # Assign new ID
s.asym_units.append(asym)
for asym in other_s.asym_units:
# Point to Entity in `s`, not `other_s`
asym.entity = entity_map[asym.entity]
# For polymers and non-polymers, if an asym in `other_s` has the
# same author-provided information and entity_id as an asym in `s`,
# reuse the asym_id
if asym.entity.is_polymeric():
map_asym(asym, poly_asyms.get((asym.entity, asym.strand_id)))
elif asym.entity.type == 'non-polymer':
map_asym(asym, nonpoly_asyms.get((asym.entity, asym.strand_id,
asym.auth_seq_id_map[1])))
else:
# For waters and branched entities, always assign a new asym_id
asym_map[asym] = asym
asym.id = None # Assign new ID
s.asym_units.append(asym)
return asym_map
def combine_atoms(s, other_s, asym_map):
"""Add `other_s` atoms into `s`"""
seen_asmb = set()
seen_rep = set()
for state_group in other_s.state_groups:
for state in state_group:
for model_group in state:
for model in model_group:
# Assembly, Representation and Atom and Sphere objects
# all reference `other_s` asyms. We must map these to
# asyms in `s`.
asmb = model.assembly
if id(asmb) not in seen_asmb:
seen_asmb.add(id(asmb))
# todo: also handle AsymUnitRange
asmb[:] = [asym_map[asym] for asym in asmb]
rep = model.representation
if id(rep) not in seen_rep:
seen_rep.add(id(rep))
for seg in rep:
seg.asym_unit = asym_map[seg.asym_unit]
for atom in model._atoms:
atom.asym_unit = asym_map[atom.asym_unit]
for sphere in model._spheres:
sphere.asym_unit = asym_map[sphere.asym_unit]
# Add all models as new state groups
s.state_groups.extend(other_s.state_groups)
def get_args():
p = argparse.ArgumentParser(
description="Add minimal IHM-related tables to an mmCIF file.")
p.add_argument("input", metavar="input.cif", help="input mmCIF file name")
p.add_argument("output", metavar="output.cif",
help="output mmCIF file name",
default="output.cif", nargs="?")
p.add_argument("--add", "-a", action='append', metavar="add.cif",
help="also add model information from the named mmCIF "
"file to the output file")
p.add_argument("--histidines", action='store_true', dest="fix_histidines",
help="Convert any non-standard histidine names (HIP, HID, "
"HIE, for different protonation states) to HIS")
p.add_argument('--check_atom_names', choices=['no', 'standard', 'all'],
dest="check_atom_names", default='no',
help="If 'standard', check for non-canonical atom names "
"in standard amino acid and nucleic acid chemical "
"components; if 'all', also check non-standard "
"residue types by querying CCD (needs network access)")
return p.parse_args()
def main():
args = get_args()
if (os.path.exists(args.input) and os.path.exists(args.output)
and os.path.samefile(args.input, args.output)):
raise ValueError("Input and output are the same file")
if args.add:
s = add_ihm_info_one_system(args.input, args.fix_histidines,
args.check_atom_names)
for other in args.add:
other_s = add_ihm_info_one_system(other, args.fix_histidines,
args.check_atom_names)
combine(s, other_s)
with open(args.output, 'w') as fhout:
ihm.dumper.write(
fhout, [s],
variant=ihm.dumper.IgnoreVariant(['_audit_conform']))
else:
with open(args.input) as fh:
with open(args.output, 'w') as fhout:
ihm.dumper.write(
fhout, [add_ihm_info(s, args.fix_histidines,
args.check_atom_names)
for s in ihm.reader.read(fh)],
variant=ihm.dumper.IgnoreVariant(['_audit_conform']))
if __name__ == '__main__':
main()
python-ihm-2.7/make-release.sh 0000775 0000000 0000000 00000003173 15035733372 0016364 0 ustar 00root root 0000000 0000000 #!/bin/bash -e
# First, do
# - Check spelling with
# codespell . --skip '*.cif' -L assertIn
# - Update AuditConformDumper to match latest IHM dictionary if necessary
# - Run util/validate-outputs.py to make sure all example outputs validate
# (cd util; PYTHONPATH=.. python3 validate-outputs.py)
# - Run util/check-db-entries.py to check against some real archive structures
# (cd util; PYTHONPATH=.. python3 check-db-entries.py)
# - Make sure all python-modelcif tests work using new IHM version
# - Make sure the self-test script in each package (Homebrew, conda) works
# - Update ChangeLog.rst, util/debian/changelog, and util/python-ihm.spec
# with the release number and date
# - Update release number in ihm/__init__.py, MANIFEST.in, and setup.py
# - Commit, tag, and push
# - Make release on GitHub
# - Upload the release tarball from
# https://github.com/ihmwg/python-ihm/releases to Zenodo as a new release
# - Make sure there are no extraneous .py files (setup.py will include them
# in the pypi package)
# Make SWIG wrapper so users don't need SWIG
rm -rf build src/ihm_format_wrap.c
python3 setup.py build_ext --inplace
VERSION=$(python3 setup.py --version)
mv src/ihm_format_wrap.c "src/ihm_format_wrap_${VERSION}.c"
python3 setup.py sdist
rm -f "src/ihm_format_wrap_${VERSION}.c"
echo "Now use 'twine upload dist/ihm-${VERSION}.tar.gz' to publish the release on PyPi."
echo "Then, update the conda-forge, COPR, PPA, and Homebrew packages to match."
echo "For COPR, use dist/ihm-${VERSION}.tar.gz together with util/python-ihm.spec"
echo "For the PPA, use the GitHub release tarball, renamed to python-ihm_${VERSION}.orig.tar.gz"
python-ihm-2.7/setup.py 0000775 0000000 0000000 00000003716 15035733372 0015212 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
try:
from setuptools import setup, Extension
except ImportError:
from distutils.core import setup, Extension
import sys
import os
VERSION = "2.7"
copy_args = sys.argv[1:]
# Allow building without the C extension
build_ext = True
if '--without-ext' in copy_args:
build_ext = False
copy_args.remove('--without-ext')
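# e.g. "python3 setup.py build --without-ext" builds without the C extension
# entirely; the extension is in any case marked optional=True below, so a
# failed compile is non-fatal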
if sys.platform == 'win32':
# Our use of strdup, strerror should be safe - no need for the Windows
# compiler to warn about it; we want to use the POSIX name for strdup too
cargs = ['-D_CRT_SECURE_NO_WARNINGS', '-D_CRT_NONSTDC_NO_WARNINGS']
else:
cargs = []
if build_ext:
# Use pre-built SWIG wrappers for stable releases so that end users
# don't need SWIG installed
wrap = "src/ihm_format_wrap_%s.c" % VERSION
if not os.path.exists(wrap):
wrap = "src/ihm_format.i"
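    # (if the pre-built wrapper is absent, e.g. in a git checkout, SWIG
    # itself must be available to regenerate it from the interface file)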
mod = [Extension("ihm._format",
sources=["src/ihm_format.c", "src/cmp.c", wrap],
include_dirs=['src'],
extra_compile_args=cargs,
swig_opts=['-keyword', '-nodefaultctor',
'-nodefaultdtor', '-noproxy'],
optional=True)]
else:
mod = []
with open("README.md", "r") as fh:
long_description = fh.read()
setup(name='ihm',
version=VERSION,
script_args=copy_args,
description='Package for handling IHM mmCIF and BinaryCIF files',
long_description=long_description,
long_description_content_type="text/markdown",
author='Ben Webb',
author_email='ben@salilab.org',
url='https://github.com/ihmwg/python-ihm',
ext_modules=mod,
packages=['ihm', 'ihm.util'],
install_requires=['msgpack'],
license='MIT',
classifiers=[
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering",
])
python-ihm-2.7/src/ 0000775 0000000 0000000 00000000000 15035733372 0014255 5 ustar 00root root 0000000 0000000 python-ihm-2.7/src/.gitignore 0000664 0000000 0000000 00000000033 15035733372 0016241 0 ustar 00root root 0000000 0000000 *.o
test
ihm_format_wrap.c
python-ihm-2.7/src/cmp.c 0000664 0000000 0000000 00000244235 15035733372 0015212 0 ustar 00root root 0000000 0000000 /*
The MIT License (MIT)
Copyright (c) 2020 Charles Gunyon
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "cmp.h"
static const uint32_t cmp_version_ = 20;
static const uint32_t cmp_mp_version_ = 5;
enum {
POSITIVE_FIXNUM_MARKER = 0x00,
FIXMAP_MARKER = 0x80,
FIXARRAY_MARKER = 0x90,
FIXSTR_MARKER = 0xA0,
NIL_MARKER = 0xC0,
FALSE_MARKER = 0xC2,
TRUE_MARKER = 0xC3,
BIN8_MARKER = 0xC4,
BIN16_MARKER = 0xC5,
BIN32_MARKER = 0xC6,
EXT8_MARKER = 0xC7,
EXT16_MARKER = 0xC8,
EXT32_MARKER = 0xC9,
FLOAT_MARKER = 0xCA,
DOUBLE_MARKER = 0xCB,
U8_MARKER = 0xCC,
U16_MARKER = 0xCD,
U32_MARKER = 0xCE,
U64_MARKER = 0xCF,
S8_MARKER = 0xD0,
S16_MARKER = 0xD1,
S32_MARKER = 0xD2,
S64_MARKER = 0xD3,
FIXEXT1_MARKER = 0xD4,
FIXEXT2_MARKER = 0xD5,
FIXEXT4_MARKER = 0xD6,
FIXEXT8_MARKER = 0xD7,
FIXEXT16_MARKER = 0xD8,
STR8_MARKER = 0xD9,
STR16_MARKER = 0xDA,
STR32_MARKER = 0xDB,
ARRAY16_MARKER = 0xDC,
ARRAY32_MARKER = 0xDD,
MAP16_MARKER = 0xDE,
MAP32_MARKER = 0xDF,
NEGATIVE_FIXNUM_MARKER = 0xE0
};
enum {
FIXARRAY_SIZE = 0xF,
FIXMAP_SIZE = 0xF,
FIXSTR_SIZE = 0x1F
};
typedef enum cmp_error_e {
CMP_ERROR_NONE,
CMP_ERROR_STR_DATA_LENGTH_TOO_LONG,
CMP_ERROR_BIN_DATA_LENGTH_TOO_LONG,
CMP_ERROR_ARRAY_LENGTH_TOO_LONG,
CMP_ERROR_MAP_LENGTH_TOO_LONG,
CMP_ERROR_INPUT_VALUE_TOO_LARGE,
CMP_ERROR_FIXED_VALUE_WRITING,
CMP_ERROR_TYPE_MARKER_READING,
CMP_ERROR_TYPE_MARKER_WRITING,
CMP_ERROR_DATA_READING,
CMP_ERROR_DATA_WRITING,
CMP_ERROR_EXT_TYPE_READING,
CMP_ERROR_EXT_TYPE_WRITING,
CMP_ERROR_INVALID_TYPE,
CMP_ERROR_LENGTH_READING,
CMP_ERROR_LENGTH_WRITING,
CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED,
CMP_ERROR_INTERNAL,
CMP_ERROR_DISABLED_FLOATING_POINT,
CMP_ERROR_MAX
} cmp_error_t;
static const char *cmp_error_message(cmp_error_t error) {
switch (error) {
case CMP_ERROR_NONE: return "No Error";
case CMP_ERROR_STR_DATA_LENGTH_TOO_LONG: return "Specified string data length is too long (> 0xFFFFFFFF)";
case CMP_ERROR_BIN_DATA_LENGTH_TOO_LONG: return "Specified binary data length is too long (> 0xFFFFFFFF)";
case CMP_ERROR_ARRAY_LENGTH_TOO_LONG: return "Specified array length is too long (> 0xFFFFFFFF)";
case CMP_ERROR_MAP_LENGTH_TOO_LONG: return "Specified map length is too long (> 0xFFFFFFFF)";
case CMP_ERROR_INPUT_VALUE_TOO_LARGE: return "Input value is too large";
case CMP_ERROR_FIXED_VALUE_WRITING: return "Error writing fixed value";
case CMP_ERROR_TYPE_MARKER_READING: return "Error reading type marker";
case CMP_ERROR_TYPE_MARKER_WRITING: return "Error writing type marker";
case CMP_ERROR_DATA_READING: return "Error reading packed data";
case CMP_ERROR_DATA_WRITING: return "Error writing packed data";
case CMP_ERROR_EXT_TYPE_READING: return "Error reading ext type";
case CMP_ERROR_EXT_TYPE_WRITING: return "Error writing ext type";
case CMP_ERROR_INVALID_TYPE: return "Invalid type";
case CMP_ERROR_LENGTH_READING: return "Error reading size";
case CMP_ERROR_LENGTH_WRITING: return "Error writing size";
case CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED: return "Depth limit exceeded while skipping";
case CMP_ERROR_INTERNAL: return "Internal error";
case CMP_ERROR_DISABLED_FLOATING_POINT: return "Floating point operations disabled";
case CMP_ERROR_MAX: return "Max Error";
}
return "";
}
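/* Runtime endianness probe: inspect the first byte of the 32-bit value 1.
 * On a big-endian host the most significant (zero) byte comes first. */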
static bool is_bigendian(void) {
#ifdef WORDS_BIGENDIAN
return WORDS_BIGENDIAN;
#else
const int32_t one = 1;
const char *one_bytes = (const char *)&one;
return *one_bytes == 0;
#endif
}
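/* The beNN() helpers convert between host and big-endian byte order.
 * Swapping twice restores the original, so the same function serves for
 * both encoding and decoding; be32/be64 are composed from swapped halves. */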
static uint16_t be16(uint16_t x) {
if (!is_bigendian())
return ((x >> 8) & 0x00ff)
| ((x << 8) & 0xff00);
return x;
}
static int16_t sbe16(int16_t x) {
return (int16_t)be16((uint16_t)x);
}
static uint32_t be32(uint32_t x) {
if (!is_bigendian())
return ((uint32_t)be16((uint16_t)(x >> 16)))
| ((uint32_t)be16((uint16_t)(x & 0xffff)) << 16);
return x;
}
static int32_t sbe32(int32_t x) {
return (int32_t)be32((uint32_t)x);
}
static uint64_t be64(uint64_t x) {
if (!is_bigendian())
return ((uint64_t)be32((uint32_t)(x >> 32)))
| ((uint64_t)be32((uint32_t)(x & 0xffffffff)) << 32);
return x;
}
static int64_t sbe64(int64_t x) {
return (int64_t)be64((uint64_t)x);
}
#ifndef CMP_NO_FLOAT
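/* Decode big-endian IEEE values byte-by-byte into a local variable;
 * swapping in place is avoided because the intermediate bit pattern need
 * not be a valid float (see the note in cmp_write_float() below). */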
static float decode_befloat(const char *b) {
float f = 0.;
char *fb = (char *)&f;
if (!is_bigendian()) {
fb[0] = b[3];
fb[1] = b[2];
fb[2] = b[1];
fb[3] = b[0];
}
else {
fb[0] = b[0];
fb[1] = b[1];
fb[2] = b[2];
fb[3] = b[3];
}
return f;
}
static double decode_bedouble(const char *b) {
double d = 0.;
char *db = (char *)&d;
if (!is_bigendian()) {
db[0] = b[7];
db[1] = b[6];
db[2] = b[5];
db[3] = b[4];
db[4] = b[3];
db[5] = b[2];
db[6] = b[1];
db[7] = b[0];
}
else {
db[0] = b[0];
db[1] = b[1];
db[2] = b[2];
db[3] = b[3];
db[4] = b[4];
db[5] = b[5];
db[6] = b[6];
db[7] = b[7];
}
return d;
}
#endif /* CMP_NO_FLOAT */
static bool read_byte(cmp_ctx_t *ctx, uint8_t *x) {
return ctx->read(ctx, x, sizeof(uint8_t));
}
static bool write_byte(cmp_ctx_t *ctx, uint8_t x) {
return ctx->write(ctx, &x, sizeof(uint8_t)) == sizeof(uint8_t);
}
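/* Skip 'count' input bytes, preferring the user-supplied skip callback
 * and falling back to reading one byte at a time if none was given. */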
static bool skip_bytes(cmp_ctx_t *ctx, size_t count) {
if (ctx->skip) {
return ctx->skip(ctx, count);
}
else {
size_t i;
for (i = 0; i < count; ++i) {
uint8_t floor;
if (!ctx->read(ctx, &floor, sizeof(uint8_t))) {
return false;
}
}
return true;
}
}
static bool read_type_marker(cmp_ctx_t *ctx, uint8_t *marker) {
if (read_byte(ctx, marker)) {
return true;
}
ctx->error = CMP_ERROR_TYPE_MARKER_READING;
return false;
}
static bool write_type_marker(cmp_ctx_t *ctx, uint8_t marker) {
if (write_byte(ctx, marker))
return true;
ctx->error = CMP_ERROR_TYPE_MARKER_WRITING;
return false;
}
static bool write_fixed_value(cmp_ctx_t *ctx, uint8_t value) {
if (write_byte(ctx, value))
return true;
ctx->error = CMP_ERROR_FIXED_VALUE_WRITING;
return false;
}
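/* Map a raw MessagePack marker byte to a CMP_TYPE_* constant.  The "fix"
 * families occupy marker ranges (0x00-0x7F positive fixnum, 0x80-0x8F
 * fixmap, 0x90-0x9F fixarray, 0xA0-0xBF fixstr, 0xE0-0xFF negative
 * fixnum); every other type has a single dedicated marker. */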
static bool type_marker_to_cmp_type(uint8_t type_marker, uint8_t *cmp_type) {
if (type_marker <= 0x7F) {
*cmp_type = CMP_TYPE_POSITIVE_FIXNUM;
return true;
}
if (type_marker <= 0x8F) {
*cmp_type = CMP_TYPE_FIXMAP;
return true;
}
if (type_marker <= 0x9F) {
*cmp_type = CMP_TYPE_FIXARRAY;
return true;
}
if (type_marker <= 0xBF) {
*cmp_type = CMP_TYPE_FIXSTR;
return true;
}
if (type_marker >= 0xE0) {
*cmp_type = CMP_TYPE_NEGATIVE_FIXNUM;
return true;
}
switch (type_marker) {
case NIL_MARKER:
*cmp_type = CMP_TYPE_NIL;
return true;
case FALSE_MARKER:
*cmp_type = CMP_TYPE_BOOLEAN;
return true;
case TRUE_MARKER:
*cmp_type = CMP_TYPE_BOOLEAN;
return true;
case BIN8_MARKER:
*cmp_type = CMP_TYPE_BIN8;
return true;
case BIN16_MARKER:
*cmp_type = CMP_TYPE_BIN16;
return true;
case BIN32_MARKER:
*cmp_type = CMP_TYPE_BIN32;
return true;
case EXT8_MARKER:
*cmp_type = CMP_TYPE_EXT8;
return true;
case EXT16_MARKER:
*cmp_type = CMP_TYPE_EXT16;
return true;
case EXT32_MARKER:
*cmp_type = CMP_TYPE_EXT32;
return true;
case FLOAT_MARKER:
*cmp_type = CMP_TYPE_FLOAT;
return true;
case DOUBLE_MARKER:
*cmp_type = CMP_TYPE_DOUBLE;
return true;
case U8_MARKER:
*cmp_type = CMP_TYPE_UINT8;
return true;
case U16_MARKER:
*cmp_type = CMP_TYPE_UINT16;
return true;
case U32_MARKER:
*cmp_type = CMP_TYPE_UINT32;
return true;
case U64_MARKER:
*cmp_type = CMP_TYPE_UINT64;
return true;
case S8_MARKER:
*cmp_type = CMP_TYPE_SINT8;
return true;
case S16_MARKER:
*cmp_type = CMP_TYPE_SINT16;
return true;
case S32_MARKER:
*cmp_type = CMP_TYPE_SINT32;
return true;
case S64_MARKER:
*cmp_type = CMP_TYPE_SINT64;
return true;
case FIXEXT1_MARKER:
*cmp_type = CMP_TYPE_FIXEXT1;
return true;
case FIXEXT2_MARKER:
*cmp_type = CMP_TYPE_FIXEXT2;
return true;
case FIXEXT4_MARKER:
*cmp_type = CMP_TYPE_FIXEXT4;
return true;
case FIXEXT8_MARKER:
*cmp_type = CMP_TYPE_FIXEXT8;
return true;
case FIXEXT16_MARKER:
*cmp_type = CMP_TYPE_FIXEXT16;
return true;
case STR8_MARKER:
*cmp_type = CMP_TYPE_STR8;
return true;
case STR16_MARKER:
*cmp_type = CMP_TYPE_STR16;
return true;
case STR32_MARKER:
*cmp_type = CMP_TYPE_STR32;
return true;
case ARRAY16_MARKER:
*cmp_type = CMP_TYPE_ARRAY16;
return true;
case ARRAY32_MARKER:
*cmp_type = CMP_TYPE_ARRAY32;
return true;
case MAP16_MARKER:
*cmp_type = CMP_TYPE_MAP16;
return true;
case MAP32_MARKER:
*cmp_type = CMP_TYPE_MAP32;
return true;
default:
return false;
}
}
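/* Determine the payload size that follows a marker: fixed-width scalars
 * have a known size, "fix" containers embed it in the marker's low bits,
 * and bin/str/array/map/ext carry an explicit big-endian length. */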
static bool read_type_size(cmp_ctx_t *ctx, uint8_t type_marker,
uint8_t cmp_type,
uint32_t *size) {
uint8_t u8temp = 0;
uint16_t u16temp = 0;
uint32_t u32temp = 0;
switch (cmp_type) {
case CMP_TYPE_POSITIVE_FIXNUM:
*size = 0;
return true;
case CMP_TYPE_FIXMAP:
*size = (type_marker & FIXMAP_SIZE);
return true;
case CMP_TYPE_FIXARRAY:
*size = (type_marker & FIXARRAY_SIZE);
return true;
case CMP_TYPE_FIXSTR:
*size = (type_marker & FIXSTR_SIZE);
return true;
case CMP_TYPE_NIL:
*size = 0;
return true;
case CMP_TYPE_BOOLEAN:
*size = 0;
return true;
case CMP_TYPE_BIN8:
if (!ctx->read(ctx, &u8temp, sizeof(uint8_t))) {
ctx->error = CMP_ERROR_LENGTH_READING;
return false;
}
*size = u8temp;
return true;
case CMP_TYPE_BIN16:
if (!ctx->read(ctx, &u16temp, sizeof(uint16_t))) {
ctx->error = CMP_ERROR_LENGTH_READING;
return false;
}
*size = be16(u16temp);
return true;
case CMP_TYPE_BIN32:
if (!ctx->read(ctx, &u32temp, sizeof(uint32_t))) {
ctx->error = CMP_ERROR_LENGTH_READING;
return false;
}
*size = be32(u32temp);
return true;
case CMP_TYPE_EXT8:
if (!ctx->read(ctx, &u8temp, sizeof(uint8_t))) {
ctx->error = CMP_ERROR_LENGTH_READING;
return false;
}
*size = u8temp;
return true;
case CMP_TYPE_EXT16:
if (!ctx->read(ctx, &u16temp, sizeof(uint16_t))) {
ctx->error = CMP_ERROR_LENGTH_READING;
return false;
}
*size = be16(u16temp);
return true;
case CMP_TYPE_EXT32:
if (!ctx->read(ctx, &u32temp, sizeof(uint32_t))) {
ctx->error = CMP_ERROR_LENGTH_READING;
return false;
}
*size = be32(u32temp);
return true;
case CMP_TYPE_FLOAT:
*size = 4;
return true;
case CMP_TYPE_DOUBLE:
*size = 8;
return true;
case CMP_TYPE_UINT8:
*size = 1;
return true;
case CMP_TYPE_UINT16:
*size = 2;
return true;
case CMP_TYPE_UINT32:
*size = 4;
return true;
case CMP_TYPE_UINT64:
*size = 8;
return true;
case CMP_TYPE_SINT8:
*size = 1;
return true;
case CMP_TYPE_SINT16:
*size = 2;
return true;
case CMP_TYPE_SINT32:
*size = 4;
return true;
case CMP_TYPE_SINT64:
*size = 8;
return true;
case CMP_TYPE_FIXEXT1:
*size = 1;
return true;
case CMP_TYPE_FIXEXT2:
*size = 2;
return true;
case CMP_TYPE_FIXEXT4:
*size = 4;
return true;
case CMP_TYPE_FIXEXT8:
*size = 8;
return true;
case CMP_TYPE_FIXEXT16:
*size = 16;
return true;
case CMP_TYPE_STR8:
if (!ctx->read(ctx, &u8temp, sizeof(uint8_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = u8temp;
return true;
case CMP_TYPE_STR16:
if (!ctx->read(ctx, &u16temp, sizeof(uint16_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = be16(u16temp);
return true;
case CMP_TYPE_STR32:
if (!ctx->read(ctx, &u32temp, sizeof(uint32_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = be32(u32temp);
return true;
case CMP_TYPE_ARRAY16:
if (!ctx->read(ctx, &u16temp, sizeof(uint16_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = be16(u16temp);
return true;
case CMP_TYPE_ARRAY32:
if (!ctx->read(ctx, &u32temp, sizeof(uint32_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = be32(u32temp);
return true;
case CMP_TYPE_MAP16:
if (!ctx->read(ctx, &u16temp, sizeof(uint16_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = be16(u16temp);
return true;
case CMP_TYPE_MAP32:
if (!ctx->read(ctx, &u32temp, sizeof(uint32_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = be32(u32temp);
return true;
case CMP_TYPE_NEGATIVE_FIXNUM:
*size = 0;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
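/* With obj->type already classified, read the rest of the object from the
 * stream: the (byte-swapped) value for scalars, or just the size and
 * ext-type header for bin/str/array/map/ext payloads. */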
static bool read_obj_data(cmp_ctx_t *ctx, uint8_t type_marker,
cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
obj->as.u8 = type_marker;
return true;
case CMP_TYPE_NEGATIVE_FIXNUM:
obj->as.s8 = (int8_t)type_marker;
return true;
case CMP_TYPE_NIL:
obj->as.u8 = 0;
return true;
case CMP_TYPE_BOOLEAN:
switch (type_marker) {
case TRUE_MARKER:
obj->as.boolean = true;
return true;
case FALSE_MARKER:
obj->as.boolean = false;
return true;
default:
break;
}
ctx->error = CMP_ERROR_INTERNAL;
return false;
case CMP_TYPE_UINT8:
if (!ctx->read(ctx, &obj->as.u8, sizeof(uint8_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
return true;
case CMP_TYPE_UINT16:
if (!ctx->read(ctx, &obj->as.u16, sizeof(uint16_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.u16 = be16(obj->as.u16);
return true;
case CMP_TYPE_UINT32:
if (!ctx->read(ctx, &obj->as.u32, sizeof(uint32_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.u32 = be32(obj->as.u32);
return true;
case CMP_TYPE_UINT64:
if (!ctx->read(ctx, &obj->as.u64, sizeof(uint64_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.u64 = be64(obj->as.u64);
return true;
case CMP_TYPE_SINT8:
if (!ctx->read(ctx, &obj->as.s8, sizeof(int8_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
return true;
case CMP_TYPE_SINT16:
if (!ctx->read(ctx, &obj->as.s16, sizeof(int16_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.s16 = sbe16(obj->as.s16);
return true;
case CMP_TYPE_SINT32:
if (!ctx->read(ctx, &obj->as.s32, sizeof(int32_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.s32 = sbe32(obj->as.s32);
return true;
case CMP_TYPE_SINT64:
if (!ctx->read(ctx, &obj->as.s64, sizeof(int64_t))) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.s64 = sbe64(obj->as.s64);
return true;
case CMP_TYPE_FLOAT:
{
#ifndef CMP_NO_FLOAT
char bytes[4];
if (!ctx->read(ctx, bytes, 4)) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.flt = decode_befloat(bytes);
return true;
#else /* CMP_NO_FLOAT */
ctx->error = CMP_ERROR_DISABLED_FLOATING_POINT;
return false;
#endif /* CMP_NO_FLOAT */
}
case CMP_TYPE_DOUBLE:
{
#ifndef CMP_NO_FLOAT
char bytes[8];
if (!ctx->read(ctx, bytes, 8)) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
obj->as.dbl = decode_bedouble(bytes);
return true;
#else /* CMP_NO_FLOAT */
ctx->error = CMP_ERROR_DISABLED_FLOATING_POINT;
return false;
#endif /* CMP_NO_FLOAT */
}
case CMP_TYPE_BIN8:
case CMP_TYPE_BIN16:
case CMP_TYPE_BIN32:
return read_type_size(ctx, type_marker, obj->type, &obj->as.bin_size);
case CMP_TYPE_FIXSTR:
case CMP_TYPE_STR8:
case CMP_TYPE_STR16:
case CMP_TYPE_STR32:
return read_type_size(ctx, type_marker, obj->type, &obj->as.str_size);
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
return read_type_size(ctx, type_marker, obj->type, &obj->as.array_size);
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
return read_type_size(ctx, type_marker, obj->type, &obj->as.map_size);
case CMP_TYPE_FIXEXT1:
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
obj->as.ext.size = 1;
return true;
case CMP_TYPE_FIXEXT2:
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
obj->as.ext.size = 2;
return true;
case CMP_TYPE_FIXEXT4:
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
obj->as.ext.size = 4;
return true;
case CMP_TYPE_FIXEXT8:
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
obj->as.ext.size = 8;
return true;
case CMP_TYPE_FIXEXT16:
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
obj->as.ext.size = 16;
return true;
case CMP_TYPE_EXT8:
if (!read_type_size(ctx, type_marker, obj->type, &obj->as.ext.size)) {
return false;
}
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
return true;
case CMP_TYPE_EXT16:
if (!read_type_size(ctx, type_marker, obj->type, &obj->as.ext.size)) {
return false;
}
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
return true;
case CMP_TYPE_EXT32:
if (!read_type_size(ctx, type_marker, obj->type, &obj->as.ext.size)) {
return false;
}
if (!ctx->read(ctx, &obj->as.ext.type, sizeof(int8_t))) {
ctx->error = CMP_ERROR_EXT_TYPE_READING;
return false;
}
return true;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
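/* Initialize a context.  'buf' is an opaque pointer handed back to the
 * callbacks; 'skip' may be NULL, in which case skip_bytes() falls back to
 * byte-wise reads. */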
void cmp_init(cmp_ctx_t *ctx, void *buf, cmp_reader read,
cmp_skipper skip,
cmp_writer write) {
ctx->error = CMP_ERROR_NONE;
ctx->buf = buf;
ctx->read = read;
ctx->skip = skip;
ctx->write = write;
}
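/*
 * Usage sketch (not part of the library, compiled out by default): a
 * hypothetical in-memory buffer with read/write callbacks matching the
 * cmp_reader/cmp_writer signatures used above.  Names below are purely
 * illustrative.
 */
#ifdef CMP_USAGE_EXAMPLE
#include <string.h>

typedef struct {
  char *data;   /* backing storage */
  size_t pos;   /* current cursor */
  size_t size;  /* total capacity */
} cmp_mem_buf_t;

static bool mem_read(cmp_ctx_t *ctx, void *out, size_t count) {
  cmp_mem_buf_t *m = (cmp_mem_buf_t *)ctx->buf;
  if (count > m->size - m->pos)
    return false;            /* would read past the end */
  memcpy(out, m->data + m->pos, count);
  m->pos += count;
  return true;
}

static size_t mem_write(cmp_ctx_t *ctx, const void *in, size_t count) {
  cmp_mem_buf_t *m = (cmp_mem_buf_t *)ctx->buf;
  if (count > m->size - m->pos)
    return 0;                /* buffer full */
  memcpy(m->data + m->pos, in, count);
  m->pos += count;
  return count;
}

/* For example:
 *   cmp_ctx_t cmp;
 *   cmp_mem_buf_t buf = { storage, 0, sizeof(storage) };
 *   cmp_init(&cmp, &buf, mem_read, NULL, mem_write);
 *   cmp_write_uinteger(&cmp, 42);
 */
#endif /* CMP_USAGE_EXAMPLE */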
uint32_t cmp_version(void) {
return cmp_version_;
}
uint32_t cmp_mp_version(void) {
return cmp_mp_version_;
}
const char* cmp_strerror(const cmp_ctx_t *ctx) {
if (ctx->error > CMP_ERROR_NONE && ctx->error < CMP_ERROR_MAX)
return cmp_error_message((cmp_error_t)ctx->error);
return "";
}
bool cmp_write_pfix(cmp_ctx_t *ctx, uint8_t c) {
if (c <= 0x7F)
return write_fixed_value(ctx, c);
ctx->error = CMP_ERROR_INPUT_VALUE_TOO_LARGE;
return false;
}
bool cmp_write_nfix(cmp_ctx_t *ctx, int8_t c) {
if (c >= -0x20 && c <= -1)
return write_fixed_value(ctx, (uint8_t)c);
ctx->error = CMP_ERROR_INPUT_VALUE_TOO_LARGE;
return false;
}
bool cmp_write_sfix(cmp_ctx_t *ctx, int8_t c) {
if (c >= 0)
return cmp_write_pfix(ctx, (uint8_t)c);
if (c >= -0x20 && c <= -1)
return cmp_write_nfix(ctx, c);
ctx->error = CMP_ERROR_INPUT_VALUE_TOO_LARGE;
return false;
}
bool cmp_write_s8(cmp_ctx_t *ctx, int8_t c) {
if (!write_type_marker(ctx, S8_MARKER))
return false;
return ctx->write(ctx, &c, sizeof(int8_t)) == sizeof(int8_t);
}
bool cmp_write_s16(cmp_ctx_t *ctx, int16_t s) {
if (!write_type_marker(ctx, S16_MARKER))
return false;
s = sbe16(s);
return ctx->write(ctx, &s, sizeof(int16_t)) == sizeof(int16_t);
}
bool cmp_write_s32(cmp_ctx_t *ctx, int32_t i) {
if (!write_type_marker(ctx, S32_MARKER))
return false;
i = sbe32(i);
return ctx->write(ctx, &i, sizeof(int32_t)) == sizeof(int32_t);
}
bool cmp_write_s64(cmp_ctx_t *ctx, int64_t l) {
if (!write_type_marker(ctx, S64_MARKER))
return false;
l = sbe64(l);
return ctx->write(ctx, &l, sizeof(int64_t)) == sizeof(int64_t);
}
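/* Write a signed integer using the most compact encoding that holds it:
 * e.g. -5 fits in a one-byte negative fixnum, while -70000 needs the
 * five-byte s32 form (marker plus four data bytes). */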
bool cmp_write_integer(cmp_ctx_t *ctx, int64_t d) {
if (d >= 0)
return cmp_write_uinteger(ctx, (uint64_t)d);
if (d >= -0x20)
return cmp_write_nfix(ctx, (int8_t)d);
if (d >= -0x80)
return cmp_write_s8(ctx, (int8_t)d);
if (d >= -0x8000)
return cmp_write_s16(ctx, (int16_t)d);
if (d >= -INT64_C(0x80000000))
return cmp_write_s32(ctx, (int32_t)d);
return cmp_write_s64(ctx, d);
}
bool cmp_write_ufix(cmp_ctx_t *ctx, uint8_t c) {
return cmp_write_pfix(ctx, c);
}
bool cmp_write_u8(cmp_ctx_t *ctx, uint8_t c) {
if (!write_type_marker(ctx, U8_MARKER))
return false;
return ctx->write(ctx, &c, sizeof(uint8_t)) == sizeof(uint8_t);
}
bool cmp_write_u16(cmp_ctx_t *ctx, uint16_t s) {
if (!write_type_marker(ctx, U16_MARKER))
return false;
s = be16(s);
return ctx->write(ctx, &s, sizeof(uint16_t)) == sizeof(uint16_t);
}
bool cmp_write_u32(cmp_ctx_t *ctx, uint32_t i) {
if (!write_type_marker(ctx, U32_MARKER))
return false;
i = be32(i);
return ctx->write(ctx, &i, sizeof(uint32_t)) == sizeof(uint32_t);
}
bool cmp_write_u64(cmp_ctx_t *ctx, uint64_t l) {
if (!write_type_marker(ctx, U64_MARKER))
return false;
l = be64(l);
return ctx->write(ctx, &l, sizeof(uint64_t)) == sizeof(uint64_t);
}
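/* Unsigned counterpart: values up to 0x7F fit in a single positive-fixnum
 * byte; larger ones take the smallest of u8/u16/u32/u64. */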
bool cmp_write_uinteger(cmp_ctx_t *ctx, uint64_t u) {
if (u <= 0x7F)
return cmp_write_pfix(ctx, (uint8_t)u);
if (u <= 0xFF)
return cmp_write_u8(ctx, (uint8_t)u);
if (u <= 0xFFFF)
return cmp_write_u16(ctx, (uint16_t)u);
if (u <= 0xFFFFFFFF)
return cmp_write_u32(ctx, (uint32_t)u);
return cmp_write_u64(ctx, u);
}
#ifndef CMP_NO_FLOAT
bool cmp_write_float(cmp_ctx_t *ctx, float f) {
if (!write_type_marker(ctx, FLOAT_MARKER))
return false;
/*
* We may need to swap the float's bytes, but we can't just swap them inside
* the float because the swapped bytes may not constitute a valid float.
* Therefore, we have to create a buffer and swap the bytes there.
*/
if (!is_bigendian()) {
char swapped[sizeof(float)];
char *fbuf = (char *)&f;
size_t i;
for (i = 0; i < sizeof(float); ++i)
swapped[i] = fbuf[sizeof(float) - i - 1];
return ctx->write(ctx, swapped, sizeof(float)) == sizeof(float);
}
return ctx->write(ctx, &f, sizeof(float)) == sizeof(float);
}
bool cmp_write_double(cmp_ctx_t *ctx, double d) {
if (!write_type_marker(ctx, DOUBLE_MARKER))
return false;
/* Same deal for doubles */
if (!is_bigendian()) {
char swapped[sizeof(double)];
char *dbuf = (char *)&d;
size_t i;
for (i = 0; i < sizeof(double); ++i)
swapped[i] = dbuf[sizeof(double) - i - 1];
return ctx->write(ctx, swapped, sizeof(double)) == sizeof(double);
}
return ctx->write(ctx, &d, sizeof(double)) == sizeof(double);
}
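/* Write a double as a 4-byte float when the double->float->double round
 * trip is exact (e.g. 0.5), and as a full 8-byte double otherwise. */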
bool cmp_write_decimal(cmp_ctx_t *ctx, double d) {
float f = (float)d;
double df = (double)f;
if (df == d)
return cmp_write_float(ctx, f);
else
return cmp_write_double(ctx, d);
}
#endif /* CMP_NO_FLOAT */
bool cmp_write_nil(cmp_ctx_t *ctx) {
return write_type_marker(ctx, NIL_MARKER);
}
bool cmp_write_true(cmp_ctx_t *ctx) {
return write_type_marker(ctx, TRUE_MARKER);
}
bool cmp_write_false(cmp_ctx_t *ctx) {
return write_type_marker(ctx, FALSE_MARKER);
}
bool cmp_write_bool(cmp_ctx_t *ctx, bool b) {
if (b)
return cmp_write_true(ctx);
return cmp_write_false(ctx);
}
bool cmp_write_u8_as_bool(cmp_ctx_t *ctx, uint8_t b) {
if (b)
return cmp_write_true(ctx);
return cmp_write_false(ctx);
}
bool cmp_write_fixstr_marker(cmp_ctx_t *ctx, uint8_t size) {
if (size <= FIXSTR_SIZE)
return write_fixed_value(ctx, FIXSTR_MARKER | size);
ctx->error = CMP_ERROR_INPUT_VALUE_TOO_LARGE;
return false;
}
bool cmp_write_fixstr(cmp_ctx_t *ctx, const char *data, uint8_t size) {
if (!cmp_write_fixstr_marker(ctx, size))
return false;
if (size == 0)
return true;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_str8_marker(cmp_ctx_t *ctx, uint8_t size) {
if (!write_type_marker(ctx, STR8_MARKER))
return false;
if (ctx->write(ctx, &size, sizeof(uint8_t)) == sizeof(uint8_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_str8(cmp_ctx_t *ctx, const char *data, uint8_t size) {
if (!cmp_write_str8_marker(ctx, size))
return false;
if (size == 0)
return true;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_str16_marker(cmp_ctx_t *ctx, uint16_t size) {
if (!write_type_marker(ctx, STR16_MARKER))
return false;
size = be16(size);
if (ctx->write(ctx, &size, sizeof(uint16_t)) == sizeof(uint16_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_str16(cmp_ctx_t *ctx, const char *data, uint16_t size) {
if (!cmp_write_str16_marker(ctx, size))
return false;
if (size == 0)
return true;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_str32_marker(cmp_ctx_t *ctx, uint32_t size) {
if (!write_type_marker(ctx, STR32_MARKER))
return false;
size = be32(size);
if (ctx->write(ctx, &size, sizeof(uint32_t)) == sizeof(uint32_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_str32(cmp_ctx_t *ctx, const char *data, uint32_t size) {
if (!cmp_write_str32_marker(ctx, size))
return false;
if (size == 0)
return true;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_str_marker(cmp_ctx_t *ctx, uint32_t size) {
if (size <= FIXSTR_SIZE)
return cmp_write_fixstr_marker(ctx, (uint8_t)size);
if (size <= 0xFF)
return cmp_write_str8_marker(ctx, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_str16_marker(ctx, (uint16_t)size);
return cmp_write_str32_marker(ctx, size);
}
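/* The _v4 variants target the old (pre-2013) MessagePack spec, which had
 * no str8 type, so sizes of 0x20..0xFF are promoted to str16. */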
bool cmp_write_str_marker_v4(cmp_ctx_t *ctx, uint32_t size) {
if (size <= FIXSTR_SIZE)
return cmp_write_fixstr_marker(ctx, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_str16_marker(ctx, (uint16_t)size);
return cmp_write_str32_marker(ctx, size);
}
bool cmp_write_str(cmp_ctx_t *ctx, const char *data, uint32_t size) {
if (size <= FIXSTR_SIZE)
return cmp_write_fixstr(ctx, data, (uint8_t)size);
if (size <= 0xFF)
return cmp_write_str8(ctx, data, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_str16(ctx, data, (uint16_t)size);
return cmp_write_str32(ctx, data, size);
}
bool cmp_write_str_v4(cmp_ctx_t *ctx, const char *data, uint32_t size) {
if (size <= FIXSTR_SIZE)
return cmp_write_fixstr(ctx, data, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_str16(ctx, data, (uint16_t)size);
return cmp_write_str32(ctx, data, size);
}
bool cmp_write_bin8_marker(cmp_ctx_t *ctx, uint8_t size) {
if (!write_type_marker(ctx, BIN8_MARKER))
return false;
if (ctx->write(ctx, &size, sizeof(uint8_t)) == sizeof(uint8_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_bin8(cmp_ctx_t *ctx, const void *data, uint8_t size) {
if (!cmp_write_bin8_marker(ctx, size))
return false;
if (size == 0)
return true;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_bin16_marker(cmp_ctx_t *ctx, uint16_t size) {
if (!write_type_marker(ctx, BIN16_MARKER))
return false;
size = be16(size);
if (ctx->write(ctx, &size, sizeof(uint16_t)) == sizeof(uint16_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_bin16(cmp_ctx_t *ctx, const void *data, uint16_t size) {
if (!cmp_write_bin16_marker(ctx, size))
return false;
if (size == 0)
return true;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_bin32_marker(cmp_ctx_t *ctx, uint32_t size) {
if (!write_type_marker(ctx, BIN32_MARKER))
return false;
size = be32(size);
if (ctx->write(ctx, &size, sizeof(uint32_t)) == sizeof(uint32_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_bin32(cmp_ctx_t *ctx, const void *data, uint32_t size) {
if (!cmp_write_bin32_marker(ctx, size))
return false;
if (size == 0)
return true;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_bin_marker(cmp_ctx_t *ctx, uint32_t size) {
if (size <= 0xFF)
return cmp_write_bin8_marker(ctx, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_bin16_marker(ctx, (uint16_t)size);
return cmp_write_bin32_marker(ctx, size);
}
bool cmp_write_bin(cmp_ctx_t *ctx, const void *data, uint32_t size) {
if (size <= 0xFF)
return cmp_write_bin8(ctx, data, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_bin16(ctx, data, (uint16_t)size);
return cmp_write_bin32(ctx, data, size);
}
bool cmp_write_fixarray(cmp_ctx_t *ctx, uint8_t size) {
if (size <= FIXARRAY_SIZE)
return write_fixed_value(ctx, FIXARRAY_MARKER | size);
ctx->error = CMP_ERROR_INPUT_VALUE_TOO_LARGE;
return false;
}
bool cmp_write_array16(cmp_ctx_t *ctx, uint16_t size) {
if (!write_type_marker(ctx, ARRAY16_MARKER))
return false;
size = be16(size);
if (ctx->write(ctx, &size, sizeof(uint16_t)) == sizeof(uint16_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_array32(cmp_ctx_t *ctx, uint32_t size) {
if (!write_type_marker(ctx, ARRAY32_MARKER))
return false;
size = be32(size);
if (ctx->write(ctx, &size, sizeof(uint32_t)) == sizeof(uint32_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_array(cmp_ctx_t *ctx, uint32_t size) {
if (size <= FIXARRAY_SIZE)
return cmp_write_fixarray(ctx, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_array16(ctx, (uint16_t)size);
return cmp_write_array32(ctx, size);
}
bool cmp_write_fixmap(cmp_ctx_t *ctx, uint8_t size) {
if (size <= FIXMAP_SIZE)
return write_fixed_value(ctx, FIXMAP_MARKER | size);
ctx->error = CMP_ERROR_INPUT_VALUE_TOO_LARGE;
return false;
}
bool cmp_write_map16(cmp_ctx_t *ctx, uint16_t size) {
if (!write_type_marker(ctx, MAP16_MARKER))
return false;
size = be16(size);
if (ctx->write(ctx, &size, sizeof(uint16_t)) == sizeof(uint16_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_map32(cmp_ctx_t *ctx, uint32_t size) {
if (!write_type_marker(ctx, MAP32_MARKER))
return false;
size = be32(size);
if (ctx->write(ctx, &size, sizeof(uint32_t)) == sizeof(uint32_t))
return true;
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
bool cmp_write_map(cmp_ctx_t *ctx, uint32_t size) {
if (size <= FIXMAP_SIZE)
return cmp_write_fixmap(ctx, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_map16(ctx, (uint16_t)size);
return cmp_write_map32(ctx, size);
}
bool cmp_write_fixext1_marker(cmp_ctx_t *ctx, int8_t type) {
if (!write_type_marker(ctx, FIXEXT1_MARKER))
return false;
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_fixext1(cmp_ctx_t *ctx, int8_t type, const void *data) {
if (!cmp_write_fixext1_marker(ctx, type))
return false;
if (ctx->write(ctx, data, 1) == 1)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_fixext2_marker(cmp_ctx_t *ctx, int8_t type) {
if (!write_type_marker(ctx, FIXEXT2_MARKER))
return false;
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_fixext2(cmp_ctx_t *ctx, int8_t type, const void *data) {
if (!cmp_write_fixext2_marker(ctx, type))
return false;
if (ctx->write(ctx, data, 2) == 2)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_fixext4_marker(cmp_ctx_t *ctx, int8_t type) {
if (!write_type_marker(ctx, FIXEXT4_MARKER))
return false;
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_fixext4(cmp_ctx_t *ctx, int8_t type, const void *data) {
if (!cmp_write_fixext4_marker(ctx, type))
return false;
if (ctx->write(ctx, data, 4) == 4)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_fixext8_marker(cmp_ctx_t *ctx, int8_t type) {
if (!write_type_marker(ctx, FIXEXT8_MARKER))
return false;
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_fixext8(cmp_ctx_t *ctx, int8_t type, const void *data) {
if (!cmp_write_fixext8_marker(ctx, type))
return false;
if (ctx->write(ctx, data, 8) == 8)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_fixext16_marker(cmp_ctx_t *ctx, int8_t type) {
if (!write_type_marker(ctx, FIXEXT16_MARKER))
return false;
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_fixext16(cmp_ctx_t *ctx, int8_t type, const void *data) {
if (!cmp_write_fixext16_marker(ctx, type))
return false;
if (ctx->write(ctx, data, 16) == 16)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_ext8_marker(cmp_ctx_t *ctx, int8_t type, uint8_t size) {
if (!write_type_marker(ctx, EXT8_MARKER))
return false;
if (ctx->write(ctx, &size, sizeof(uint8_t)) != sizeof(uint8_t)) {
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_ext8(cmp_ctx_t *ctx, int8_t type, uint8_t size, const void *data) {
if (!cmp_write_ext8_marker(ctx, type, size))
return false;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_ext16_marker(cmp_ctx_t *ctx, int8_t type, uint16_t size) {
if (!write_type_marker(ctx, EXT16_MARKER))
return false;
size = be16(size);
if (ctx->write(ctx, &size, sizeof(uint16_t)) != sizeof(uint16_t)) {
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_ext16(cmp_ctx_t *ctx, int8_t type, uint16_t size, const void *data) {
if (!cmp_write_ext16_marker(ctx, type, size))
return false;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
bool cmp_write_ext32_marker(cmp_ctx_t *ctx, int8_t type, uint32_t size) {
if (!write_type_marker(ctx, EXT32_MARKER))
return false;
size = be32(size);
if (ctx->write(ctx, &size, sizeof(uint32_t)) != sizeof(uint32_t)) {
ctx->error = CMP_ERROR_LENGTH_WRITING;
return false;
}
if (ctx->write(ctx, &type, sizeof(int8_t)) == sizeof(int8_t))
return true;
ctx->error = CMP_ERROR_EXT_TYPE_WRITING;
return false;
}
bool cmp_write_ext32(cmp_ctx_t *ctx, int8_t type, uint32_t size, const void *data) {
if (!cmp_write_ext32_marker(ctx, type, size))
return false;
if (ctx->write(ctx, data, size) == size)
return true;
ctx->error = CMP_ERROR_DATA_WRITING;
return false;
}
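/* Generic ext writers: the exact sizes 1/2/4/8/16 get the compact fixext
 * markers; any other size uses the smallest ext8/ext16/ext32 form. */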
bool cmp_write_ext_marker(cmp_ctx_t *ctx, int8_t type, uint32_t size) {
if (size == 1)
return cmp_write_fixext1_marker(ctx, type);
if (size == 2)
return cmp_write_fixext2_marker(ctx, type);
if (size == 4)
return cmp_write_fixext4_marker(ctx, type);
if (size == 8)
return cmp_write_fixext8_marker(ctx, type);
if (size == 16)
return cmp_write_fixext16_marker(ctx, type);
if (size <= 0xFF)
return cmp_write_ext8_marker(ctx, type, (uint8_t)size);
if (size <= 0xFFFF)
return cmp_write_ext16_marker(ctx, type, (uint16_t)size);
return cmp_write_ext32_marker(ctx, type, size);
}
bool cmp_write_ext(cmp_ctx_t *ctx, int8_t type, uint32_t size, const void *data) {
if (size == 1)
return cmp_write_fixext1(ctx, type, data);
if (size == 2)
return cmp_write_fixext2(ctx, type, data);
if (size == 4)
return cmp_write_fixext4(ctx, type, data);
if (size == 8)
return cmp_write_fixext8(ctx, type, data);
if (size == 16)
return cmp_write_fixext16(ctx, type, data);
if (size <= 0xFF)
return cmp_write_ext8(ctx, type, (uint8_t)size, data);
if (size <= 0xFFFF)
return cmp_write_ext16(ctx, type, (uint16_t)size, data);
return cmp_write_ext32(ctx, type, size, data);
}
bool cmp_write_object(cmp_ctx_t *ctx, const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
return cmp_write_pfix(ctx, obj->as.u8);
case CMP_TYPE_FIXMAP:
return cmp_write_fixmap(ctx, (uint8_t)obj->as.map_size);
case CMP_TYPE_FIXARRAY:
return cmp_write_fixarray(ctx, (uint8_t)obj->as.array_size);
case CMP_TYPE_FIXSTR:
return cmp_write_fixstr_marker(ctx, (uint8_t)obj->as.str_size);
case CMP_TYPE_NIL:
return cmp_write_nil(ctx);
case CMP_TYPE_BOOLEAN:
if (obj->as.boolean)
return cmp_write_true(ctx);
return cmp_write_false(ctx);
case CMP_TYPE_BIN8:
return cmp_write_bin8_marker(ctx, (uint8_t)obj->as.bin_size);
case CMP_TYPE_BIN16:
return cmp_write_bin16_marker(ctx, (uint16_t)obj->as.bin_size);
case CMP_TYPE_BIN32:
return cmp_write_bin32_marker(ctx, obj->as.bin_size);
case CMP_TYPE_EXT8:
return cmp_write_ext8_marker(
ctx, obj->as.ext.type, (uint8_t)obj->as.ext.size
);
case CMP_TYPE_EXT16:
return cmp_write_ext16_marker(
ctx, obj->as.ext.type, (uint16_t)obj->as.ext.size
);
case CMP_TYPE_EXT32:
return cmp_write_ext32_marker(ctx, obj->as.ext.type, obj->as.ext.size);
case CMP_TYPE_FLOAT:
#ifndef CMP_NO_FLOAT
return cmp_write_float(ctx, obj->as.flt);
#else /* CMP_NO_FLOAT */
ctx->error = CMP_ERROR_DISABLED_FLOATING_POINT;
return false;
#endif /* CMP_NO_FLOAT */
case CMP_TYPE_DOUBLE:
#ifndef CMP_NO_FLOAT
return cmp_write_double(ctx, obj->as.dbl);
#else /* CMP_NO_FLOAT */
ctx->error = CMP_ERROR_DISABLED_FLOATING_POINT;
return false;
#endif
case CMP_TYPE_UINT8:
return cmp_write_u8(ctx, obj->as.u8);
case CMP_TYPE_UINT16:
return cmp_write_u16(ctx, obj->as.u16);
case CMP_TYPE_UINT32:
return cmp_write_u32(ctx, obj->as.u32);
case CMP_TYPE_UINT64:
return cmp_write_u64(ctx, obj->as.u64);
case CMP_TYPE_SINT8:
return cmp_write_s8(ctx, obj->as.s8);
case CMP_TYPE_SINT16:
return cmp_write_s16(ctx, obj->as.s16);
case CMP_TYPE_SINT32:
return cmp_write_s32(ctx, obj->as.s32);
case CMP_TYPE_SINT64:
return cmp_write_s64(ctx, obj->as.s64);
case CMP_TYPE_FIXEXT1:
return cmp_write_fixext1_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT2:
return cmp_write_fixext2_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT4:
return cmp_write_fixext4_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT8:
return cmp_write_fixext8_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT16:
return cmp_write_fixext16_marker(ctx, obj->as.ext.type);
case CMP_TYPE_STR8:
return cmp_write_str8_marker(ctx, (uint8_t)obj->as.str_size);
case CMP_TYPE_STR16:
return cmp_write_str16_marker(ctx, (uint16_t)obj->as.str_size);
case CMP_TYPE_STR32:
return cmp_write_str32_marker(ctx, obj->as.str_size);
case CMP_TYPE_ARRAY16:
return cmp_write_array16(ctx, (uint16_t)obj->as.array_size);
case CMP_TYPE_ARRAY32:
return cmp_write_array32(ctx, obj->as.array_size);
case CMP_TYPE_MAP16:
return cmp_write_map16(ctx, (uint16_t)obj->as.map_size);
case CMP_TYPE_MAP32:
return cmp_write_map32(ctx, obj->as.map_size);
case CMP_TYPE_NEGATIVE_FIXNUM:
return cmp_write_nfix(ctx, obj->as.s8);
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
bool cmp_write_object_v4(cmp_ctx_t *ctx, const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
return cmp_write_pfix(ctx, obj->as.u8);
case CMP_TYPE_FIXMAP:
return cmp_write_fixmap(ctx, (uint8_t)obj->as.map_size);
case CMP_TYPE_FIXARRAY:
return cmp_write_fixarray(ctx, (uint8_t)obj->as.array_size);
case CMP_TYPE_FIXSTR:
return cmp_write_fixstr_marker(ctx, (uint8_t)obj->as.str_size);
case CMP_TYPE_NIL:
return cmp_write_nil(ctx);
case CMP_TYPE_BOOLEAN:
if (obj->as.boolean)
return cmp_write_true(ctx);
return cmp_write_false(ctx);
case CMP_TYPE_EXT8:
return cmp_write_ext8_marker(ctx, obj->as.ext.type, (uint8_t)obj->as.ext.size);
case CMP_TYPE_EXT16:
return cmp_write_ext16_marker(
ctx, obj->as.ext.type, (uint16_t)obj->as.ext.size
);
case CMP_TYPE_EXT32:
return cmp_write_ext32_marker(ctx, obj->as.ext.type, obj->as.ext.size);
case CMP_TYPE_FLOAT:
#ifndef CMP_NO_FLOAT
return cmp_write_float(ctx, obj->as.flt);
#else /* CMP_NO_FLOAT */
ctx->error = CMP_ERROR_DISABLED_FLOATING_POINT;
return false;
#endif
case CMP_TYPE_DOUBLE:
#ifndef CMP_NO_FLOAT
return cmp_write_double(ctx, obj->as.dbl);
#else
ctx->error = CMP_ERROR_DISABLED_FLOATING_POINT;
return false;
#endif
case CMP_TYPE_UINT8:
return cmp_write_u8(ctx, obj->as.u8);
case CMP_TYPE_UINT16:
return cmp_write_u16(ctx, obj->as.u16);
case CMP_TYPE_UINT32:
return cmp_write_u32(ctx, obj->as.u32);
case CMP_TYPE_UINT64:
return cmp_write_u64(ctx, obj->as.u64);
case CMP_TYPE_SINT8:
return cmp_write_s8(ctx, obj->as.s8);
case CMP_TYPE_SINT16:
return cmp_write_s16(ctx, obj->as.s16);
case CMP_TYPE_SINT32:
return cmp_write_s32(ctx, obj->as.s32);
case CMP_TYPE_SINT64:
return cmp_write_s64(ctx, obj->as.s64);
case CMP_TYPE_FIXEXT1:
return cmp_write_fixext1_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT2:
return cmp_write_fixext2_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT4:
return cmp_write_fixext4_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT8:
return cmp_write_fixext8_marker(ctx, obj->as.ext.type);
case CMP_TYPE_FIXEXT16:
return cmp_write_fixext16_marker(ctx, obj->as.ext.type);
case CMP_TYPE_STR16:
return cmp_write_str16_marker(ctx, (uint16_t)obj->as.str_size);
case CMP_TYPE_STR32:
return cmp_write_str32_marker(ctx, obj->as.str_size);
case CMP_TYPE_ARRAY16:
return cmp_write_array16(ctx, (uint16_t)obj->as.array_size);
case CMP_TYPE_ARRAY32:
return cmp_write_array32(ctx, obj->as.array_size);
case CMP_TYPE_MAP16:
return cmp_write_map16(ctx, (uint16_t)obj->as.map_size);
case CMP_TYPE_MAP32:
return cmp_write_map32(ctx, obj->as.map_size);
case CMP_TYPE_NEGATIVE_FIXNUM:
return cmp_write_nfix(ctx, obj->as.s8);
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
bool cmp_read_pfix(cmp_ctx_t *ctx, uint8_t *c) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_POSITIVE_FIXNUM) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*c = obj.as.u8;
return true;
}
bool cmp_read_nfix(cmp_ctx_t *ctx, int8_t *c) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_NEGATIVE_FIXNUM) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*c = obj.as.s8;
return true;
}
bool cmp_read_sfix(cmp_ctx_t *ctx, int8_t *c) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
*c = obj.as.s8;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
bool cmp_read_s8(cmp_ctx_t *ctx, int8_t *c) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_SINT8) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*c = obj.as.s8;
return true;
}
bool cmp_read_s16(cmp_ctx_t *ctx, int16_t *s) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_SINT16) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*s = obj.as.s16;
return true;
}
bool cmp_read_s32(cmp_ctx_t *ctx, int32_t *i) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_SINT32) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*i = obj.as.s32;
return true;
}
bool cmp_read_s64(cmp_ctx_t *ctx, int64_t *l) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_SINT64) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*l = obj.as.s64;
return true;
}
bool cmp_read_char(cmp_ctx_t *ctx, int8_t *c) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*c = obj.as.s8;
return true;
case CMP_TYPE_UINT8:
if (obj.as.u8 <= 0x7F) {
*c = (int8_t)obj.as.u8;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_short(cmp_ctx_t *ctx, int16_t *s) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*s = obj.as.s8;
return true;
case CMP_TYPE_UINT8:
*s = obj.as.u8;
return true;
case CMP_TYPE_SINT16:
*s = obj.as.s16;
return true;
case CMP_TYPE_UINT16:
if (obj.as.u16 <= 0x7FFF) {
*s = (int16_t)obj.as.u16;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_int(cmp_ctx_t *ctx, int32_t *i) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*i = obj.as.s8;
return true;
case CMP_TYPE_UINT8:
*i = obj.as.u8;
return true;
case CMP_TYPE_SINT16:
*i = obj.as.s16;
return true;
case CMP_TYPE_UINT16:
*i = obj.as.u16;
return true;
case CMP_TYPE_SINT32:
*i = obj.as.s32;
return true;
case CMP_TYPE_UINT32:
if (obj.as.u32 <= 0x7FFFFFFF) {
*i = (int32_t)obj.as.u32;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_long(cmp_ctx_t *ctx, int64_t *d) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*d = obj.as.s8;
return true;
case CMP_TYPE_UINT8:
*d = obj.as.u8;
return true;
case CMP_TYPE_SINT16:
*d = obj.as.s16;
return true;
case CMP_TYPE_UINT16:
*d = obj.as.u16;
return true;
case CMP_TYPE_SINT32:
*d = obj.as.s32;
return true;
case CMP_TYPE_UINT32:
*d = obj.as.u32;
return true;
case CMP_TYPE_SINT64:
*d = obj.as.s64;
return true;
case CMP_TYPE_UINT64:
if (obj.as.u64 <= UINT64_C(0x7FFFFFFFFFFFFFFF)) {
*d = (int64_t)obj.as.u64;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_integer(cmp_ctx_t *ctx, int64_t *d) {
return cmp_read_long(ctx, d);
}
bool cmp_read_ufix(cmp_ctx_t *ctx, uint8_t *c) {
return cmp_read_pfix(ctx, c);
}
bool cmp_read_u8(cmp_ctx_t *ctx, uint8_t *c) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_UINT8) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*c = obj.as.u8;
return true;
}
bool cmp_read_u16(cmp_ctx_t *ctx, uint16_t *s) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_UINT16) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*s = obj.as.u16;
return true;
}
bool cmp_read_u32(cmp_ctx_t *ctx, uint32_t *i) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_UINT32) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*i = obj.as.u32;
return true;
}
bool cmp_read_u64(cmp_ctx_t *ctx, uint64_t *l) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_UINT64) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*l = obj.as.u64;
return true;
}
bool cmp_read_uchar(cmp_ctx_t *ctx, uint8_t *c) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*c = obj.as.u8;
return true;
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
if (obj.as.s8 >= 0) {
*c = (uint8_t)obj.as.s8;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_ushort(cmp_ctx_t *ctx, uint16_t *s) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*s = obj.as.u8;
return true;
case CMP_TYPE_UINT16:
*s = obj.as.u16;
return true;
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
if (obj.as.s8 >= 0) {
*s = (uint8_t)obj.as.s8;
return true;
}
break;
case CMP_TYPE_SINT16:
if (obj.as.s16 >= 0) {
*s = (uint16_t)obj.as.s16;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_uint(cmp_ctx_t *ctx, uint32_t *i) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*i = obj.as.u8;
return true;
case CMP_TYPE_UINT16:
*i = obj.as.u16;
return true;
case CMP_TYPE_UINT32:
*i = obj.as.u32;
return true;
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
if (obj.as.s8 >= 0) {
*i = (uint8_t)obj.as.s8;
return true;
}
break;
case CMP_TYPE_SINT16:
if (obj.as.s16 >= 0) {
*i = (uint16_t)obj.as.s16;
return true;
}
break;
case CMP_TYPE_SINT32:
if (obj.as.s32 >= 0) {
*i = (uint32_t)obj.as.s32;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_ulong(cmp_ctx_t *ctx, uint64_t *u) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*u = obj.as.u8;
return true;
case CMP_TYPE_UINT16:
*u = obj.as.u16;
return true;
case CMP_TYPE_UINT32:
*u = obj.as.u32;
return true;
case CMP_TYPE_UINT64:
*u = obj.as.u64;
return true;
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
if (obj.as.s8 >= 0) {
*u = (uint8_t)obj.as.s8;
return true;
}
break;
case CMP_TYPE_SINT16:
if (obj.as.s16 >= 0) {
*u = (uint16_t)obj.as.s16;
return true;
}
break;
case CMP_TYPE_SINT32:
if (obj.as.s32 >= 0) {
*u = (uint32_t)obj.as.s32;
return true;
}
break;
case CMP_TYPE_SINT64:
if (obj.as.s64 >= 0) {
*u = (uint64_t)obj.as.s64;
return true;
}
break;
default:
break;
}
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_uinteger(cmp_ctx_t *ctx, uint64_t *u) {
return cmp_read_ulong(ctx, u);
}
#ifndef CMP_NO_FLOAT
bool cmp_read_float(cmp_ctx_t *ctx, float *f) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_FLOAT) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*f = obj.as.flt;
return true;
}
bool cmp_read_double(cmp_ctx_t *ctx, double *d) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_DOUBLE) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*d = obj.as.dbl;
return true;
}
bool cmp_read_decimal(cmp_ctx_t *ctx, double *d) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_FLOAT:
*d = (double)obj.as.flt;
return true;
case CMP_TYPE_DOUBLE:
*d = obj.as.dbl;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
#endif /* CMP_NO_FLOAT */
bool cmp_read_nil(cmp_ctx_t *ctx) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type == CMP_TYPE_NIL)
return true;
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
bool cmp_read_bool(cmp_ctx_t *ctx, bool *b) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_BOOLEAN) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
if (obj.as.boolean)
*b = true;
else
*b = false;
return true;
}
bool cmp_read_bool_as_u8(cmp_ctx_t *ctx, uint8_t *b) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_BOOLEAN) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
if (obj.as.boolean)
*b = 1;
else
*b = 0;
return true;
}
bool cmp_read_str_size(cmp_ctx_t *ctx, uint32_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_FIXSTR:
case CMP_TYPE_STR8:
case CMP_TYPE_STR16:
case CMP_TYPE_STR32:
*size = obj.as.str_size;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
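/* Read a string into 'data', which must hold at least str_size + 1 bytes:
 * the >= check reserves room for the NUL terminator appended on success.
 * On overflow, *size is set to the length actually required. */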
bool cmp_read_str(cmp_ctx_t *ctx, char *data, uint32_t *size) {
uint32_t str_size = 0;
if (!cmp_read_str_size(ctx, &str_size))
return false;
if (str_size >= *size) {
*size = str_size;
ctx->error = CMP_ERROR_STR_DATA_LENGTH_TOO_LONG;
return false;
}
if (!ctx->read(ctx, data, str_size)) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
data[str_size] = 0;
*size = str_size;
return true;
}
bool cmp_read_bin_size(cmp_ctx_t *ctx, uint32_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_BIN8:
case CMP_TYPE_BIN16:
case CMP_TYPE_BIN32:
*size = obj.as.bin_size;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
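/* Binary counterpart of cmp_read_str(); no terminator is appended, so a
 * payload exactly filling the buffer (bin_size == *size) is accepted. */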
bool cmp_read_bin(cmp_ctx_t *ctx, void *data, uint32_t *size) {
uint32_t bin_size = 0;
if (!cmp_read_bin_size(ctx, &bin_size))
return false;
if (bin_size > *size) {
ctx->error = CMP_ERROR_BIN_DATA_LENGTH_TOO_LONG;
return false;
}
if (!ctx->read(ctx, data, bin_size)) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
*size = bin_size;
return true;
}
bool cmp_read_array(cmp_ctx_t *ctx, uint32_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
*size = obj.as.array_size;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
bool cmp_read_map(cmp_ctx_t *ctx, uint32_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
*size = obj.as.map_size;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
bool cmp_read_fixext1_marker(cmp_ctx_t *ctx, int8_t *type) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_FIXEXT1) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
return true;
}
bool cmp_read_fixext1(cmp_ctx_t *ctx, int8_t *type, void *data) {
if (!cmp_read_fixext1_marker(ctx, type))
return false;
if (ctx->read(ctx, data, 1))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_fixext2_marker(cmp_ctx_t *ctx, int8_t *type) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_FIXEXT2) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
return true;
}
bool cmp_read_fixext2(cmp_ctx_t *ctx, int8_t *type, void *data) {
if (!cmp_read_fixext2_marker(ctx, type))
return false;
if (ctx->read(ctx, data, 2))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_fixext4_marker(cmp_ctx_t *ctx, int8_t *type) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_FIXEXT4) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
return true;
}
bool cmp_read_fixext4(cmp_ctx_t *ctx, int8_t *type, void *data) {
if (!cmp_read_fixext4_marker(ctx, type))
return false;
if (ctx->read(ctx, data, 4))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_fixext8_marker(cmp_ctx_t *ctx, int8_t *type) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_FIXEXT8) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
return true;
}
bool cmp_read_fixext8(cmp_ctx_t *ctx, int8_t *type, void *data) {
if (!cmp_read_fixext8_marker(ctx, type))
return false;
if (ctx->read(ctx, data, 8))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_fixext16_marker(cmp_ctx_t *ctx, int8_t *type) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_FIXEXT16) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
return true;
}
bool cmp_read_fixext16(cmp_ctx_t *ctx, int8_t *type, void *data) {
if (!cmp_read_fixext16_marker(ctx, type))
return false;
if (ctx->read(ctx, data, 16))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_ext8_marker(cmp_ctx_t *ctx, int8_t *type, uint8_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_EXT8) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
*size = (uint8_t)obj.as.ext.size;
return true;
}
bool cmp_read_ext8(cmp_ctx_t *ctx, int8_t *type, uint8_t *size, void *data) {
if (!cmp_read_ext8_marker(ctx, type, size))
return false;
if (ctx->read(ctx, data, *size))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_ext16_marker(cmp_ctx_t *ctx, int8_t *type, uint16_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_EXT16) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
*size = (uint16_t)obj.as.ext.size;
return true;
}
bool cmp_read_ext16(cmp_ctx_t *ctx, int8_t *type, uint16_t *size, void *data) {
if (!cmp_read_ext16_marker(ctx, type, size))
return false;
if (ctx->read(ctx, data, *size))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_ext32_marker(cmp_ctx_t *ctx, int8_t *type, uint32_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
if (obj.type != CMP_TYPE_EXT32) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
*type = obj.as.ext.type;
*size = obj.as.ext.size;
return true;
}
bool cmp_read_ext32(cmp_ctx_t *ctx, int8_t *type, uint32_t *size, void *data) {
if (!cmp_read_ext32_marker(ctx, type, size))
return false;
if (ctx->read(ctx, data, *size))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_ext_marker(cmp_ctx_t *ctx, int8_t *type, uint32_t *size) {
cmp_object_t obj;
if (!cmp_read_object(ctx, &obj))
return false;
switch (obj.type) {
case CMP_TYPE_FIXEXT1:
case CMP_TYPE_FIXEXT2:
case CMP_TYPE_FIXEXT4:
case CMP_TYPE_FIXEXT8:
case CMP_TYPE_FIXEXT16:
case CMP_TYPE_EXT8:
case CMP_TYPE_EXT16:
case CMP_TYPE_EXT32:
*type = obj.as.ext.type;
*size = obj.as.ext.size;
return true;
default:
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
}
bool cmp_read_ext(cmp_ctx_t *ctx, int8_t *type, uint32_t *size, void *data) {
if (!cmp_read_ext_marker(ctx, type, size))
return false;
if (ctx->read(ctx, data, *size))
return true;
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
bool cmp_read_object(cmp_ctx_t *ctx, cmp_object_t *obj) {
uint8_t type_marker = 0;
if (!read_type_marker(ctx, &type_marker))
return false;
if (!type_marker_to_cmp_type(type_marker, &obj->type)) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
return read_obj_data(ctx, type_marker, obj);
}
bool cmp_skip_object(cmp_ctx_t *ctx, cmp_object_t *obj) {
uint8_t type_marker = 0;
uint8_t cmp_type;
uint32_t size = 0;
if (!read_type_marker(ctx, &type_marker)) {
return false;
}
if (!type_marker_to_cmp_type(type_marker, &cmp_type)) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
switch (cmp_type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
obj->type = cmp_type;
if (!read_obj_data(ctx, type_marker, obj)) {
return false;
}
ctx->error = CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED;
return false;
default:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
if (size) {
switch (cmp_type) {
case CMP_TYPE_FIXEXT1:
case CMP_TYPE_FIXEXT2:
case CMP_TYPE_FIXEXT4:
case CMP_TYPE_FIXEXT8:
case CMP_TYPE_FIXEXT16:
case CMP_TYPE_EXT8:
case CMP_TYPE_EXT16:
case CMP_TYPE_EXT32:
++size;
break;
default:
break;
}
if (!skip_bytes(ctx, size)) {
return false;
}
}
}
return true;
}
bool cmp_skip_object_flat(cmp_ctx_t *ctx, cmp_object_t *obj) {
size_t element_count = 1;
bool in_container = false;
while (element_count) {
uint8_t type_marker = 0;
uint8_t cmp_type;
uint32_t size = 0;
if (!read_type_marker(ctx, &type_marker)) {
return false;
}
if (!type_marker_to_cmp_type(type_marker, &cmp_type)) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
switch (cmp_type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
if (in_container) {
obj->type = cmp_type;
if (!read_obj_data(ctx, type_marker, obj)) {
return false;
}
ctx->error = CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED;
return false;
}
in_container = true;
break;
default:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
if (size) {
switch (cmp_type) {
case CMP_TYPE_FIXEXT1:
case CMP_TYPE_FIXEXT2:
case CMP_TYPE_FIXEXT4:
case CMP_TYPE_FIXEXT8:
case CMP_TYPE_FIXEXT16:
case CMP_TYPE_EXT8:
case CMP_TYPE_EXT16:
case CMP_TYPE_EXT32:
++size;
break;
default:
break;
}
if (!skip_bytes(ctx, size)) {
return false;
}
}
}
element_count--;
switch (cmp_type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
element_count += size;
break;
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
element_count += ((size_t)size) * 2;
break;
default:
break;
}
}
return true;
}
bool cmp_skip_object_no_limit(cmp_ctx_t *ctx) {
size_t element_count = 1;
while (element_count) {
uint8_t type_marker = 0;
uint8_t cmp_type = 0;
uint32_t size = 0;
if (!read_type_marker(ctx, &type_marker)) {
return false;
}
if (!type_marker_to_cmp_type(type_marker, &cmp_type)) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
switch (cmp_type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
break;
default:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
if (size) {
switch (cmp_type) {
case CMP_TYPE_FIXEXT1:
case CMP_TYPE_FIXEXT2:
case CMP_TYPE_FIXEXT4:
case CMP_TYPE_FIXEXT8:
case CMP_TYPE_FIXEXT16:
case CMP_TYPE_EXT8:
case CMP_TYPE_EXT16:
case CMP_TYPE_EXT32:
++size;
break;
default:
break;
}
if (!skip_bytes(ctx, size)) {
return false;
}
}
}
element_count--;
switch (cmp_type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
element_count += size;
break;
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
element_count += ((size_t)size) * 2;
break;
default:
break;
}
}
return true;
}
bool cmp_skip_object_limit(cmp_ctx_t *ctx, cmp_object_t *obj, uint32_t limit) {
size_t element_count = 1;
uint32_t depth = 0;
while (element_count) {
uint8_t type_marker = 0;
uint8_t cmp_type;
uint32_t size = 0;
if (!read_type_marker(ctx, &type_marker)) {
return false;
}
if (!type_marker_to_cmp_type(type_marker, &cmp_type)) {
ctx->error = CMP_ERROR_INVALID_TYPE;
return false;
}
switch (cmp_type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
++depth;
if (depth > limit) {
obj->type = cmp_type;
if (!read_obj_data(ctx, type_marker, obj)) {
return false;
}
ctx->error = CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED;
return false;
}
break;
default:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
if (size) {
switch (cmp_type) {
case CMP_TYPE_FIXEXT1:
case CMP_TYPE_FIXEXT2:
case CMP_TYPE_FIXEXT4:
case CMP_TYPE_FIXEXT8:
case CMP_TYPE_FIXEXT16:
case CMP_TYPE_EXT8:
case CMP_TYPE_EXT16:
case CMP_TYPE_EXT32:
++size;
break;
default:
break;
}
if (!skip_bytes(ctx, size)) {
return false;
}
}
}
element_count--;
switch (cmp_type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
element_count += size;
break;
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
if (!read_type_size(ctx, type_marker, cmp_type, &size)) {
return false;
}
element_count += ((size_t)size) * 2;
break;
default:
break;
}
}
return true;
}
bool cmp_object_is_char(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
return true;
default:
return false;
}
}
bool cmp_object_is_short(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
case CMP_TYPE_SINT16:
return true;
default:
return false;
}
}
bool cmp_object_is_int(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
case CMP_TYPE_SINT16:
case CMP_TYPE_SINT32:
return true;
default:
return false;
}
}
bool cmp_object_is_long(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
case CMP_TYPE_SINT16:
case CMP_TYPE_SINT32:
case CMP_TYPE_SINT64:
return true;
default:
return false;
}
}
bool cmp_object_is_sinteger(const cmp_object_t *obj) {
return cmp_object_is_long(obj);
}
bool cmp_object_is_uchar(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
return true;
default:
return false;
}
}
bool cmp_object_is_ushort(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
case CMP_TYPE_UINT16:
return true;
default:
return false;
}
}
bool cmp_object_is_uint(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
case CMP_TYPE_UINT16:
case CMP_TYPE_UINT32:
return true;
default:
return false;
}
}
bool cmp_object_is_ulong(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
case CMP_TYPE_UINT16:
case CMP_TYPE_UINT32:
case CMP_TYPE_UINT64:
return true;
default:
return false;
}
}
bool cmp_object_is_uinteger(const cmp_object_t *obj) {
return cmp_object_is_ulong(obj);
}
bool cmp_object_is_float(const cmp_object_t *obj) {
if (obj->type == CMP_TYPE_FLOAT)
return true;
return false;
}
bool cmp_object_is_double(const cmp_object_t *obj) {
if (obj->type == CMP_TYPE_DOUBLE)
return true;
return false;
}
bool cmp_object_is_nil(const cmp_object_t *obj) {
if (obj->type == CMP_TYPE_NIL)
return true;
return false;
}
bool cmp_object_is_bool(const cmp_object_t *obj) {
if (obj->type == CMP_TYPE_BOOLEAN)
return true;
return false;
}
bool cmp_object_is_str(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_FIXSTR:
case CMP_TYPE_STR8:
case CMP_TYPE_STR16:
case CMP_TYPE_STR32:
return true;
default:
return false;
}
}
bool cmp_object_is_bin(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_BIN8:
case CMP_TYPE_BIN16:
case CMP_TYPE_BIN32:
return true;
default:
return false;
}
}
bool cmp_object_is_array(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
return true;
default:
return false;
}
}
bool cmp_object_is_map(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
return true;
default:
return false;
}
}
bool cmp_object_is_ext(const cmp_object_t *obj) {
switch (obj->type) {
case CMP_TYPE_FIXEXT1:
case CMP_TYPE_FIXEXT2:
case CMP_TYPE_FIXEXT4:
case CMP_TYPE_FIXEXT8:
case CMP_TYPE_FIXEXT16:
case CMP_TYPE_EXT8:
case CMP_TYPE_EXT16:
case CMP_TYPE_EXT32:
return true;
default:
return false;
}
}
bool cmp_object_as_char(const cmp_object_t *obj, int8_t *c) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*c = obj->as.s8;
return true;
case CMP_TYPE_UINT8:
if (obj->as.u8 <= 0x7F) {
*c = obj->as.s8;
return true;
}
else {
return false;
}
default:
return false;
}
}
bool cmp_object_as_short(const cmp_object_t *obj, int16_t *s) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*s = obj->as.s8;
return true;
case CMP_TYPE_UINT8:
*s = obj->as.u8;
return true;
case CMP_TYPE_SINT16:
*s = obj->as.s16;
return true;
case CMP_TYPE_UINT16:
if (obj->as.u16 <= 0x7FFF) {
*s = (int16_t)obj->as.u16;
return true;
}
else {
return false;
}
default:
return false;
}
}
bool cmp_object_as_int(const cmp_object_t *obj, int32_t *i) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*i = obj->as.s8;
return true;
case CMP_TYPE_UINT8:
*i = obj->as.u8;
return true;
case CMP_TYPE_SINT16:
*i = obj->as.s16;
return true;
case CMP_TYPE_UINT16:
*i = obj->as.u16;
return true;
case CMP_TYPE_SINT32:
*i = obj->as.s32;
return true;
case CMP_TYPE_UINT32:
if (obj->as.u32 <= 0x7FFFFFFF) {
*i = (int32_t)obj->as.u32;
return true;
}
else {
return false;
}
default:
return false;
}
}
bool cmp_object_as_long(const cmp_object_t *obj, int64_t *d) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*d = obj->as.s8;
return true;
case CMP_TYPE_UINT8:
*d = obj->as.u8;
return true;
case CMP_TYPE_SINT16:
*d = obj->as.s16;
return true;
case CMP_TYPE_UINT16:
*d = obj->as.u16;
return true;
case CMP_TYPE_SINT32:
*d = obj->as.s32;
return true;
case CMP_TYPE_UINT32:
*d = obj->as.u32;
return true;
case CMP_TYPE_SINT64:
*d = obj->as.s64;
return true;
case CMP_TYPE_UINT64:
if (obj->as.u64 <= UINT64_C(0x7FFFFFFFFFFFFFFF)) {
*d = (int64_t)obj->as.u64;
return true;
}
else {
return false;
}
default:
return false;
}
}
bool cmp_object_as_sinteger(const cmp_object_t *obj, int64_t *d) {
return cmp_object_as_long(obj, d);
}
bool cmp_object_as_uchar(const cmp_object_t *obj, uint8_t *c) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*c = obj->as.u8;
return true;
default:
return false;
}
}
bool cmp_object_as_ushort(const cmp_object_t *obj, uint16_t *s) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*s = obj->as.u8;
return true;
case CMP_TYPE_UINT16:
*s = obj->as.u16;
return true;
default:
return false;
}
}
bool cmp_object_as_uint(const cmp_object_t *obj, uint32_t *i) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*i = obj->as.u8;
return true;
case CMP_TYPE_UINT16:
*i = obj->as.u16;
return true;
case CMP_TYPE_UINT32:
*i = obj->as.u32;
return true;
default:
return false;
}
}
bool cmp_object_as_ulong(const cmp_object_t *obj, uint64_t *u) {
switch (obj->type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*u = obj->as.u8;
return true;
case CMP_TYPE_UINT16:
*u = obj->as.u16;
return true;
case CMP_TYPE_UINT32:
*u = obj->as.u32;
return true;
case CMP_TYPE_UINT64:
*u = obj->as.u64;
return true;
default:
return false;
}
}
bool cmp_object_as_uinteger(const cmp_object_t *obj, uint64_t *u) {
return cmp_object_as_ulong(obj, u);
}
#ifndef CMP_NO_FLOAT
bool cmp_object_as_float(const cmp_object_t *obj, float *f) {
if (obj->type == CMP_TYPE_FLOAT) {
*f = obj->as.flt;
return true;
}
return false;
}
bool cmp_object_as_double(const cmp_object_t *obj, double *d) {
if (obj->type == CMP_TYPE_DOUBLE) {
*d = obj->as.dbl;
return true;
}
return false;
}
#endif /* CMP_NO_FLOAT */
bool cmp_object_as_bool(const cmp_object_t *obj, bool *b) {
if (obj->type == CMP_TYPE_BOOLEAN) {
if (obj->as.boolean)
*b = true;
else
*b = false;
return true;
}
return false;
}
bool cmp_object_as_str(const cmp_object_t *obj, uint32_t *size) {
switch (obj->type) {
case CMP_TYPE_FIXSTR:
case CMP_TYPE_STR8:
case CMP_TYPE_STR16:
case CMP_TYPE_STR32:
*size = obj->as.str_size;
return true;
default:
return false;
}
}
bool cmp_object_as_bin(const cmp_object_t *obj, uint32_t *size) {
switch (obj->type) {
case CMP_TYPE_BIN8:
case CMP_TYPE_BIN16:
case CMP_TYPE_BIN32:
*size = obj->as.bin_size;
return true;
default:
return false;
}
}
bool cmp_object_as_array(const cmp_object_t *obj, uint32_t *size) {
switch (obj->type) {
case CMP_TYPE_FIXARRAY:
case CMP_TYPE_ARRAY16:
case CMP_TYPE_ARRAY32:
*size = obj->as.array_size;
return true;
default:
return false;
}
}
bool cmp_object_as_map(const cmp_object_t *obj, uint32_t *size) {
switch (obj->type) {
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
*size = obj->as.map_size;
return true;
default:
return false;
}
}
bool cmp_object_as_ext(const cmp_object_t *obj, int8_t *type, uint32_t *size) {
switch (obj->type) {
case CMP_TYPE_FIXEXT1:
case CMP_TYPE_FIXEXT2:
case CMP_TYPE_FIXEXT4:
case CMP_TYPE_FIXEXT8:
case CMP_TYPE_FIXEXT16:
case CMP_TYPE_EXT8:
case CMP_TYPE_EXT16:
case CMP_TYPE_EXT32:
*type = obj->as.ext.type;
*size = obj->as.ext.size;
return true;
default:
return false;
}
}
bool cmp_object_to_str(cmp_ctx_t *ctx, const cmp_object_t *obj, char *data,
uint32_t buf_size) {
uint32_t str_size = 0;
switch (obj->type) {
case CMP_TYPE_FIXSTR:
case CMP_TYPE_STR8:
case CMP_TYPE_STR16:
case CMP_TYPE_STR32:
str_size = obj->as.str_size;
if (str_size >= buf_size) {
ctx->error = CMP_ERROR_STR_DATA_LENGTH_TOO_LONG;
return false;
}
if (!ctx->read(ctx, data, str_size)) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
data[str_size] = 0;
return true;
default:
return false;
}
}
bool cmp_object_to_bin(cmp_ctx_t *ctx, const cmp_object_t *obj, void *data,
uint32_t buf_size) {
uint32_t bin_size = 0;
switch (obj->type) {
case CMP_TYPE_BIN8:
case CMP_TYPE_BIN16:
case CMP_TYPE_BIN32:
bin_size = obj->as.bin_size;
if (bin_size > buf_size) {
ctx->error = CMP_ERROR_BIN_DATA_LENGTH_TOO_LONG;
return false;
}
if (!ctx->read(ctx, data, bin_size)) {
ctx->error = CMP_ERROR_DATA_READING;
return false;
}
return true;
default:
return false;
}
}
/* vi: set et ts=2 sw=2: */
python-ihm-2.7/src/cmp.h 0000664 0000000 0000000 00000052522 15035733372 0015213 0 ustar 00root root 0000000 0000000 /*
The MIT License (MIT)
Copyright (c) 2020 Charles Gunyon
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef CMP_H_INCLUDED
#define CMP_H_INCLUDED
#if defined(_MSC_VER) && _MSC_VER <= 1800
typedef int bool;
#define true 1
#define false 0
typedef unsigned char uint8_t;
typedef signed char int8_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#define INT64_C(c) c ## LL
#define UINT64_C(c) c ## ULL
#else
#include <stdbool.h>
#include <stdint.h>
#endif
#include <stddef.h>
struct cmp_ctx_s;
typedef bool (*cmp_reader)(struct cmp_ctx_s *ctx, void *data, size_t limit);
typedef bool (*cmp_skipper)(struct cmp_ctx_s *ctx, size_t count);
typedef size_t (*cmp_writer)(struct cmp_ctx_s *ctx, const void *data,
size_t count);
enum {
CMP_TYPE_POSITIVE_FIXNUM, /* 0 */
CMP_TYPE_FIXMAP, /* 1 */
CMP_TYPE_FIXARRAY, /* 2 */
CMP_TYPE_FIXSTR, /* 3 */
CMP_TYPE_NIL, /* 4 */
CMP_TYPE_BOOLEAN, /* 5 */
CMP_TYPE_BIN8, /* 6 */
CMP_TYPE_BIN16, /* 7 */
CMP_TYPE_BIN32, /* 8 */
CMP_TYPE_EXT8, /* 9 */
CMP_TYPE_EXT16, /* 10 */
CMP_TYPE_EXT32, /* 11 */
CMP_TYPE_FLOAT, /* 12 */
CMP_TYPE_DOUBLE, /* 13 */
CMP_TYPE_UINT8, /* 14 */
CMP_TYPE_UINT16, /* 15 */
CMP_TYPE_UINT32, /* 16 */
CMP_TYPE_UINT64, /* 17 */
CMP_TYPE_SINT8, /* 18 */
CMP_TYPE_SINT16, /* 19 */
CMP_TYPE_SINT32, /* 20 */
CMP_TYPE_SINT64, /* 21 */
CMP_TYPE_FIXEXT1, /* 22 */
CMP_TYPE_FIXEXT2, /* 23 */
CMP_TYPE_FIXEXT4, /* 24 */
CMP_TYPE_FIXEXT8, /* 25 */
CMP_TYPE_FIXEXT16, /* 26 */
CMP_TYPE_STR8, /* 27 */
CMP_TYPE_STR16, /* 28 */
CMP_TYPE_STR32, /* 29 */
CMP_TYPE_ARRAY16, /* 30 */
CMP_TYPE_ARRAY32, /* 31 */
CMP_TYPE_MAP16, /* 32 */
CMP_TYPE_MAP32, /* 33 */
CMP_TYPE_NEGATIVE_FIXNUM /* 34 */
};
typedef struct cmp_ext_s {
int8_t type;
uint32_t size;
} cmp_ext_t;
union cmp_object_data_u {
bool boolean;
uint8_t u8;
uint16_t u16;
uint32_t u32;
uint64_t u64;
int8_t s8;
int16_t s16;
int32_t s32;
int64_t s64;
#ifndef CMP_NO_FLOAT
float flt;
double dbl;
#endif /* CMP_NO_FLOAT */
uint32_t array_size;
uint32_t map_size;
uint32_t str_size;
uint32_t bin_size;
cmp_ext_t ext;
};
typedef struct cmp_ctx_s {
uint8_t error;
void *buf;
cmp_reader read;
cmp_skipper skip;
cmp_writer write;
} cmp_ctx_t;
typedef struct cmp_object_s {
uint8_t type;
union cmp_object_data_u as;
} cmp_object_t;
#ifdef __cplusplus
extern "C" {
#endif
/*
* ============================================================================
* === Main API
* ============================================================================
*/
/*
* Initializes a CMP context
*
* If you don't intend to read, `read` may be NULL, but calling `*read*`
* functions will crash; there is no check.
*
* `skip` may be NULL, in which case skipping functions will use `read`.
*
* If you don't intend to write, `write` may be NULL, but calling `*write*`
* functions will crash; there is no check.
*/
void cmp_init(cmp_ctx_t *ctx, void *buf, cmp_reader read,
cmp_skipper skip,
cmp_writer write);
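/*
 * A minimal read-context sketch (illustrative, not part of CMP): the
 * mem_ctx_t type and mem_reader function are hypothetical names; only
 * cmp_init, cmp_ctx_t and the cmp_reader typedef above are real API.
 *
 *   typedef struct { const uint8_t *buf; size_t size, pos; } mem_ctx_t;
 *
 *   static bool mem_reader(cmp_ctx_t *ctx, void *data, size_t limit) {
 *     mem_ctx_t *mem = (mem_ctx_t *)ctx->buf;
 *     if (mem->size - mem->pos < limit)
 *       return false;                      // not enough bytes left
 *     memcpy(data, mem->buf + mem->pos, limit);
 *     mem->pos += limit;
 *     return true;
 *   }
 *
 *   // mem_ctx_t mem = {buffer, buffer_len, 0};
 *   // cmp_init(&cmp, &mem, mem_reader, NULL, NULL);  // read-only context
 */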
/* Returns CMP's version */
uint32_t cmp_version(void);
/* Returns the MessagePack version employed by CMP */
uint32_t cmp_mp_version(void);
/* Returns a string description of a CMP context's error */
const char* cmp_strerror(const cmp_ctx_t *ctx);
/* Writes a signed integer to the backend */
bool cmp_write_integer(cmp_ctx_t *ctx, int64_t d);
/* Writes an unsigned integer to the backend */
bool cmp_write_uinteger(cmp_ctx_t *ctx, uint64_t u);
/*
* Writes a floating-point value (either single or double-precision) to the
* backend
*/
#ifndef CMP_NO_FLOAT
bool cmp_write_decimal(cmp_ctx_t *ctx, double d);
#endif /* CMP_NO_FLOAT */
/* Writes NULL to the backend */
bool cmp_write_nil(cmp_ctx_t *ctx);
/* Writes true to the backend */
bool cmp_write_true(cmp_ctx_t *ctx);
/* Writes false to the backend */
bool cmp_write_false(cmp_ctx_t *ctx);
/* Writes a boolean value to the backend */
bool cmp_write_bool(cmp_ctx_t *ctx, bool b);
/*
* Writes an unsigned char's value to the backend as a boolean. This is useful
* if you are using a different boolean type in your application.
*/
bool cmp_write_u8_as_bool(cmp_ctx_t *ctx, uint8_t b);
/*
* Writes a string to the backend; according to the MessagePack spec, this must
* be encoded using UTF-8, but CMP leaves that job up to the programmer.
*/
bool cmp_write_str(cmp_ctx_t *ctx, const char *data, uint32_t size);
/*
* Writes a string to the backend. This avoids using the STR8 marker, which
* is unsupported by MessagePack v4, the version implemented by many other
* MessagePack libraries. No encoding is assumed in this case, not that it
* matters.
*/
bool cmp_write_str_v4(cmp_ctx_t *ctx, const char *data, uint32_t size);
/*
* Writes the string marker to the backend. This is useful if you are writing
* data in chunks instead of a single shot.
*/
bool cmp_write_str_marker(cmp_ctx_t *ctx, uint32_t size);
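/*
 * Chunked-write sketch (illustrative): once the marker carrying the total
 * size has been written, the raw bytes can go straight through the
 * context's writer in pieces. Assumes `cmp` is an initialized context.
 *
 *   if (cmp_write_str_marker(&cmp, 11)) {
 *     cmp.write(&cmp, "hello ", 6);
 *     cmp.write(&cmp, "world", 5);
 *   }
 */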
/*
* Writes the string marker to the backend. This is useful if you are writing
* data in chunks instead of a single shot. This avoids using the STR8
* marker, which is unsupported by MessagePack v4, the version implemented by
* many other MessagePack libraries. No encoding is assumed in this case, not
* that it matters.
*/
bool cmp_write_str_marker_v4(cmp_ctx_t *ctx, uint32_t size);
/* Writes binary data to the backend */
bool cmp_write_bin(cmp_ctx_t *ctx, const void *data, uint32_t size);
/*
* Writes the binary data marker to the backend. This is useful if you are
* writing data in chunks instead of a single shot.
*/
bool cmp_write_bin_marker(cmp_ctx_t *ctx, uint32_t size);
/* Writes an array to the backend. */
bool cmp_write_array(cmp_ctx_t *ctx, uint32_t size);
/* Writes a map to the backend. */
bool cmp_write_map(cmp_ctx_t *ctx, uint32_t size);
/* Writes an extended type to the backend */
bool cmp_write_ext(cmp_ctx_t *ctx, int8_t type, uint32_t size,
const void *data);
/*
* Writes the extended type marker to the backend. This is useful if you want
* to write the type's data in chunks instead of a single shot.
*/
bool cmp_write_ext_marker(cmp_ctx_t *ctx, int8_t type, uint32_t size);
/* Writes an object to the backend */
bool cmp_write_object(cmp_ctx_t *ctx, const cmp_object_t *obj);
/*
* Writes an object to the backend. This avoids using the STR8 marker, which
* is unsupported by MessagePack v4, the version implemented by many other
* MessagePack libraries.
*/
bool cmp_write_object_v4(cmp_ctx_t *ctx, const cmp_object_t *obj);
/* Reads a signed integer that fits inside a signed char */
bool cmp_read_char(cmp_ctx_t *ctx, int8_t *c);
/* Reads a signed integer that fits inside a signed short */
bool cmp_read_short(cmp_ctx_t *ctx, int16_t *s);
/* Reads a signed integer that fits inside a signed int */
bool cmp_read_int(cmp_ctx_t *ctx, int32_t *i);
/* Reads a signed integer that fits inside a signed long */
bool cmp_read_long(cmp_ctx_t *ctx, int64_t *d);
/* Reads a signed integer */
bool cmp_read_integer(cmp_ctx_t *ctx, int64_t *d);
/* Reads an unsigned integer that fits inside an unsigned char */
bool cmp_read_uchar(cmp_ctx_t *ctx, uint8_t *c);
/* Reads an unsigned integer that fits inside an unsigned short */
bool cmp_read_ushort(cmp_ctx_t *ctx, uint16_t *s);
/* Reads an unsigned integer that fits inside an unsigned int */
bool cmp_read_uint(cmp_ctx_t *ctx, uint32_t *i);
/* Reads an unsigned integer that fits inside an unsigned long */
bool cmp_read_ulong(cmp_ctx_t *ctx, uint64_t *u);
/* Reads an unsigned integer */
bool cmp_read_uinteger(cmp_ctx_t *ctx, uint64_t *u);
/*
* Reads a floating point value (either single or double-precision) from the
* backend
*/
#ifndef CMP_NO_FLOAT
bool cmp_read_decimal(cmp_ctx_t *ctx, double *d);
#endif /* CMP_NO_FLOAT */
/* "Reads" (more like "skips") a NULL value from the backend */
bool cmp_read_nil(cmp_ctx_t *ctx);
/* Reads a boolean from the backend */
bool cmp_read_bool(cmp_ctx_t *ctx, bool *b);
/*
* Reads a boolean as an unsigned char from the backend; this is useful if your
* application uses a different boolean type.
*/
bool cmp_read_bool_as_u8(cmp_ctx_t *ctx, uint8_t *b);
/* Reads a string's size from the backend */
bool cmp_read_str_size(cmp_ctx_t *ctx, uint32_t *size);
/*
* Reads a string from the backend; according to the spec, the string's data
* ought to be encoded using UTF-8, but CMP leaves that job up to the programmer.
*/
bool cmp_read_str(cmp_ctx_t *ctx, char *data, uint32_t *size);
/* Reads the size of packed binary data from the backend */
bool cmp_read_bin_size(cmp_ctx_t *ctx, uint32_t *size);
/* Reads packed binary data from the backend */
bool cmp_read_bin(cmp_ctx_t *ctx, void *data, uint32_t *size);
/* Reads an array from the backend */
bool cmp_read_array(cmp_ctx_t *ctx, uint32_t *size);
/* Reads a map from the backend */
bool cmp_read_map(cmp_ctx_t *ctx, uint32_t *size);
/* Reads the extended type's marker from the backend */
bool cmp_read_ext_marker(cmp_ctx_t *ctx, int8_t *type, uint32_t *size);
/* Reads an extended type from the backend */
bool cmp_read_ext(cmp_ctx_t *ctx, int8_t *type, uint32_t *size, void *data);
/* Reads an object from the backend */
bool cmp_read_object(cmp_ctx_t *ctx, cmp_object_t *obj);
/*
* Skips the next object from the backend. If that object is an array or map,
* this function will:
* - If `obj` is not `NULL`, fill in `obj` with that object
* - Set `ctx->error` to `CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED`
* - Return `false`
* Otherwise:
* - (Don't touch `obj`)
* - Return `true`
*/
bool cmp_skip_object(cmp_ctx_t *ctx, cmp_object_t *obj);
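/*
 * Error-handling sketch (illustrative): skip one value and distinguish a
 * depth-limit failure from an I/O failure. Assumes `in` is an initialized
 * read context; the error constant is the one set by cmp.c.
 *
 *   cmp_object_t obj;
 *   if (!cmp_skip_object(&in, &obj)) {
 *     if (in.error == CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED) {
 *       // next value was an array or map; obj now describes it
 *     } else {
 *       fprintf(stderr, "skip failed: %s\n", cmp_strerror(&in));
 *     }
 *   }
 */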
/*
* This is similar to `cmp_skip_object`, except it tolerates flat arrays and
* maps. If when skipping such an array or map this function encounters
* another array/map, it will:
* - If `obj` is not `NULL`, fill in `obj` with that (nested) object
* - Set `ctx->error` to `CMP_ERROR_SKIP_DEPTH_LIMIT_EXCEEDED`
* - Return `false`
* Otherwise:
* - (Don't touch `obj`)
* - Return `true`
*
* WARNING: This can cause your application to spend an unbounded amount of
* time reading nested data structures. Unless you completely trust
* the data source, you should use `cmp_skip_object`.
*/
bool cmp_skip_object_flat(cmp_ctx_t *ctx, cmp_object_t *obj);
/*
* This is similar to `cmp_skip_object`, except it will continually skip
* nested data structures.
*
* WARNING: This can cause your application to spend an unbounded amount of
* time reading nested data structures. Unless you completely trust
* the data source, you should use `cmp_skip_object`.
*/
bool cmp_skip_object_no_limit(cmp_ctx_t *ctx);
/*
* WARNING: THIS FUNCTION IS DEPRECATED AND WILL BE REMOVED IN A FUTURE RELEASE
*
* There is no way to track depths across elements without allocation. For
* example, an array constructed as: `[ [] [] [] [] [] [] [] [] [] [] ]`
* should be able to be skipped with `cmp_skip_object_limit(&cmp, &obj, 2)`.
* However, because we cannot track depth across the elements, there's no way
* to reset it after descending down into each element.
*
* This is similar to `cmp_skip_object`, except it tolerates up to `limit`
* levels of nesting. For example, in order to skip an array that contains a
* map, call `cmp_skip_object_limit(ctx, &obj, 2)`. Or in other words,
* `cmp_skip_object(ctx, &obj)` acts similarly to `cmp_skip_object_limit(ctx,
* &obj, 0)`
*
* Specifically, `limit` refers to depth, not breadth. So in order to skip an
* array that contains two arrays that each contain 3 strings, you would call
* `cmp_skip_object_limit(ctx, &obj, 2)`. In order to skip an array that
* contains 4 arrays that each contain 1 string, you would still call
* `cmp_skip_object_limit(ctx, &obj, 2)`.
*/
bool cmp_skip_object_limit(cmp_ctx_t *ctx, cmp_object_t *obj, uint32_t limit)
#ifdef __GNUC__
__attribute__((deprecated))
#endif
;
#ifdef _MSC_VER
#pragma deprecated(cmp_skip_object_limit)
#endif
/*
* ============================================================================
* === Specific API
* ============================================================================
*/
bool cmp_write_pfix(cmp_ctx_t *ctx, uint8_t c);
bool cmp_write_nfix(cmp_ctx_t *ctx, int8_t c);
bool cmp_write_sfix(cmp_ctx_t *ctx, int8_t c);
bool cmp_write_s8(cmp_ctx_t *ctx, int8_t c);
bool cmp_write_s16(cmp_ctx_t *ctx, int16_t s);
bool cmp_write_s32(cmp_ctx_t *ctx, int32_t i);
bool cmp_write_s64(cmp_ctx_t *ctx, int64_t l);
bool cmp_write_ufix(cmp_ctx_t *ctx, uint8_t c);
bool cmp_write_u8(cmp_ctx_t *ctx, uint8_t c);
bool cmp_write_u16(cmp_ctx_t *ctx, uint16_t s);
bool cmp_write_u32(cmp_ctx_t *ctx, uint32_t i);
bool cmp_write_u64(cmp_ctx_t *ctx, uint64_t l);
#ifndef CMP_NO_FLOAT
bool cmp_write_float(cmp_ctx_t *ctx, float f);
bool cmp_write_double(cmp_ctx_t *ctx, double d);
#endif /* CMP_NO_FLOAT */
bool cmp_write_fixstr_marker(cmp_ctx_t *ctx, uint8_t size);
bool cmp_write_fixstr(cmp_ctx_t *ctx, const char *data, uint8_t size);
bool cmp_write_str8_marker(cmp_ctx_t *ctx, uint8_t size);
bool cmp_write_str8(cmp_ctx_t *ctx, const char *data, uint8_t size);
bool cmp_write_str16_marker(cmp_ctx_t *ctx, uint16_t size);
bool cmp_write_str16(cmp_ctx_t *ctx, const char *data, uint16_t size);
bool cmp_write_str32_marker(cmp_ctx_t *ctx, uint32_t size);
bool cmp_write_str32(cmp_ctx_t *ctx, const char *data, uint32_t size);
bool cmp_write_bin8_marker(cmp_ctx_t *ctx, uint8_t size);
bool cmp_write_bin8(cmp_ctx_t *ctx, const void *data, uint8_t size);
bool cmp_write_bin16_marker(cmp_ctx_t *ctx, uint16_t size);
bool cmp_write_bin16(cmp_ctx_t *ctx, const void *data, uint16_t size);
bool cmp_write_bin32_marker(cmp_ctx_t *ctx, uint32_t size);
bool cmp_write_bin32(cmp_ctx_t *ctx, const void *data, uint32_t size);
bool cmp_write_fixarray(cmp_ctx_t *ctx, uint8_t size);
bool cmp_write_array16(cmp_ctx_t *ctx, uint16_t size);
bool cmp_write_array32(cmp_ctx_t *ctx, uint32_t size);
bool cmp_write_fixmap(cmp_ctx_t *ctx, uint8_t size);
bool cmp_write_map16(cmp_ctx_t *ctx, uint16_t size);
bool cmp_write_map32(cmp_ctx_t *ctx, uint32_t size);
bool cmp_write_fixext1_marker(cmp_ctx_t *ctx, int8_t type);
bool cmp_write_fixext1(cmp_ctx_t *ctx, int8_t type, const void *data);
bool cmp_write_fixext2_marker(cmp_ctx_t *ctx, int8_t type);
bool cmp_write_fixext2(cmp_ctx_t *ctx, int8_t type, const void *data);
bool cmp_write_fixext4_marker(cmp_ctx_t *ctx, int8_t type);
bool cmp_write_fixext4(cmp_ctx_t *ctx, int8_t type, const void *data);
bool cmp_write_fixext8_marker(cmp_ctx_t *ctx, int8_t type);
bool cmp_write_fixext8(cmp_ctx_t *ctx, int8_t type, const void *data);
bool cmp_write_fixext16_marker(cmp_ctx_t *ctx, int8_t type);
bool cmp_write_fixext16(cmp_ctx_t *ctx, int8_t type, const void *data);
bool cmp_write_ext8_marker(cmp_ctx_t *ctx, int8_t type, uint8_t size);
bool cmp_write_ext8(cmp_ctx_t *ctx, int8_t type, uint8_t size,
const void *data);
bool cmp_write_ext16_marker(cmp_ctx_t *ctx, int8_t type, uint16_t size);
bool cmp_write_ext16(cmp_ctx_t *ctx, int8_t type, uint16_t size,
const void *data);
bool cmp_write_ext32_marker(cmp_ctx_t *ctx, int8_t type, uint32_t size);
bool cmp_write_ext32(cmp_ctx_t *ctx, int8_t type, uint32_t size,
const void *data);
bool cmp_read_pfix(cmp_ctx_t *ctx, uint8_t *c);
bool cmp_read_nfix(cmp_ctx_t *ctx, int8_t *c);
bool cmp_read_sfix(cmp_ctx_t *ctx, int8_t *c);
bool cmp_read_s8(cmp_ctx_t *ctx, int8_t *c);
bool cmp_read_s16(cmp_ctx_t *ctx, int16_t *s);
bool cmp_read_s32(cmp_ctx_t *ctx, int32_t *i);
bool cmp_read_s64(cmp_ctx_t *ctx, int64_t *l);
bool cmp_read_ufix(cmp_ctx_t *ctx, uint8_t *c);
bool cmp_read_u8(cmp_ctx_t *ctx, uint8_t *c);
bool cmp_read_u16(cmp_ctx_t *ctx, uint16_t *s);
bool cmp_read_u32(cmp_ctx_t *ctx, uint32_t *i);
bool cmp_read_u64(cmp_ctx_t *ctx, uint64_t *l);
#ifndef CMP_NO_FLOAT
bool cmp_read_float(cmp_ctx_t *ctx, float *f);
bool cmp_read_double(cmp_ctx_t *ctx, double *d);
#endif /* CMP_NO_FLOAT */
bool cmp_read_fixext1_marker(cmp_ctx_t *ctx, int8_t *type);
bool cmp_read_fixext1(cmp_ctx_t *ctx, int8_t *type, void *data);
bool cmp_read_fixext2_marker(cmp_ctx_t *ctx, int8_t *type);
bool cmp_read_fixext2(cmp_ctx_t *ctx, int8_t *type, void *data);
bool cmp_read_fixext4_marker(cmp_ctx_t *ctx, int8_t *type);
bool cmp_read_fixext4(cmp_ctx_t *ctx, int8_t *type, void *data);
bool cmp_read_fixext8_marker(cmp_ctx_t *ctx, int8_t *type);
bool cmp_read_fixext8(cmp_ctx_t *ctx, int8_t *type, void *data);
bool cmp_read_fixext16_marker(cmp_ctx_t *ctx, int8_t *type);
bool cmp_read_fixext16(cmp_ctx_t *ctx, int8_t *type, void *data);
bool cmp_read_ext8_marker(cmp_ctx_t *ctx, int8_t *type, uint8_t *size);
bool cmp_read_ext8(cmp_ctx_t *ctx, int8_t *type, uint8_t *size, void *data);
bool cmp_read_ext16_marker(cmp_ctx_t *ctx, int8_t *type, uint16_t *size);
bool cmp_read_ext16(cmp_ctx_t *ctx, int8_t *type, uint16_t *size, void *data);
bool cmp_read_ext32_marker(cmp_ctx_t *ctx, int8_t *type, uint32_t *size);
bool cmp_read_ext32(cmp_ctx_t *ctx, int8_t *type, uint32_t *size, void *data);
/*
* ============================================================================
* === Object API
* ============================================================================
*/
bool cmp_object_is_char(const cmp_object_t *obj);
bool cmp_object_is_short(const cmp_object_t *obj);
bool cmp_object_is_int(const cmp_object_t *obj);
bool cmp_object_is_long(const cmp_object_t *obj);
bool cmp_object_is_sinteger(const cmp_object_t *obj);
bool cmp_object_is_uchar(const cmp_object_t *obj);
bool cmp_object_is_ushort(const cmp_object_t *obj);
bool cmp_object_is_uint(const cmp_object_t *obj);
bool cmp_object_is_ulong(const cmp_object_t *obj);
bool cmp_object_is_uinteger(const cmp_object_t *obj);
bool cmp_object_is_float(const cmp_object_t *obj);
bool cmp_object_is_double(const cmp_object_t *obj);
bool cmp_object_is_nil(const cmp_object_t *obj);
bool cmp_object_is_bool(const cmp_object_t *obj);
bool cmp_object_is_str(const cmp_object_t *obj);
bool cmp_object_is_bin(const cmp_object_t *obj);
bool cmp_object_is_array(const cmp_object_t *obj);
bool cmp_object_is_map(const cmp_object_t *obj);
bool cmp_object_is_ext(const cmp_object_t *obj);
bool cmp_object_as_char(const cmp_object_t *obj, int8_t *c);
bool cmp_object_as_short(const cmp_object_t *obj, int16_t *s);
bool cmp_object_as_int(const cmp_object_t *obj, int32_t *i);
bool cmp_object_as_long(const cmp_object_t *obj, int64_t *d);
bool cmp_object_as_sinteger(const cmp_object_t *obj, int64_t *d);
bool cmp_object_as_uchar(const cmp_object_t *obj, uint8_t *c);
bool cmp_object_as_ushort(const cmp_object_t *obj, uint16_t *s);
bool cmp_object_as_uint(const cmp_object_t *obj, uint32_t *i);
bool cmp_object_as_ulong(const cmp_object_t *obj, uint64_t *u);
bool cmp_object_as_uinteger(const cmp_object_t *obj, uint64_t *u);
bool cmp_object_as_float(const cmp_object_t *obj, float *f);
bool cmp_object_as_double(const cmp_object_t *obj, double *d);
bool cmp_object_as_bool(const cmp_object_t *obj, bool *b);
bool cmp_object_as_str(const cmp_object_t *obj, uint32_t *size);
bool cmp_object_as_bin(const cmp_object_t *obj, uint32_t *size);
bool cmp_object_as_array(const cmp_object_t *obj, uint32_t *size);
bool cmp_object_as_map(const cmp_object_t *obj, uint32_t *size);
bool cmp_object_as_ext(const cmp_object_t *obj, int8_t *type, uint32_t *size);
bool cmp_object_to_str(cmp_ctx_t *ctx, const cmp_object_t *obj, char *data, uint32_t buf_size);
bool cmp_object_to_bin(cmp_ctx_t *ctx, const cmp_object_t *obj, void *data, uint32_t buf_size);
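/*
 * Dispatch sketch (illustrative): read one value generically, then extract
 * it when it turns out to be a string. Assumes `in` is an initialized read
 * context; cmp_object_to_str null-terminates the buffer on success.
 *
 *   cmp_object_t obj;
 *   char buf[256];
 *   if (cmp_read_object(&in, &obj) && cmp_object_is_str(&obj)) {
 *     if (cmp_object_to_str(&in, &obj, buf, sizeof buf))
 *       puts(buf);
 *   }
 */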
#ifdef __cplusplus
} /* extern "C" */
#endif
/*
* ============================================================================
* === Backwards compatibility defines
* ============================================================================
*/
#define cmp_write_int cmp_write_integer
#define cmp_write_sint cmp_write_integer
#define cmp_write_sinteger cmp_write_integer
#define cmp_write_uint cmp_write_uinteger
#define cmp_read_sinteger cmp_read_integer
#endif /* CMP_H_INCLUDED */
/* vi: set et ts=2 sw=2: */
python-ihm-2.7/src/ihm_format.c 0000664 0000000 0000000 00000310506 15035733372 0016553 0 ustar 00root root 0000000 0000000 /** \file ihm_format.c Routines for handling mmCIF or BinaryCIF format files.
*
* The file is read sequentially. All values for desired keywords in
* desired categories are collected (other parts of the file are ignored).
*
* For mmCIF, at the end of the file and each save frame a callback function
* for each category is called to process the data. In the case of mmCIF
* loops, this callback will be called multiple times, once for each entry
* in the loop.
*
* For BinaryCIF, the category callback will be called as each category
* is encountered in the file, once per row.
*/
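/* A minimal setup sketch (illustrative; handle_struct is a hypothetical
   callback, and its exact signature, like the parse entry point, is
   declared in ihm_format.h rather than here):

     struct ihm_file *fh = ihm_file_new_from_fd(fd);
     struct ihm_reader *reader = ihm_reader_new(fh, false);  // false = mmCIF
     struct ihm_category *cat = ihm_category_new(
         reader, "_struct", handle_struct, NULL, NULL, NULL, NULL);
     struct ihm_keyword *title = ihm_keyword_str_new(cat, "title");

   Once parsing runs, handle_struct fires once per row; title->data.str then
   holds the value (check title->in_file, title->omitted and title->unknown
   before using it). */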
#include "ihm_format.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#if defined(_WIN32) || defined(_WIN64)
# include <io.h>
# include <windows.h>
#else
# include <unistd.h>
#endif
#include <errno.h>
#include <assert.h>
#include "cmp.h"
#define INT_TO_POINTER(i) ((void *) (long) (i))
#define POINTER_TO_INT(p) ((int) (long) (p))
#if defined(_WIN32) || defined(_WIN64)
# define strcasecmp _stricmp
# define usleep Sleep
#endif
/* Allocate memory; unlike malloc() this never returns NULL (a failure will
terminate the program) */
static void *ihm_malloc(size_t size)
{
void *ret = malloc(size);
if (ret) {
return ret;
} else {
fprintf(stderr, "Memory allocation failed\n");
exit(1);
}
}
/* Allocate memory; unlike realloc() this never returns NULL (a failure will
terminate the program) */
static void *ihm_realloc(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
if (ret) {
return ret;
} else {
fprintf(stderr, "Memory allocation failed\n");
exit(1);
}
}
/* Free the memory used by an ihm_error */
void ihm_error_free(struct ihm_error *err)
{
free(err->msg);
free(err);
}
/* Set the error indicator */
void ihm_error_set(struct ihm_error **err, IHMErrorCode code,
const char *format, ...)
{
va_list ap;
int len;
char *msg = NULL;
assert(err && !*err);
/* First, determine length needed for complete string */
va_start(ap, format);
len = vsnprintf(msg, 0, format, ap);
va_end(ap);
msg = (char *)ihm_realloc(msg, len + 1);
va_start(ap, format);
vsnprintf(msg, len + 1, format, ap);
va_end(ap);
*err = (struct ihm_error *)ihm_malloc(sizeof(struct ihm_error));
(*err)->code = code;
(*err)->msg = msg;
}
/* Move error info from `from_err` to `to_err`, if `from_err` is set.
`to_err` must not have already been set with an error. Return true
iff info was moved. */
static bool ihm_error_move(struct ihm_error **to_err,
struct ihm_error **from_err)
{
assert(to_err && !*to_err);
assert(from_err);
if (*from_err) {
*to_err = *from_err;
*from_err = NULL;
return true;
} else {
return false;
}
}
/* A variable-sized array of elements */
struct ihm_array {
/* The array data itself */
void *data;
/* The number of elements in the array */
size_t len;
/* The size in bytes of each element */
size_t element_size;
/* The currently-allocated number of elements in the array (>= len) */
size_t capacity;
};
/* Make a new empty ihm_array */
static struct ihm_array *ihm_array_new(size_t element_size)
{
struct ihm_array *a = (struct ihm_array *)ihm_malloc(
sizeof(struct ihm_array));
a->len = 0;
a->element_size = element_size;
a->capacity = 8;
a->data = ihm_malloc(a->capacity * a->element_size);
return a;
}
/* Release the memory used by an ihm_array */
static void ihm_array_free(struct ihm_array *a)
{
free(a->data);
free(a);
}
/* Set the number of elements in the array to zero */
static void ihm_array_clear(struct ihm_array *a)
{
a->len = 0;
}
/* Return a reference to the ith element in the array, cast to the given type */
#define ihm_array_index(a, t, i) (((t*)(a)->data)[(i)])
/* Add a new element to the end of the array */
static void ihm_array_append(struct ihm_array *a, void *element)
{
a->len++;
if (a->len > a->capacity) {
a->capacity *= 2;
a->data = ihm_realloc(a->data, a->capacity * a->element_size);
}
memcpy((char *)a->data + (a->len - 1) * a->element_size,
element, a->element_size);
}
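/* Usage sketch (illustrative): elements are copied by value into the array,
   and ihm_array_index yields an lvalue of the requested type:

     struct ihm_array *a = ihm_array_new(sizeof(int));
     int x = 42;
     ihm_array_append(a, &x);
     int y = ihm_array_index(a, int, 0);   // y == 42
     ihm_array_free(a);
 */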
/* A variable-length string buffer */
struct ihm_string {
/* The string buffer itself */
char *str;
/* The length of the string (may be different from strlen(str) if str contains
embedded nulls); str[len] is always a null byte */
size_t len;
/* The allocated size of str; never less than len+1 (to allow for null
terminator) */
size_t capacity;
};
/* Make a new ihm_string of zero length */
static struct ihm_string *ihm_string_new(void)
{
struct ihm_string *s = (struct ihm_string *)ihm_malloc(
sizeof(struct ihm_string));
s->len = 0;
s->capacity = 64;
s->str = (char *)ihm_malloc(s->capacity);
/* Ensure string is null terminated */
s->str[0] = '\0';
return s;
}
/* Free the memory used by an ihm_string */
static void ihm_string_free(struct ihm_string *s)
{
free(s->str);
free(s);
}
/* Erase len characters starting at pos from an ihm_string */
static void ihm_string_erase(struct ihm_string *s, size_t pos, size_t len)
{
memmove(s->str + pos, s->str + pos + len, s->len + 1 - pos - len);
s->len -= len;
}
/* Set the size of the string to len. If shorter than the current length,
the string is truncated. If longer, memory (with undefined contents)
is added to the end of the string */
static void ihm_string_set_size(struct ihm_string *s, size_t len)
{
if (len >= s->capacity) {
s->capacity *= 2;
if (len >= s->capacity) {
s->capacity = len + 1;
}
s->str = (char *)ihm_realloc(s->str, s->capacity);
}
s->len = len;
s->str[s->len] = '\0';
}
/* Set the ihm_string contents to be equal to (null-terminated) str */
static void ihm_string_assign(struct ihm_string *s, const char *str)
{
size_t len = strlen(str);
ihm_string_set_size(s, len);
memcpy(s->str, str, len);
}
/* Set the ihm_string contents to be equal to str of given size */
static void ihm_string_assign_n(struct ihm_string *s, const char *str,
size_t strsz)
{
ihm_string_set_size(s, strsz);
memcpy(s->str, str, strsz);
}
/* Append str to the end of the ihm_string */
static void ihm_string_append(struct ihm_string *s, const char *str)
{
size_t len = strlen(str);
size_t oldlen = s->len;
ihm_string_set_size(s, s->len + len);
memcpy(s->str + oldlen, str, len);
}
struct ihm_key_value {
char *key;
void *value;
};
/* Function to free mapping values */
typedef void (*ihm_destroy_callback)(void *data);
/* Simple case-insensitive string to struct* mapping using a binary search */
struct ihm_mapping {
/* Array of struct ihm_key_value */
struct ihm_array *keyvalues;
/* Function to free mapping values */
ihm_destroy_callback value_destroy_func;
};
/* Make a new mapping from case-insensitive strings to arbitrary pointers.
The mapping uses a simple binary search (more memory efficient than
a hash table and generally faster too since the number of keys is quite
small). */
struct ihm_mapping *ihm_mapping_new(ihm_destroy_callback value_destroy_func)
{
struct ihm_mapping *m = (struct ihm_mapping *)ihm_malloc(
sizeof(struct ihm_mapping));
m->keyvalues = ihm_array_new(sizeof(struct ihm_key_value));
m->value_destroy_func = value_destroy_func;
return m;
}
/* Clear all key:value pairs from the mapping */
static void ihm_mapping_remove_all(struct ihm_mapping *m)
{
unsigned i;
for (i = 0; i < m->keyvalues->len; ++i) {
(*m->value_destroy_func)(ihm_array_index(m->keyvalues,
struct ihm_key_value, i).value);
}
ihm_array_clear(m->keyvalues);
}
/* Free memory used by a mapping */
static void ihm_mapping_free(struct ihm_mapping *m)
{
ihm_mapping_remove_all(m);
ihm_array_free(m->keyvalues);
free(m);
}
/* Add a new key:value pair to the mapping. key is assumed to point to memory
that is managed elsewhere (and must be valid as long as the mapping exists)
while value is freed using value_destroy_func when the mapping is freed.
Neither keys nor values should ever be NULL. */
static void ihm_mapping_insert(struct ihm_mapping *m, char *key,
void *value)
{
struct ihm_key_value kv;
kv.key = key;
kv.value = value;
ihm_array_append(m->keyvalues, &kv);
}
static int mapping_compare(const void *a, const void *b)
{
const struct ihm_key_value *kv1, *kv2;
kv1 = (const struct ihm_key_value *)a;
kv2 = (const struct ihm_key_value *)b;
return strcasecmp(kv1->key, kv2->key);
}
/* Put a mapping's key:value pairs in sorted order. This must be done
before ihm_mapping_lookup is used. */
static void ihm_mapping_sort(struct ihm_mapping *m)
{
qsort(m->keyvalues->data, m->keyvalues->len, m->keyvalues->element_size,
mapping_compare);
}
/* Look up key in the mapping and return the corresponding value, or NULL
if not present. This uses a simple binary search so requires that
ihm_mapping_sort() has been called first. */
static void *ihm_mapping_lookup(struct ihm_mapping *m, char *key)
{
int left = 0, right = m->keyvalues->len - 1;
while (left <= right) {
int mid = (left + right) / 2;
int cmp = strcasecmp(ihm_array_index(m->keyvalues, struct ihm_key_value,
mid).key, key);
if (cmp < 0) {
left = mid + 1;
} else if (cmp > 0) {
right = mid - 1;
} else {
return ihm_array_index(m->keyvalues, struct ihm_key_value, mid).value;
}
}
return NULL;
}
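/* Usage sketch (illustrative; value1/value2 are hypothetical heap pointers):
   the sort step is mandatory between the last insert and the first lookup,
   because the lookup is a binary search over the key array.

     struct ihm_mapping *m = ihm_mapping_new(free);
     ihm_mapping_insert(m, (char *)"atom_site", value1);
     ihm_mapping_insert(m, (char *)"entity", value2);
     ihm_mapping_sort(m);
     void *v = ihm_mapping_lookup(m, (char *)"Atom_Site");  // case-insensitive

   Keys must outlive the mapping; values are released with the destroy
   callback (here plain free) when the mapping is freed. */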
/* Callback passed to ihm_mapping_foreach */
typedef void (*ihm_foreach_callback)(void *key, void *value, void *user_data);
/* Call the given function, passing it key, value, and data, for each
key:value pair in the mapping. */
static void ihm_mapping_foreach(struct ihm_mapping *m,
ihm_foreach_callback func, void *data)
{
unsigned i;
for (i = 0; i < m->keyvalues->len; ++i) {
struct ihm_key_value *kv = &ihm_array_index(m->keyvalues,
struct ihm_key_value, i);
(*func)(kv->key, kv->value, data);
}
}
/* Free the memory used by a struct ihm_keyword */
static void ihm_keyword_free(void *value)
{
struct ihm_keyword *key = (struct ihm_keyword *)value;
free(key->name);
if (key->own_data && key->in_file && key->type == IHM_STRING) {
free(key->data.str);
}
free(key);
}
/* A category in an mmCIF file. */
struct ihm_category {
char *name;
/* All keywords that we want to extract in this category */
struct ihm_mapping *keyword_map;
/* Function called when we have all data for this category */
ihm_category_callback data_callback;
/* Function called at the end of each save frame */
ihm_category_callback end_frame_callback;
/* Function called at the very end of the data block */
ihm_category_callback finalize_callback;
/* Data passed to callbacks */
void *data;
/* Function to release data */
ihm_free_callback free_func;
};
/* Keep track of data used while reading an mmCIF or BinaryCIF file. */
struct ihm_reader {
/* The file handle to read from */
struct ihm_file *fh;
/* true for BinaryCIF, false for mmCIF */
bool binary;
/* The current line number in the file */
int linenum;
/* Temporary buffer for string data. For mmCIF, this is used for
multiline tokens, to contain the entire contents of the lines */
struct ihm_string *tmp_str;
/* All tokens parsed from the last line */
struct ihm_array *tokens;
/* The next token to be returned */
unsigned token_index;
/* All categories that we want to extract from the file */
struct ihm_mapping *category_map;
/* Handler for unknown categories */
ihm_unknown_category_callback unknown_category_callback;
/* Data passed to unknown category callback */
void *unknown_category_data;
/* Function to release unknown category data */
ihm_free_callback unknown_category_free_func;
/* Handler for unknown keywords */
ihm_unknown_keyword_callback unknown_keyword_callback;
/* Data passed to unknown keyword callback */
void *unknown_keyword_data;
/* Function to release unknown keyword data */
ihm_free_callback unknown_keyword_free_func;
/* msgpack context for reading BinaryCIF file */
cmp_ctx_t cmp;
/* Number of BinaryCIF data blocks left to read, or -1 if header
not read yet */
int num_blocks_left;
/* Any errors raised in the CMP read callback */
struct ihm_error *cmp_read_err;
};
typedef enum {
MMCIF_TOKEN_VALUE = 1,
MMCIF_TOKEN_OMITTED,
MMCIF_TOKEN_UNKNOWN,
MMCIF_TOKEN_LOOP,
MMCIF_TOKEN_DATA,
MMCIF_TOKEN_SAVE,
MMCIF_TOKEN_VARIABLE
} ihm_token_type;
/* Part of a string that corresponds to an mmCIF token. The memory pointed
to by str is valid only until the next line is read from the file. */
struct ihm_token {
ihm_token_type type;
char *str;
};
/* Free memory used by a struct ihm_category */
static void ihm_category_free(void *value)
{
struct ihm_category *cat = (struct ihm_category *)value;
ihm_mapping_free(cat->keyword_map);
free(cat->name);
if (cat->free_func) {
(*cat->free_func) (cat->data);
}
free(cat);
}
/* Make a new struct ihm_category */
struct ihm_category *ihm_category_new(struct ihm_reader *reader,
const char *name,
ihm_category_callback data_callback,
ihm_category_callback end_frame_callback,
ihm_category_callback finalize_callback,
void *data, ihm_free_callback free_func)
{
struct ihm_category *category =
(struct ihm_category *)ihm_malloc(sizeof(struct ihm_category));
category->name = strdup(name);
category->data_callback = data_callback;
category->end_frame_callback = end_frame_callback;
category->finalize_callback = finalize_callback;
category->data = data;
category->free_func = free_func;
category->keyword_map = ihm_mapping_new(ihm_keyword_free);
ihm_mapping_insert(reader->category_map, category->name, category);
return category;
}
/* Add a new struct ihm_keyword (of undefined type) to a category. */
static struct ihm_keyword *ihm_keyword_new(struct ihm_category *category,
const char *name)
{
struct ihm_keyword *key =
(struct ihm_keyword *)ihm_malloc(sizeof(struct ihm_keyword));
key->name = strdup(name);
key->own_data = false;
key->in_file = false;
ihm_mapping_insert(category->keyword_map, key->name, key);
return key;
}
/* Add a new integer ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_int_new(struct ihm_category *category,
const char *name)
{
struct ihm_keyword *key = ihm_keyword_new(category, name);
key->type = IHM_INT;
return key;
}
/* Add a new floating-point ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_float_new(struct ihm_category *category,
const char *name)
{
struct ihm_keyword *key = ihm_keyword_new(category, name);
key->type = IHM_FLOAT;
return key;
}
/* Add a new boolean ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_bool_new(struct ihm_category *category,
const char *name)
{
struct ihm_keyword *key = ihm_keyword_new(category, name);
key->type = IHM_BOOL;
return key;
}
/* Add a new string ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_str_new(struct ihm_category *category,
const char *name)
{
struct ihm_keyword *key = ihm_keyword_new(category, name);
key->type = IHM_STRING;
return key;
}
static void set_keyword_to_default(struct ihm_keyword *key)
{
if (key->type == IHM_STRING) {
key->data.str = NULL;
}
key->own_data = false;
}
/* Set the value of a given keyword from the given string */
static void set_value_from_string(struct ihm_reader *reader,
struct ihm_category *category,
struct ihm_keyword *key, char *str,
bool own_data, struct ihm_error **err)
{
char *ch;
/* If a key is duplicated, overwrite it with the new value */
if (key->in_file && key->type == IHM_STRING && key->own_data) {
free(key->data.str);
key->data.str = NULL;
}
switch(key->type) {
case IHM_STRING:
key->own_data = own_data;
if (own_data) {
key->data.str = strdup(str);
} else {
key->data.str = str;
}
key->omitted = key->unknown = false;
break;
case IHM_INT:
key->data.ival = strtol(str, &ch, 10);
if (*ch) {
ihm_error_set(err, IHM_ERROR_VALUE,
"Cannot parse '%s' as integer in file, line %d",
str, reader->linenum);
return;
}
key->omitted = key->unknown = false;
break;
case IHM_FLOAT:
key->data.fval = strtod(str, &ch);
if (*ch) {
ihm_error_set(err, IHM_ERROR_VALUE,
"Cannot parse '%s' as float in file, line %d",
str, reader->linenum);
return;
}
key->omitted = key->unknown = false;
break;
case IHM_BOOL:
key->omitted = key->unknown = false;
if (strcasecmp(str, "YES") == 0) {
key->data.bval = true;
} else if (strcasecmp(str, "NO") == 0) {
key->data.bval = false;
} else {
key->omitted = true;
}
break;
}
key->in_file = true;
}
/* Set the given keyword to the 'omitted' special value */
static void set_omitted_value(struct ihm_keyword *key)
{
/* If a key is duplicated, overwrite it with the new value */
if (key->in_file && key->own_data && key->type == IHM_STRING) {
free(key->data.str);
}
key->omitted = true;
key->unknown = false;
set_keyword_to_default(key);
key->in_file = true;
}
/* Set the given keyword to the 'unknown' special value */
static void set_unknown_value(struct ihm_keyword *key)
{
/* If a key is duplicated, overwrite it with the new value */
if (key->in_file && key->own_data && key->type == IHM_STRING) {
free(key->data.str);
}
key->omitted = false;
key->unknown = true;
set_keyword_to_default(key);
key->in_file = true;
}
/* Make a new ihm_file */
struct ihm_file *ihm_file_new(ihm_file_read_callback read_callback,
void *data, ihm_free_callback free_func)
{
struct ihm_file *file =
(struct ihm_file *)ihm_malloc(sizeof(struct ihm_file));
file->buffer = ihm_string_new();
file->line_start = file->next_line_start = 0;
file->read_callback = read_callback;
file->data = data;
file->free_func = free_func;
return file;
}
/* Free memory used by ihm_file */
static void ihm_file_free(struct ihm_file *file)
{
ihm_string_free(file->buffer);
if (file->free_func) {
(*file->free_func) (file->data);
}
free(file);
}
/* Read data from a file descriptor */
static ssize_t fd_read_callback(char *buffer, size_t buffer_len, void *data,
struct ihm_error **err)
{
int fd = POINTER_TO_INT(data);
ssize_t readlen;
while(1) {
#if defined(_WIN32) || defined(_WIN64)
readlen = _read(fd, buffer, buffer_len);
#else
readlen = read(fd, buffer, buffer_len);
#endif
if (readlen != -1 || errno != EAGAIN) break;
/* If EAGAIN encountered, wait for more data to become available */
usleep(100);
}
if (readlen == -1) {
ihm_error_set(err, IHM_ERROR_IO, "%s", strerror(errno));
}
return readlen;
}
/* Read data from file to expand the in-memory buffer.
Returns the number of bytes read (0 on EOF), or -1 (and sets err) on error
*/
static ssize_t expand_buffer(struct ihm_file *fh, struct ihm_error **err)
{
static const size_t READ_SIZE = 4194304; /* Read 4MiB of data at a time */
size_t current_size;
ssize_t readlen;
/* Move any existing data to the start of the buffer (otherwise the buffer
will grow to the full size of the file) */
if (fh->line_start) {
ihm_string_erase(fh->buffer, 0, fh->line_start);
fh->next_line_start -= fh->line_start;
fh->line_start = 0;
}
current_size = fh->buffer->len;
ihm_string_set_size(fh->buffer, current_size + READ_SIZE);
readlen = (*fh->read_callback)(fh->buffer->str + current_size, READ_SIZE,
fh->data, err);
ihm_string_set_size(fh->buffer, current_size + (readlen == -1 ? 0 : readlen));
return readlen;
}
/* Read the next line from the file. Lines are terminated by \n, \r, \r\n,
or \0. On success, true is returned. fh->line_start points to the start of
the null-terminated line. *eof is set true iff the end of the line is
the end of the file.
On error, false is returned and err is set.
*/
static bool ihm_file_read_line(struct ihm_file *fh, int *eof,
struct ihm_error **err)
{
size_t line_end;
*eof = false;
fh->line_start = fh->next_line_start;
if (fh->line_start > fh->buffer->len) {
/* EOF occurred earlier - return it (plus an empty string) again */
*eof = true;
fh->line_start = 0;
fh->buffer->str[0] = '\0';
return true;
}
  /* The line is only definitely terminated if its terminator lies within
     the buffer (an embedded NUL, or a \r that may yet be followed by \n);
     if we scanned to the end of the buffer, read in more data */
while((line_end = fh->line_start
+ strcspn(fh->buffer->str + fh->line_start, "\r\n"))
== fh->buffer->len) {
ssize_t num_added = expand_buffer(fh, err);
if (num_added < 0) {
return false; /* error occurred */
} else if (num_added == 0) {
*eof = true; /* end of file */
break;
}
}
fh->next_line_start = line_end + 1;
/* Handle \r\n terminator */
if (fh->buffer->str[line_end] == '\r'
&& fh->buffer->str[line_end + 1] == '\n') {
fh->next_line_start++;
}
fh->buffer->str[line_end] = '\0';
return true;
}
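/* Illustrative example (assumed input, not from any particular file): if the
   buffer contains "A\r\nB\rC\n", successive calls return the lines "A", "B"
   and "C"; the \r\n pair after "A" is consumed as a single terminator. */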
/* Make a new ihm_file that will read data from the given file descriptor */
struct ihm_file *ihm_file_new_from_fd(int fd)
{
return ihm_file_new(fd_read_callback, INT_TO_POINTER(fd), NULL);
}
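/* Minimal usage sketch (hypothetical caller code; the file name and the
   POSIX open() call are assumptions, and error handling is omitted):

     int fd = open("input.cif", O_RDONLY);
     struct ihm_file *fh = ihm_file_new_from_fd(fd);
     struct ihm_reader *reader = ihm_reader_new(fh, false);
     ... register categories, then drive the parse ...
     ihm_reader_free(reader);  (this also frees fh)
*/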
/* Make a new struct ihm_reader */
struct ihm_reader *ihm_reader_new(struct ihm_file *fh, bool binary)
{
struct ihm_reader *reader =
(struct ihm_reader *)ihm_malloc(sizeof(struct ihm_reader));
reader->fh = fh;
reader->binary = binary;
reader->linenum = 0;
reader->tmp_str = ihm_string_new();
reader->tokens = ihm_array_new(sizeof(struct ihm_token));
reader->token_index = 0;
reader->category_map = ihm_mapping_new(ihm_category_free);
reader->unknown_category_callback = NULL;
reader->unknown_category_data = NULL;
reader->unknown_category_free_func = NULL;
reader->unknown_keyword_callback = NULL;
reader->unknown_keyword_data = NULL;
reader->unknown_keyword_free_func = NULL;
reader->num_blocks_left = -1;
reader->cmp_read_err = NULL;
return reader;
}
/* Free memory used by a struct ihm_reader */
void ihm_reader_free(struct ihm_reader *reader)
{
ihm_string_free(reader->tmp_str);
ihm_array_free(reader->tokens);
ihm_mapping_free(reader->category_map);
ihm_file_free(reader->fh);
if (reader->unknown_category_free_func) {
(*reader->unknown_category_free_func) (reader->unknown_category_data);
}
if (reader->unknown_keyword_free_func) {
(*reader->unknown_keyword_free_func) (reader->unknown_keyword_data);
}
if (reader->cmp_read_err) {
ihm_error_free(reader->cmp_read_err);
}
free(reader);
}
/* Set a callback for unknown categories.
The given callback is called whenever a category is encountered in the
file that is not handled (by ihm_category_new).
*/
void ihm_reader_unknown_category_callback_set(struct ihm_reader *reader,
ihm_unknown_category_callback callback,
void *data, ihm_free_callback free_func)
{
if (reader->unknown_category_free_func) {
(*reader->unknown_category_free_func) (reader->unknown_category_data);
}
reader->unknown_category_callback = callback;
reader->unknown_category_data = data;
reader->unknown_category_free_func = free_func;
}
/* Set a callback for unknown keywords.
The given callback is called whenever a keyword is encountered in the
file that is not handled (within a category that is handled by
ihm_category_new).
*/
void ihm_reader_unknown_keyword_callback_set(struct ihm_reader *reader,
ihm_unknown_keyword_callback callback,
void *data, ihm_free_callback free_func)
{
if (reader->unknown_keyword_free_func) {
(*reader->unknown_keyword_free_func) (reader->unknown_keyword_data);
}
reader->unknown_keyword_callback = callback;
reader->unknown_keyword_data = data;
reader->unknown_keyword_free_func = free_func;
}
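/* Sketch of an unknown-keyword handler (hypothetical; the callback signature
   is inferred from the call sites below, which pass the reader, the category
   and keyword names, the line number, the user data pointer, and an error
   pointer):

     static void my_unknown_keyword(struct ihm_reader *reader,
                                    const char *category, const char *keyword,
                                    int linenum, void *data,
                                    struct ihm_error **err)
     {
       fprintf(stderr, "Unhandled keyword %s.%s at line %d\n",
               category, keyword, linenum);
     }

     ihm_reader_unknown_keyword_callback_set(reader, my_unknown_keyword,
                                             NULL, NULL);
*/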
/* Remove all categories from the reader. */
void ihm_reader_remove_all_categories(struct ihm_reader *reader)
{
ihm_mapping_remove_all(reader->category_map);
if (reader->unknown_category_free_func) {
(*reader->unknown_category_free_func) (reader->unknown_category_data);
}
reader->unknown_category_callback = NULL;
reader->unknown_category_data = NULL;
reader->unknown_category_free_func = NULL;
if (reader->unknown_keyword_free_func) {
(*reader->unknown_keyword_free_func) (reader->unknown_keyword_data);
}
reader->unknown_keyword_callback = NULL;
reader->unknown_keyword_data = NULL;
reader->unknown_keyword_free_func = NULL;
}
/* Given the start of a quoted string, find the end and add a token for it */
static size_t handle_quoted_token(struct ihm_reader *reader,
char *line, size_t len,
size_t start_pos, const char *quote_type,
struct ihm_error **err)
{
char *pt = line + start_pos;
char *end = pt;
/* Get the next quote that is followed by whitespace (or line end).
In mmCIF a quote within a string is not considered an end quote as
long as it is not followed by whitespace. */
do {
end = strchr(end + 1, pt[0]);
} while (end && *end && end[1] && !strchr(" \t", end[1]));
if (end && *end) {
struct ihm_token t;
int tok_end = end - pt + start_pos;
/* A quoted string is always a literal string, even if it is
"?" or ".", not an unknown/omitted value */
t.type = MMCIF_TOKEN_VALUE;
t.str = line + start_pos + 1;
line[tok_end] = '\0';
ihm_array_append(reader->tokens, &t);
return tok_end + 1; /* step past the closing quote */
} else {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"%s-quoted string not terminated in file, line %d",
quote_type, reader->linenum);
return len;
}
}
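/* Worked example of the rule above (assumed input line): in
     _struct.title 'it's a test'
   the quote after "it" is not followed by whitespace, so it does not end the
   token; the value parsed is "it's a test". */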
/* Get the next token from the line. */
static size_t get_next_token(struct ihm_reader *reader, char *line,
size_t len, size_t start_pos,
struct ihm_error **err)
{
/* Skip initial whitespace */
char *pt = line + start_pos;
start_pos += strspn(pt, " \t");
pt = line + start_pos;
if (*pt == '\0') {
return len;
} else if (*pt == '"') {
return handle_quoted_token(reader, line, len, start_pos, "Double", err);
} else if (*pt == '\'') {
return handle_quoted_token(reader, line, len, start_pos, "Single", err);
} else if (*pt == '#') {
/* Comment - discard the rest of the line */
return len;
} else {
struct ihm_token t;
int tok_end = start_pos + strcspn(pt, " \t");
t.str = line + start_pos;
line[tok_end] = '\0';
if (strcmp(t.str, "loop_") == 0) {
t.type = MMCIF_TOKEN_LOOP;
} else if (strncmp(t.str, "data_", 5) == 0) {
t.type = MMCIF_TOKEN_DATA;
} else if (strncmp(t.str, "save_", 5) == 0) {
t.type = MMCIF_TOKEN_SAVE;
} else if (t.str[0] == '_') {
t.type = MMCIF_TOKEN_VARIABLE;
} else if (t.str[0] == '.' && t.str[1] == '\0') {
t.type = MMCIF_TOKEN_OMITTED;
} else if (t.str[0] == '?' && t.str[1] == '\0') {
t.type = MMCIF_TOKEN_UNKNOWN;
} else {
/* Note that we do no special processing for other reserved words
(global_, stop_). But the probability of them occurring
where we expect a value is pretty small. */
t.type = MMCIF_TOKEN_VALUE;
}
ihm_array_append(reader->tokens, &t);
return tok_end + 1;
}
}
/* Break up a line into tokens, populating reader->tokens. */
static void tokenize(struct ihm_reader *reader, char *line,
struct ihm_error **err)
{
size_t start_pos, len = strlen(line);
ihm_array_clear(reader->tokens);
if (len > 0 && line[0] == '#') {
/* Skip comment lines */
return;
}
for (start_pos = 0; start_pos < len && !*err;
start_pos = get_next_token(reader, line, len, start_pos, err)) {
}
if (*err) {
ihm_array_clear(reader->tokens);
}
}
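/* Illustrative example (assumed input line): tokenizing
     _entry.id 1YTI # trailing comment
   yields two tokens, MMCIF_TOKEN_VARIABLE "_entry.id" and MMCIF_TOKEN_VALUE
   "1YTI"; everything after "#" is discarded. */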
/* Return a pointer to the current line */
static char *line_pt(struct ihm_reader *reader)
{
return reader->fh->buffer->str + reader->fh->line_start;
}
/* Read a semicolon-delimited (multiline) token */
static void read_multiline_token(struct ihm_reader *reader,
int ignore_multiline, struct ihm_error **err)
{
int eof = 0;
int start_linenum = reader->linenum;
while (!eof) {
reader->linenum++;
if (!ihm_file_read_line(reader->fh, &eof, err)) {
return;
} else if (line_pt(reader)[0] == ';') {
struct ihm_token t;
t.type = MMCIF_TOKEN_VALUE;
t.str = reader->tmp_str->str;
ihm_array_clear(reader->tokens);
ihm_array_append(reader->tokens, &t);
reader->token_index = 0;
return;
} else if (!ignore_multiline) {
ihm_string_append(reader->tmp_str, "\n");
ihm_string_append(reader->tmp_str, line_pt(reader));
}
}
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"End of file while reading multiline string "
"which started on line %d", start_linenum);
}
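/* Illustrative example (assumed input): the three lines
     ;first line
     second line
     ;
   collapse into a single MMCIF_TOKEN_VALUE token whose string is
   "first line\nsecond line". */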
/* Return the number of tokens still available in the current line. */
static unsigned get_num_line_tokens(struct ihm_reader *reader)
{
return reader->tokens->len - reader->token_index;
}
/* Push back the last token returned by get_token() so it can
be read again. */
static void unget_token(struct ihm_reader *reader)
{
reader->token_index--;
}
/* Get the next token from an mmCIF file, or NULL on end of file.
The memory used by the token is valid for N calls to this function, where
N is the result of get_num_line_tokens().
If ignore_multiline is true, the string contents of any multiline
value tokens (those that are semicolon-delimited) are not stored
in memory. */
static struct ihm_token *get_token(struct ihm_reader *reader,
int ignore_multiline,
struct ihm_error **err)
{
int eof = 0;
if (reader->tokens->len <= reader->token_index) {
do {
/* No tokens left - read the next non-blank line in */
reader->linenum++;
if (!ihm_file_read_line(reader->fh, &eof, err)) {
return NULL;
} else if (line_pt(reader)[0] == ';') {
if (!ignore_multiline) {
/* Skip initial semicolon */
ihm_string_assign(reader->tmp_str, line_pt(reader) + 1);
}
read_multiline_token(reader, ignore_multiline, err);
if (*err) {
return NULL;
}
} else {
tokenize(reader, line_pt(reader), err);
if (*err) {
return NULL;
} else {
reader->token_index = 0;
}
}
} while (reader->tokens->len == 0 && !eof);
}
if (reader->tokens->len == 0) {
return NULL;
} else {
return &ihm_array_index(reader->tokens, struct ihm_token,
reader->token_index++);
}
}
/* Break up a variable token into category and keyword */
static void parse_category_keyword(struct ihm_reader *reader,
char *str, char **category,
char **keyword, struct ihm_error **err)
{
char *dot;
size_t wordlen;
dot = strchr(str, '.');
if (!dot) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"No period found in mmCIF variable name (%s) at line %d",
str, reader->linenum);
return;
}
wordlen = strcspn(str, " \t");
str[wordlen] = '\0';
*dot = '\0';
*category = str;
*keyword = dot + 1;
}
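/* For example, the variable token "_entry.id" is split in place into the
   category "_entry" and the keyword "id". */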
/* Read a line that sets a single value, e.g. _entry.id 1YTI */
static void read_value(struct ihm_reader *reader,
struct ihm_token *key_token, struct ihm_error **err)
{
struct ihm_category *category;
char *category_name, *keyword_name;
parse_category_keyword(reader, key_token->str, &category_name,
&keyword_name, err);
if (*err)
return;
category = (struct ihm_category *)ihm_mapping_lookup(reader->category_map,
category_name);
if (category) {
struct ihm_keyword *key;
key = (struct ihm_keyword *)ihm_mapping_lookup(category->keyword_map,
keyword_name);
if (key) {
struct ihm_token *val_token = get_token(reader, false, err);
if (val_token && val_token->type == MMCIF_TOKEN_VALUE) {
set_value_from_string(reader, category, key, val_token->str, true, err);
} else if (val_token && val_token->type == MMCIF_TOKEN_OMITTED) {
set_omitted_value(key);
} else if (val_token && val_token->type == MMCIF_TOKEN_UNKNOWN) {
set_unknown_value(key);
} else if (!*err) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"No valid value found for %s.%s in file, line %d",
category->name, key->name, reader->linenum);
}
} else if (reader->unknown_keyword_callback) {
(*reader->unknown_keyword_callback)(reader, category_name, keyword_name,
reader->linenum,
reader->unknown_keyword_data, err);
}
} else if (reader->unknown_category_callback) {
(*reader->unknown_category_callback)(reader, category_name,
reader->linenum,
reader->unknown_category_data, err);
}
}
/* Handle a single token listing category and keyword from a loop_ construct.
The relevant ihm_keyword is returned, or NULL if we are not interested
in this keyword. */
static struct ihm_keyword *handle_loop_index(struct ihm_reader *reader,
struct ihm_category **catpt,
struct ihm_token *token,
bool first_loop,
struct ihm_error **err)
{
struct ihm_category *category;
char *category_name, *keyword_name;
parse_category_keyword(reader, token->str, &category_name,
&keyword_name, err);
if (*err)
return NULL;
category = (struct ihm_category *)ihm_mapping_lookup(reader->category_map,
category_name);
if (first_loop) {
*catpt = category;
if (!category && reader->unknown_category_callback) {
(*reader->unknown_category_callback)(reader, category_name,
reader->linenum,
reader->unknown_category_data, err);
if (*err) {
return NULL;
}
}
} else if (*catpt != category) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"mmCIF files cannot contain multiple categories "
"within a single loop at line %d", reader->linenum);
return NULL;
}
if (category) {
struct ihm_keyword *key;
key = (struct ihm_keyword *)ihm_mapping_lookup(category->keyword_map,
keyword_name);
if (key) {
return key;
} else if (reader->unknown_keyword_callback) {
(*reader->unknown_keyword_callback)(reader, category_name, keyword_name,
reader->linenum,
reader->unknown_keyword_data, err);
if (*err) {
return NULL;
}
}
}
return NULL;
}
static void check_keywords_in_file(void *k, void *value, void *user_data)
{
struct ihm_keyword *key = (struct ihm_keyword *)value;
bool *in_file = (bool *)user_data;
*in_file |= key->in_file;
}
static void clear_keywords(void *k, void *value, void *user_data)
{
struct ihm_keyword *key = (struct ihm_keyword *)value;
if (key->own_data && key->type == IHM_STRING) {
free(key->data.str);
}
key->in_file = false;
set_keyword_to_default(key);
}
/* Call the category's data callback function.
If force is false, only call it if data has actually been read in. */
static void call_category(struct ihm_reader *reader,
struct ihm_category *category, bool force,
struct ihm_error **err)
{
if (category->data_callback) {
if (!force) {
/* Check to see if at least one keyword was given a value */
ihm_mapping_foreach(category->keyword_map, check_keywords_in_file,
&force);
}
if (force) {
(*category->data_callback) (reader, reader->linenum, category->data, err);
}
}
/* Clear out keyword values, ready for the next set of data */
ihm_mapping_foreach(category->keyword_map, clear_keywords, NULL);
}
/* Read the list of keywords from a loop_ construct. */
static struct ihm_array *read_loop_keywords(struct ihm_reader *reader,
struct ihm_category **category,
struct ihm_error **err)
{
bool first_loop = true;
struct ihm_token *token;
/* An array of ihm_keyword*, in the order the values should be given.
Any NULL pointers correspond to keywords we're not interested in. */
struct ihm_array *keywords = ihm_array_new(sizeof(struct ihm_keyword*));
*category = NULL;
while (!*err && (token = get_token(reader, false, err))) {
if (token->type == MMCIF_TOKEN_VARIABLE) {
struct ihm_keyword *k = handle_loop_index(reader, category,
token, first_loop, err);
ihm_array_append(keywords, &k);
first_loop = false;
} else if (token->type == MMCIF_TOKEN_VALUE
|| token->type == MMCIF_TOKEN_UNKNOWN
|| token->type == MMCIF_TOKEN_OMITTED) {
/* OK, end of keywords; proceed on to values */
unget_token(reader);
break;
} else {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Was expecting a keyword or value for loop at line %d",
reader->linenum);
}
}
if (*err) {
ihm_array_free(keywords);
return NULL;
} else {
return keywords;
}
}
/* Read data for a loop_ construct */
static void read_loop_data(struct ihm_reader *reader,
struct ihm_category *category, unsigned len,
struct ihm_keyword **keywords,
struct ihm_error **err)
{
while (!*err) {
/* Does the current line contain an entire row in the loop? */
int oneline = get_num_line_tokens(reader) >= len;
unsigned i;
for (i = 0; !*err && i < len; ++i) {
struct ihm_token *token = get_token(reader, false, err);
if (*err) {
break;
} else if (token && token->type == MMCIF_TOKEN_VALUE) {
if (keywords[i]) {
set_value_from_string(reader, category, keywords[i], token->str,
!oneline, err);
}
} else if (token && token->type == MMCIF_TOKEN_OMITTED) {
if (keywords[i]) {
set_omitted_value(keywords[i]);
}
} else if (token && token->type == MMCIF_TOKEN_UNKNOWN) {
if (keywords[i]) {
set_unknown_value(keywords[i]);
}
} else if (i == 0) {
/* OK, end of the loop */
if (token) {
unget_token(reader);
}
return;
} else {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Wrong number of data values in loop (should be an "
"exact multiple of the number of keys) at line %d",
reader->linenum);
}
}
if (!*err) {
call_category(reader, category, true, err);
}
}
}
/* Read a loop_ construct from the file. */
static void read_loop(struct ihm_reader *reader, struct ihm_error **err)
{
struct ihm_array *keywords;
struct ihm_category *category;
keywords = read_loop_keywords(reader, &category, err);
if (*err) {
return;
}
if (category) {
read_loop_data(reader, category, keywords->len,
(struct ihm_keyword **)keywords->data, err);
}
ihm_array_free(keywords);
}
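/* Illustrative example (assumed input): the construct
     loop_
     _atom_site.id
     _atom_site.type_symbol
     1 N
     2 C
   is read as two keywords followed by two rows of values; call_category is
   invoked once per complete row. */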
struct category_foreach_data {
struct ihm_error **err;
struct ihm_reader *reader;
};
static void call_category_foreach(void *key, void *value, void *user_data)
{
struct category_foreach_data *d = (struct category_foreach_data *)user_data;
struct ihm_category *category = (struct ihm_category *)value;
if (!*(d->err)) {
call_category(d->reader, category, false, d->err);
}
}
/* Process any data stored in all categories */
static void call_all_categories(struct ihm_reader *reader,
struct ihm_error **err)
{
struct category_foreach_data d;
d.err = err;
d.reader = reader;
ihm_mapping_foreach(reader->category_map, call_category_foreach, &d);
}
static void finalize_category_foreach(void *key, void *value, void *user_data)
{
struct category_foreach_data *d = (struct category_foreach_data *)user_data;
struct ihm_category *category = (struct ihm_category *)value;
if (!*(d->err) && category->finalize_callback) {
(*category->finalize_callback)(d->reader, d->reader->linenum,
category->data, d->err);
}
}
/* Call each category's finalize callback */
static void finalize_all_categories(struct ihm_reader *reader,
struct ihm_error **err)
{
struct category_foreach_data d;
d.err = err;
d.reader = reader;
ihm_mapping_foreach(reader->category_map, finalize_category_foreach, &d);
}
static void end_frame_category_foreach(void *key, void *value, void *user_data)
{
struct category_foreach_data *d = (struct category_foreach_data *)user_data;
struct ihm_category *category = (struct ihm_category *)value;
if (!*(d->err) && category->end_frame_callback) {
(*category->end_frame_callback)(d->reader, d->reader->linenum,
category->data, d->err);
}
}
/* Call each category's end_frame callback */
static void end_frame_all_categories(struct ihm_reader *reader,
struct ihm_error **err)
{
struct category_foreach_data d;
d.err = err;
d.reader = reader;
ihm_mapping_foreach(reader->category_map, end_frame_category_foreach, &d);
}
static void sort_category_foreach(void *key, void *value, void *user_data)
{
struct ihm_category *category = (struct ihm_category *)value;
ihm_mapping_sort(category->keyword_map);
}
/* Make sure that all mappings are sorted before we try to use them */
static void sort_mappings(struct ihm_reader *reader)
{
ihm_mapping_sort(reader->category_map);
ihm_mapping_foreach(reader->category_map, sort_category_foreach, NULL);
}
/* Read a single data block from an mmCIF file. On success, *more_data is
   set iff at least one more data block follows. */
static bool read_mmcif_file(struct ihm_reader *reader, bool *more_data,
struct ihm_error **err)
{
int ndata = 0, in_save = 0;
struct ihm_token *token;
sort_mappings(reader);
while (!*err && (token = get_token(reader, true, err))) {
if (token->type == MMCIF_TOKEN_VARIABLE) {
read_value(reader, token, err);
} else if (token->type == MMCIF_TOKEN_DATA) {
ndata++;
/* Only read the first data block */
if (ndata > 1) {
/* Allow reading the next data block */
unget_token(reader);
break;
}
} else if (token->type == MMCIF_TOKEN_LOOP) {
read_loop(reader, err);
} else if (token->type == MMCIF_TOKEN_SAVE) {
in_save = !in_save;
if (!in_save) {
call_all_categories(reader, err);
end_frame_all_categories(reader, err);
}
}
}
if (!*err) {
call_all_categories(reader, err);
finalize_all_categories(reader, err);
}
if (*err) {
*more_data = false;
return false;
} else {
*more_data = (ndata > 1);
return true;
}
}
/* Read exactly sz bytes from the given file. Return a pointer to the
location in the file read buffer of those bytes. This pointer is only
valid until the next file read. */
static bool ihm_file_read_bytes(struct ihm_file *fh, char **buf, size_t sz,
struct ihm_error **err)
{
/* Read at least 4MiB of data at a time */
static const ssize_t READ_SIZE = 4194304;
if (fh->line_start + sz > fh->buffer->len) {
size_t current_size, to_read;
ssize_t readlen, needed;
/* Move any existing data to the start of the buffer, so it doesn't
grow to the full size of the file */
if (fh->line_start) {
ihm_string_erase(fh->buffer, 0, fh->line_start);
fh->line_start = 0;
}
/* Fill buffer with new data, at least sz long (but could be more) */
current_size = fh->buffer->len;
needed = sz - current_size;
to_read = READ_SIZE > needed ? READ_SIZE : needed;
/* Expand buffer as needed */
ihm_string_set_size(fh->buffer, current_size + to_read);
readlen = (*fh->read_callback)(
fh->buffer->str + current_size, to_read, fh->data, err);
if (*err) return false;
if (readlen < needed) {
ihm_error_set(err, IHM_ERROR_IO, "Less data read than requested");
return false;
}
/* Set buffer size to match data actually read */
ihm_string_set_size(fh->buffer, current_size + readlen);
}
*buf = fh->buffer->str + fh->line_start;
fh->line_start += sz;
return true;
}
/* Read callback for the cmp library */
static bool bcif_cmp_read(cmp_ctx_t *ctx, void *data, size_t limit)
{
char *buf;
struct ihm_reader *reader = (struct ihm_reader *)ctx->buf;
if (!ihm_file_read_bytes(reader->fh, &buf, limit, &reader->cmp_read_err)) {
return false;
} else {
memcpy(data, buf, limit);
return true;
}
}
/* Skip callback for the cmp library */
static bool bcif_cmp_skip(cmp_ctx_t *ctx, size_t count)
{
char *buf;
struct ihm_reader *reader = (struct ihm_reader *)ctx->buf;
if (!ihm_file_read_bytes(reader->fh, &buf, count, &reader->cmp_read_err)) {
return false;
} else {
return true;
}
}
/* Read the next msgpack object from the BinaryCIF file; it must be a map.
   On success, return true and store the number of elements in the map in
   *map_size; on error, set err and return false.
 */
static bool read_bcif_map(struct ihm_reader *reader, uint32_t *map_size,
struct ihm_error **err)
{
if (!cmp_read_map(&reader->cmp, map_size)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting a map; %s",
cmp_strerror(&reader->cmp));
}
return false;
} else {
return true;
}
}
/* Like read_bcif_map, but if a NIL object is encountered instead, act
as if a zero-size map was read. */
static bool read_bcif_map_or_nil(struct ihm_reader *reader, uint32_t *map_size,
struct ihm_error **err)
{
cmp_object_t obj;
if (!cmp_read_object(&reader->cmp, &obj)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "%s",
cmp_strerror(&reader->cmp));
}
return false;
}
switch(obj.type) {
case CMP_TYPE_NIL:
*map_size = 0;
return true;
case CMP_TYPE_FIXMAP:
case CMP_TYPE_MAP16:
case CMP_TYPE_MAP32:
*map_size = obj.as.map_size;
return true;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting a map or nil");
return false;
}
}
/* Read the next number (any kind of int or float) object from the BinaryCIF
file, and return it as a double.
*/
static bool read_bcif_any_double(struct ihm_reader *reader, double *value,
struct ihm_error **err)
{
cmp_object_t obj;
if (!cmp_read_object(&reader->cmp, &obj)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "%s",
cmp_strerror(&reader->cmp));
}
return false;
}
switch(obj.type) {
case CMP_TYPE_POSITIVE_FIXNUM:
case CMP_TYPE_UINT8:
*value = obj.as.u8;
return true;
case CMP_TYPE_UINT16:
*value = obj.as.u16;
return true;
case CMP_TYPE_UINT32:
*value = obj.as.u32;
return true;
case CMP_TYPE_NEGATIVE_FIXNUM:
case CMP_TYPE_SINT8:
*value = obj.as.s8;
return true;
case CMP_TYPE_SINT16:
*value = obj.as.s16;
return true;
case CMP_TYPE_SINT32:
*value = obj.as.s32;
return true;
case CMP_TYPE_FLOAT:
*value = obj.as.flt;
return true;
case CMP_TYPE_DOUBLE:
*value = obj.as.dbl;
return true;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting a number");
return false;
}
}
/* Read the next msgpack object from the BinaryCIF file; it must be an array.
   On success, return true and store the number of elements in the array in
   *array_size; on error, set err and return false.
 */
static bool read_bcif_array(struct ihm_reader *reader, uint32_t *array_size,
struct ihm_error **err)
{
if (!cmp_read_array(&reader->cmp, array_size)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting an array; %s",
cmp_strerror(&reader->cmp));
}
return false;
} else {
return true;
}
}
/* Skip the next msgpack object from the BinaryCIF file; it can be any kind
of simple object (not an array or map).
*/
static bool skip_bcif_object(struct ihm_reader *reader, struct ihm_error **err)
{
if (!cmp_skip_object(&reader->cmp, NULL)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Could not skip object; %s",
cmp_strerror(&reader->cmp));
}
return false;
} else {
return true;
}
}
/* Skip the next msgpack object from the BinaryCIF file; it can be any kind
of object, including an array or map.
*/
static bool skip_bcif_object_no_limit(struct ihm_reader *reader,
struct ihm_error **err)
{
if (!cmp_skip_object_no_limit(&reader->cmp)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Could not skip object; %s",
cmp_strerror(&reader->cmp));
}
return false;
} else {
return true;
}
}
/* Read the next integer object from the BinaryCIF file.
*/
static bool read_bcif_int(struct ihm_reader *reader, int32_t *value,
struct ihm_error **err)
{
if (!cmp_read_int(&reader->cmp, value)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting an integer; %s",
cmp_strerror(&reader->cmp));
}
return false;
} else {
return true;
}
}
/* Read the next string from the BinaryCIF file and return a pointer to it.
This pointer points into ihm_reader and is valid until the next read. */
static bool read_bcif_string(struct ihm_reader *reader, char **str,
struct ihm_error **err)
{
char *buf;
uint32_t strsz;
if (!cmp_read_str_size(&reader->cmp, &strsz)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting a string; %s",
cmp_strerror(&reader->cmp));
}
return false;
}
if (!ihm_file_read_bytes(reader->fh, &buf, strsz, err)) return false;
/* Copy into reader's temporary string buffer and return a pointer to it */
ihm_string_assign_n(reader->tmp_str, buf, strsz);
*str = reader->tmp_str->str;
return true;
}
/* Read the next string from the BinaryCIF file and store a copy of it at
the given pointer. The caller is responsible for freeing it later. */
static bool read_bcif_string_dup(struct ihm_reader *reader, char **str,
struct ihm_error **err)
{
char *buf;
uint32_t strsz;
if (!cmp_read_str_size(&reader->cmp, &strsz)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting a string; %s",
cmp_strerror(&reader->cmp));
}
return false;
}
if (!ihm_file_read_bytes(reader->fh, &buf, strsz, err)) return false;
/* strdup into new buffer; we can't use strndup as Windows doesn't have it */
free(*str);
*str = (char *)ihm_malloc(strsz + 1);
memcpy(*str, buf, strsz);
(*str)[strsz] = '\0';
return true;
}
/* Read the next string from the BinaryCIF file. Set *match to true iff it
   compares equal to str. This is slightly more efficient than returning a
   null-terminated copy of the string and then comparing it, as it avoids
   the copy. */
static bool read_bcif_exact_string(struct ihm_reader *reader, const char *str,
bool *match, struct ihm_error **err)
{
char *buf;
uint32_t actual_len, want_len = strlen(str);
if (!cmp_read_str_size(&reader->cmp, &actual_len)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting a string; %s",
cmp_strerror(&reader->cmp));
}
return false;
}
if (!ihm_file_read_bytes(reader->fh, &buf, actual_len, err)) return false;
*match = (actual_len == want_len && strncmp(str, buf, want_len) == 0);
return true;
}
/* Read the next binary object from the BinaryCIF file and store a copy of it
at the given pointer. The caller is responsible for freeing it later. */
static bool read_bcif_binary_dup(struct ihm_reader *reader, char **bin,
size_t *bin_size, struct ihm_error **err)
{
char *buf;
uint32_t binsz;
if (!cmp_read_bin_size(&reader->cmp, &binsz)) {
if (!ihm_error_move(err, &reader->cmp_read_err)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, "Was expecting binary; %s",
cmp_strerror(&reader->cmp));
}
return false;
}
if (!ihm_file_read_bytes(reader->fh, &buf, binsz, err)) return false;
/* memcpy into new buffer */
free(*bin);
*bin = (char *)ihm_malloc(binsz);
*bin_size = binsz;
memcpy(*bin, buf, binsz);
return true;
}
/* Read the header from a BinaryCIF file to get the number of data blocks */
static bool read_bcif_header(struct ihm_reader *reader, struct ihm_error **err)
{
uint32_t map_size, i;
if (!read_bcif_map(reader, &map_size, err)) return false;
for (i = 0; i < map_size; ++i) {
bool match;
if (!read_bcif_exact_string(reader, "dataBlocks", &match,
err)) return false;
if (match) {
uint32_t array_size;
if (!read_bcif_array(reader, &array_size, err)) return false;
reader->num_blocks_left = array_size;
return true;
} else {
if (!skip_bcif_object(reader, err)) return false;
}
}
reader->num_blocks_left = 0;
return true;
}
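/* Illustrative sketch of a typical BinaryCIF top-level map (keys other than
   "dataBlocks", such as "encoder" and "version", are skipped above):
     { "encoder": "...", "version": "...", "dataBlocks": [ ... ] }
   After this function returns, num_blocks_left holds the length of the
   "dataBlocks" array. */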
/* The type of data stored in bcif_data */
typedef enum {
BCIF_DATA_NULL, /* No data present (e.g. empty mask) */
BCIF_DATA_RAW, /* Raw data, before decoding */
BCIF_DATA_INT8, /* Array of signed bytes */
BCIF_DATA_UINT8, /* Array of unsigned bytes */
BCIF_DATA_INT16, /* Array of signed 16-bit integers */
BCIF_DATA_UINT16, /* Array of unsigned 16-bit integers */
BCIF_DATA_INT32, /* Array of signed 32-bit integers */
BCIF_DATA_UINT32, /* Array of unsigned 32-bit integers */
BCIF_DATA_FLOAT, /* Array of single-precision floating point values */
BCIF_DATA_DOUBLE, /* Array of double-precision floating point values */
BCIF_DATA_STRING /* Array of char* null-terminated strings */
} bcif_data_type;
/* All possible C types stored in bcif_data */
union bcif_data_c {
char *raw;
int8_t *int8;
uint8_t *uint8;
int16_t *int16;
uint16_t *uint16;
int32_t *int32;
uint32_t *uint32;
float *float32;
double *float64;
char **string;
};
/* Data stored in BinaryCIF for a column, mask, or StringArray offsets.
This data can be of multiple types, e.g. raw, int array, etc. */
struct bcif_data {
/* The type of the data */
bcif_data_type type;
/* The data itself */
union bcif_data_c data;
/* The size of the data (e.g. array dimension) */
size_t size;
};
/* Initialize a new bcif_data */
static void bcif_data_init(struct bcif_data *d)
{
d->type = BCIF_DATA_NULL;
d->size = 0;
}
/* Free memory used by a bcif_data */
static void bcif_data_free(struct bcif_data *d)
{
switch(d->type) {
case BCIF_DATA_NULL:
break;
case BCIF_DATA_RAW:
free(d->data.raw);
break;
case BCIF_DATA_INT8:
free(d->data.int8);
break;
case BCIF_DATA_UINT8:
free(d->data.uint8);
break;
case BCIF_DATA_INT16:
free(d->data.int16);
break;
case BCIF_DATA_UINT16:
free(d->data.uint16);
break;
case BCIF_DATA_INT32:
free(d->data.int32);
break;
case BCIF_DATA_UINT32:
free(d->data.uint32);
break;
case BCIF_DATA_FLOAT:
free(d->data.float32);
break;
case BCIF_DATA_DOUBLE:
free(d->data.float64);
break;
case BCIF_DATA_STRING:
free(d->data.string);
break;
}
}
/* Overwrite bcif_data with new raw data */
static void bcif_data_assign_raw(struct bcif_data *d, char *data, size_t size)
{
bcif_data_free(d);
d->type = BCIF_DATA_RAW;
d->data.raw = data;
d->size = size;
}
/* All valid and supported raw encoder types */
typedef enum {
BCIF_ENC_NONE,
BCIF_ENC_STRING_ARRAY,
BCIF_ENC_BYTE_ARRAY,
BCIF_ENC_INTEGER_PACKING,
BCIF_ENC_DELTA,
BCIF_ENC_RUN_LENGTH,
BCIF_ENC_FIXED_POINT,
BCIF_ENC_INTERVAL_QUANT
} bcif_encoding_kind;
/* An encoding used to compress raw data in BinaryCIF */
struct bcif_encoding {
/* The encoder type */
bcif_encoding_kind kind;
/* Origin (for delta encoding) */
int32_t origin;
/* Factor (for fixed point encoding) */
int32_t factor;
/* Min value (for interval quantization encoding) */
double minval;
/* Max value (for interval quantization encoding) */
double maxval;
/* Number of steps (for interval quantization encoding) */
int32_t numsteps;
/* ByteArray type */
int32_t type;
/* Encoding of StringArray data */
struct bcif_encoding *first_data_encoding;
/* Encoding of StringArray offset */
struct bcif_encoding *first_offset_encoding;
/* String data for StringArray encoding */
char *string_data;
/* Data for offsets for StringArray encoding */
struct bcif_data offsets;
/* Next encoding, or NULL */
struct bcif_encoding *next;
};
/* A single column in a BinaryCIF category */
struct bcif_column {
/* Keyword name */
char *name;
/* Data and size */
struct bcif_data data;
/* Mask data and size (or NULL) */
struct bcif_data mask_data;
/* Singly-linked list of data encodings */
struct bcif_encoding *first_encoding;
/* Singly-linked list of mask encodings */
struct bcif_encoding *first_mask_encoding;
/* The corresponding ihm_keyword, if any */
struct ihm_keyword *keyword;
/* Temporary buffer for keyword value as a string */
char *str;
/* Next column, or NULL */
struct bcif_column *next;
};
/* A single category in a BinaryCIF file */
struct bcif_category {
/* Category name */
char *name;
/* Singly-linked list of column (keyword) information */
struct bcif_column *first_column;
};
/* Create and return a new bcif_encoding */
static struct bcif_encoding *bcif_encoding_new()
{
struct bcif_encoding *enc = (struct bcif_encoding *)ihm_malloc(
sizeof(struct bcif_encoding));
enc->kind = BCIF_ENC_NONE;
enc->origin = 0;
enc->factor = 1;
enc->minval = 0.;
enc->maxval = 0.;
enc->numsteps = 1;
enc->type = -1;
enc->first_data_encoding = NULL;
enc->first_offset_encoding = NULL;
enc->string_data = NULL;
bcif_data_init(&enc->offsets);
enc->next = NULL;
return enc;
}
/* Free memory used by a bcif_encoding */
static void bcif_encoding_free(struct bcif_encoding *enc)
{
while(enc->first_data_encoding) {
struct bcif_encoding *inenc = enc->first_data_encoding;
enc->first_data_encoding = inenc->next;
bcif_encoding_free(inenc);
}
while(enc->first_offset_encoding) {
struct bcif_encoding *inenc = enc->first_offset_encoding;
enc->first_offset_encoding = inenc->next;
bcif_encoding_free(inenc);
}
free(enc->string_data);
bcif_data_free(&enc->offsets);
free(enc);
}
/* Create and return a new bcif_column */
static struct bcif_column *bcif_column_new()
{
struct bcif_column *c = (struct bcif_column *)ihm_malloc(
sizeof(struct bcif_column));
c->name = NULL;
bcif_data_init(&c->data);
bcif_data_init(&c->mask_data);
c->first_encoding = NULL;
c->first_mask_encoding = NULL;
c->keyword = NULL;
c->str = NULL;
c->next = NULL;
return c;
}
/* Free memory used by a bcif_column */
static void bcif_column_free(struct bcif_column *col)
{
free(col->name);
  bcif_data_free(&col->data);
  bcif_data_free(&col->mask_data);
while(col->first_encoding) {
struct bcif_encoding *enc = col->first_encoding;
col->first_encoding = enc->next;
bcif_encoding_free(enc);
}
while(col->first_mask_encoding) {
struct bcif_encoding *enc = col->first_mask_encoding;
col->first_mask_encoding = enc->next;
bcif_encoding_free(enc);
}
free(col->str);
free(col);
}
/* Initialize a new bcif_category */
static void bcif_category_init(struct bcif_category *cat)
{
cat->name = NULL;
cat->first_column = NULL;
}
/* Free memory used by a bcif_category */
static void bcif_category_free(struct bcif_category *cat)
{
free(cat->name);
while(cat->first_column) {
struct bcif_column *col = cat->first_column;
cat->first_column = col->next;
bcif_column_free(col);
}
}
static bool read_bcif_encodings(struct ihm_reader *reader,
struct bcif_encoding **first,
bool allow_string_array,
struct ihm_error **err);
/* Read a single encoding from a BinaryCIF file */
static bool read_bcif_encoding(struct ihm_reader *reader,
struct bcif_encoding *enc,
bool allow_string_array,
struct ihm_error **err)
{
uint32_t map_size, i;
if (!read_bcif_map(reader, &map_size, err)) return false;
for (i = 0; i < map_size; ++i) {
char *str;
if (!read_bcif_string(reader, &str, err)) return false;
if (strcmp(str, "kind") == 0) {
if (!read_bcif_string(reader, &str, err)) return false;
if (strcmp(str, "StringArray") == 0) {
if (!allow_string_array) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"StringArray decoding cannot be used for data "
"or offset encoding");
return false;
}
enc->kind = BCIF_ENC_STRING_ARRAY;
} else if (strcmp(str, "ByteArray") == 0) {
enc->kind = BCIF_ENC_BYTE_ARRAY;
} else if (strcmp(str, "IntegerPacking") == 0) {
enc->kind = BCIF_ENC_INTEGER_PACKING;
} else if (strcmp(str, "Delta") == 0) {
enc->kind = BCIF_ENC_DELTA;
} else if (strcmp(str, "RunLength") == 0) {
enc->kind = BCIF_ENC_RUN_LENGTH;
} else if (strcmp(str, "FixedPoint") == 0) {
enc->kind = BCIF_ENC_FIXED_POINT;
} else if (strcmp(str, "IntervalQuantization") == 0) {
enc->kind = BCIF_ENC_INTERVAL_QUANT;
}
} else if (strcmp(str, "dataEncoding") == 0) {
/* dataEncoding and offsetEncoding should not include StringArray
encoding */
if (!read_bcif_encodings(reader, &enc->first_data_encoding,
false, err)) return false;
} else if (strcmp(str, "offsetEncoding") == 0) {
if (!read_bcif_encodings(reader, &enc->first_offset_encoding,
false, err)) return false;
} else if (strcmp(str, "stringData") == 0) {
if (!read_bcif_string_dup(reader, &enc->string_data, err)) return false;
} else if (strcmp(str, "offsets") == 0) {
char *data = NULL;
size_t data_size;
if (!read_bcif_binary_dup(reader, &data, &data_size, err)) return false;
bcif_data_assign_raw(&enc->offsets, data, data_size);
} else if (strcmp(str, "origin") == 0) {
if (!read_bcif_int(reader, &enc->origin, err)) return false;
} else if (strcmp(str, "factor") == 0) {
if (!read_bcif_int(reader, &enc->factor, err)) return false;
} else if (strcmp(str, "type") == 0) {
if (!read_bcif_int(reader, &enc->type, err)) return false;
} else if (strcmp(str, "min") == 0) {
if (!read_bcif_any_double(reader, &enc->minval, err)) return false;
} else if (strcmp(str, "max") == 0) {
if (!read_bcif_any_double(reader, &enc->maxval, err)) return false;
} else if (strcmp(str, "numSteps") == 0) {
if (!read_bcif_int(reader, &enc->numsteps, err)) return false;
} else {
if (!skip_bcif_object_no_limit(reader, err)) return false;
}
}
return true;
}
/* Read all encoding information from a BinaryCIF file */
static bool read_bcif_encodings(struct ihm_reader *reader,
struct bcif_encoding **first,
bool allow_string_array,
struct ihm_error **err)
{
uint32_t array_size, i;
if (!read_bcif_array(reader, &array_size, err)) return false;
for (i = 0; i < array_size; ++i) {
struct bcif_encoding *enc = bcif_encoding_new();
if (!read_bcif_encoding(reader, enc, allow_string_array, err)) {
bcif_encoding_free(enc);
return false;
} else {
enc->next = *first;
*first = enc;
}
}
return true;
}
/* Read raw data from a BinaryCIF file */
static bool read_bcif_data(struct ihm_reader *reader,
struct bcif_column *col,
struct ihm_error **err)
{
uint32_t map_size, i;
if (!read_bcif_map(reader, &map_size, err)) return false;
for (i = 0; i < map_size; ++i) {
char *str;
if (!read_bcif_string(reader, &str, err)) return false;
if (strcmp(str, "data") == 0) {
char *data = NULL;
size_t data_size;
if (!read_bcif_binary_dup(reader, &data, &data_size, err)) return false;
bcif_data_assign_raw(&col->data, data, data_size);
} else if (strcmp(str, "encoding") == 0) {
if (!read_bcif_encodings(reader, &col->first_encoding,
true, err)) return false;
} else {
if (!skip_bcif_object_no_limit(reader, err)) return false;
}
}
return true;
}
/* Read a column's mask from a BinaryCIF file */
static bool read_bcif_mask(struct ihm_reader *reader,
struct bcif_column *col,
struct ihm_error **err)
{
uint32_t map_size, i;
if (!read_bcif_map_or_nil(reader, &map_size, err)) return false;
for (i = 0; i < map_size; ++i) {
char *str;
if (!read_bcif_string(reader, &str, err)) return false;
if (strcmp(str, "encoding") == 0) {
if (!read_bcif_encodings(reader, &col->first_mask_encoding, true,
err)) return false;
} else if (strcmp(str, "data") == 0) {
char *mask_data = NULL;
size_t mask_data_size;
if (!read_bcif_binary_dup(reader, &mask_data,
&mask_data_size, err)) return false;
bcif_data_assign_raw(&col->mask_data, mask_data, mask_data_size);
} else {
if (!skip_bcif_object(reader, err)) return false;
}
}
return true;
}
/* Read a single column from a BinaryCIF file */
static bool read_bcif_column(struct ihm_reader *reader,
struct bcif_column *col,
struct ihm_category *ihm_cat,
struct ihm_error **err)
{
uint32_t map_size, i;
/* If we already read the category name then we can potentially skip
reading data/mask if we don't have a handler for the keyword */
bool skip = false;
if (!read_bcif_map(reader, &map_size, err)) return false;
for (i = 0; i < map_size; ++i) {
char *str;
if (!read_bcif_string(reader, &str, err)) return false;
if (strcmp(str, "name") == 0) {
if (!read_bcif_string_dup(reader, &col->name, err)) return false;
if (ihm_cat) {
struct ihm_keyword *key;
key = (struct ihm_keyword *)ihm_mapping_lookup(
ihm_cat->keyword_map, col->name);
if (!key) {
skip = true;
}
}
} else if (!skip && strcmp(str, "data") == 0) {
if (!read_bcif_data(reader, col, err)) return false;
} else if (!skip && strcmp(str, "mask") == 0) {
if (!read_bcif_mask(reader, col, err)) return false;
} else {
if (!skip_bcif_object_no_limit(reader, err)) return false;
}
}
return true;
}
/* Read all columns for a category from a BinaryCIF file */
static bool read_bcif_columns(struct ihm_reader *reader,
struct bcif_category *cat,
struct ihm_category *ihm_cat,
struct ihm_error **err)
{
uint32_t array_size, i;
if (!read_bcif_array(reader, &array_size, err)) return false;
for (i = 0; i < array_size; ++i) {
struct bcif_column *col = bcif_column_new();
if (!read_bcif_column(reader, col, ihm_cat, err)) {
bcif_column_free(col);
return false;
} else {
col->next = cat->first_column;
cat->first_column = col;
}
}
return true;
}
/* Read a single category from a BinaryCIF file */
static bool read_bcif_category(struct ihm_reader *reader,
struct bcif_category *cat,
struct ihm_category **ihm_cat,
struct ihm_error **err)
{
uint32_t map_size, i;
bool skip = false;
*ihm_cat = NULL;
if (!read_bcif_map(reader, &map_size, err)) return false;
for (i = 0; i < map_size; ++i) {
char *str;
if (!read_bcif_string(reader, &str, err)) return false;
if (strcmp(str, "name") == 0) {
if (!read_bcif_string_dup(reader, &cat->name, err)) return false;
*ihm_cat = (struct ihm_category *)ihm_mapping_lookup(
reader->category_map, cat->name);
if (!*ihm_cat) {
skip = true; /* no need to read columns if we don't have a callback */
}
} else if (!skip && strcmp(str, "columns") == 0) {
if (!read_bcif_columns(reader, cat, *ihm_cat, err)) return false;
} else {
if (!skip_bcif_object_no_limit(reader, err)) return false;
}
}
return true;
}
/* Valid ByteArray data types */
#define BYTE_ARRAY_INT8 1
#define BYTE_ARRAY_INT16 2
#define BYTE_ARRAY_INT32 3
#define BYTE_ARRAY_UINT8 4
#define BYTE_ARRAY_UINT16 5
#define BYTE_ARRAY_UINT32 6
#define BYTE_ARRAY_FLOAT 32
#define BYTE_ARRAY_DOUBLE 33
/* Make sure the input data size for ByteArray decoding is correct, and
set the output size. */
static bool handle_byte_array_size(struct bcif_data *d, size_t type_size,
struct ihm_error **err)
{
static const uint32_t ul = 1;
if (d->size % type_size != 0) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"ByteArray raw data size is not a multiple of the type size");
return false;
}
/* If we're on a bigendian platform, byteswap the array (ByteArray is
always little endian) */
  if ((int)(*((unsigned char *)&ul)) == 0 && type_size > 1) {
    size_t i, j, start, nelem = d->size / type_size;
    for (i = 0, start = 0; i < nelem; ++i, start += type_size) {
      for (j = 0; j < type_size / 2; ++j) {
        /* Swap bytes j and type_size - 1 - j of each element */
        char tmp = d->data.raw[start + j];
        d->data.raw[start + j] = d->data.raw[start + type_size - 1 - j];
        d->data.raw[start + type_size - 1 - j] = tmp;
      }
    }
  }
  d->size /= type_size;
return true;
}
/* Decode data using BinaryCIF ByteArray encoding */
static bool decode_bcif_byte_array(struct bcif_data *d,
struct bcif_encoding *enc,
struct ihm_error **err)
{
if (d->type != BCIF_DATA_RAW) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"ByteArray not given raw data as input");
return false;
}
switch(enc->type) {
case BYTE_ARRAY_INT8:
d->type = BCIF_DATA_INT8;
d->data.int8 = (int8_t *)d->data.raw;
break;
case BYTE_ARRAY_UINT8:
d->type = BCIF_DATA_UINT8;
d->data.uint8 = (uint8_t *)d->data.raw;
break;
case BYTE_ARRAY_INT16:
if (!handle_byte_array_size(d, sizeof(int16_t), err)) return false;
d->type = BCIF_DATA_INT16;
d->data.int16 = (int16_t *)d->data.raw;
break;
case BYTE_ARRAY_UINT16:
if (!handle_byte_array_size(d, sizeof(uint16_t), err)) return false;
d->type = BCIF_DATA_UINT16;
d->data.uint16 = (uint16_t *)d->data.raw;
break;
case BYTE_ARRAY_INT32:
if (!handle_byte_array_size(d, sizeof(int32_t), err)) return false;
d->type = BCIF_DATA_INT32;
d->data.int32 = (int32_t *)d->data.raw;
break;
case BYTE_ARRAY_UINT32:
if (!handle_byte_array_size(d, sizeof(uint32_t), err)) return false;
d->type = BCIF_DATA_UINT32;
d->data.uint32 = (uint32_t *)d->data.raw;
break;
case BYTE_ARRAY_FLOAT:
if (!handle_byte_array_size(d, sizeof(float), err)) return false;
d->type = BCIF_DATA_FLOAT;
d->data.float32 = (float *)d->data.raw;
break;
case BYTE_ARRAY_DOUBLE:
if (!handle_byte_array_size(d, sizeof(double), err)) return false;
d->type = BCIF_DATA_DOUBLE;
d->data.float64 = (double *)d->data.raw;
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"ByteArray unhandled data type %d", enc->type);
return false;
}
return true;
}
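/* Worked example (assumed raw input): the four little-endian bytes
   { 0x01, 0x00, 0x02, 0x00 } decoded with type BYTE_ARRAY_INT16 become the
   two int16 values [1, 2]; d->size changes from 4 (bytes) to 2 (elements). */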
#define DECODE_BCIF_INT_PACK(limit_check, datapt, datatyp) \
{ \
int32_t *outdata, value; \
size_t i, j; \
size_t outsz = 0; \
/* Get the size of the decoded array. Limit values don't count. */ \
for (i = 0; i < d->size; ++i) { \
datatyp t = datapt[i]; \
if (!(limit_check)) { outsz++; } \
} \
outdata = (int32_t *)ihm_malloc(outsz * sizeof(int32_t)); \
j = 0; \
value = 0; \
for (i = 0; i < d->size; ++i) { \
datatyp t = datapt[i]; \
if (limit_check) { \
value += t; \
} else { \
outdata[j++] = value + t; \
value = 0; \
} \
} \
bcif_data_free(d); \
/* todo: can the output be unsigned 32-bit ? */ \
d->type = BCIF_DATA_INT32; \
d->size = outsz; \
d->data.int32 = outdata; \
}
/* Decode data using BinaryCIF IntegerPacking encoding */
static bool decode_bcif_integer_packing(struct bcif_data *d,
struct bcif_encoding *enc,
struct ihm_error **err)
{
/* Encoded data must be 8- or 16-bit integers (signed or unsigned).
The behavior is similar in each case, so use a macro */
switch(d->type) {
case BCIF_DATA_UINT8:
DECODE_BCIF_INT_PACK(t == 0xFF, d->data.uint8, uint8_t);
break;
case BCIF_DATA_INT8:
DECODE_BCIF_INT_PACK(t == 0x7F || t == -0x80, d->data.int8, int8_t);
break;
case BCIF_DATA_UINT16:
DECODE_BCIF_INT_PACK(t == 0xFFFF, d->data.uint16, uint16_t);
break;
case BCIF_DATA_INT16:
DECODE_BCIF_INT_PACK(t == 0x7FFF || t == -0x8000, d->data.int16, int16_t);
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"IntegerPacking bad input data type %d", d->type);
return false;
}
return true;
}
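/* Worked example (assumed input): unsigned 8-bit packed data
   [1, 255, 0, 255, 10] decodes to [1, 255, 265]; each 255 is a limit value
   whose magnitude is accumulated into the next output element. */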
#define DECODE_BCIF_DELTA(datapt, outpt, datatyp) \
{ \
int32_t value; \
size_t i; \
value = enc->origin; \
for (i = 0; i < d->size; ++i) { \
value += datapt[i]; \
outpt[i] = value; \
} \
}
#define DECODE_BCIF_DELTA_PROMOTE(datapt, datatyp) \
{ \
int32_t *outdata = (int32_t *)ihm_malloc(d->size * sizeof(int32_t)); \
DECODE_BCIF_DELTA(datapt, outdata, datatyp) \
bcif_data_free(d); \
d->type = BCIF_DATA_INT32; \
d->data.int32 = outdata; \
}
/* Decode data using BinaryCIF Delta encoding */
static bool decode_bcif_delta(struct bcif_data *d,
struct bcif_encoding *enc,
struct ihm_error **err)
{
switch (d->type) {
case BCIF_DATA_INT8:
DECODE_BCIF_DELTA_PROMOTE(d->data.int8, int8_t);
break;
case BCIF_DATA_UINT8:
DECODE_BCIF_DELTA_PROMOTE(d->data.uint8, uint8_t);
break;
case BCIF_DATA_INT16:
DECODE_BCIF_DELTA_PROMOTE(d->data.int16, int16_t);
break;
case BCIF_DATA_UINT16:
DECODE_BCIF_DELTA_PROMOTE(d->data.uint16, uint16_t);
break;
case BCIF_DATA_INT32:
DECODE_BCIF_DELTA(d->data.int32, d->data.int32, int32_t);
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Delta not given integers as input");
return false;
}
return true;
}
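/* Worked example (assumed input): with origin 1000, the deltas [0, 3, 2, 1]
   decode to the running sums [1000, 1003, 1005, 1006]. */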
#define DECODE_BCIF_RUN_LENGTH(datapt, datatyp) \
{ \
size_t i, k; \
int32_t outsz, j, *outdata; \
outsz = 0; \
for (i = 1; i < d->size; i += 2) { \
int32_t ts = datapt[i]; \
/* Try to catch invalid (or malicious) counts. Counts cannot be negative
and the largest count seen in a very large PDB structure (3j3q) is
about 2.4m, so we are unlikely to see counts of 40m in real systems */ \
if (ts < 0 || ts > 40000000) { \
ihm_error_set(err, IHM_ERROR_FILE_FORMAT, \
"Bad run length repeat count %d", ts); \
return false; \
} \
outsz += ts; \
} \
assert(outsz > 0); \
outdata = (int32_t *)ihm_malloc(outsz * sizeof(int32_t)); \
for (i = 0, k = 0; i < d->size; i += 2) { \
int32_t value = datapt[i]; \
int32_t n_repeats = datapt[i + 1]; \
for (j = 0; j < n_repeats; ++j) { \
outdata[k++] = value; \
} \
} \
bcif_data_free(d); \
d->type = BCIF_DATA_INT32; \
d->size = outsz; \
d->data.int32 = outdata; \
}
/* Decode data using BinaryCIF RunLength encoding */
static bool decode_bcif_run_length(struct bcif_data *d,
struct bcif_encoding *enc,
struct ihm_error **err)
{
if (d->size % 2 != 0) {
    ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
                  "Run length data size (%lu) is not even",
                  (unsigned long)d->size);
return false;
}
switch (d->type) {
case BCIF_DATA_INT8:
DECODE_BCIF_RUN_LENGTH(d->data.int8, int8_t);
break;
case BCIF_DATA_UINT8:
DECODE_BCIF_RUN_LENGTH(d->data.uint8, uint8_t);
break;
case BCIF_DATA_INT16:
DECODE_BCIF_RUN_LENGTH(d->data.int16, int16_t);
break;
case BCIF_DATA_UINT16:
DECODE_BCIF_RUN_LENGTH(d->data.uint16, uint16_t);
break;
case BCIF_DATA_INT32:
DECODE_BCIF_RUN_LENGTH(d->data.int32, int32_t);
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"RunLength not given integers as input");
return false;
}
return true;
}
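/* Worked example (assumed input): the (value, count) pairs [1, 3, 2, 2]
   expand to [1, 1, 1, 2, 2]. */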
#define DECODE_BCIF_FIXED_POINT(datapt) \
{ \
size_t i; \
/* We ignore srcType and always output double (not float) */ \
double *outdata = (double *)ihm_malloc(d->size * sizeof(double)); \
for (i = 0; i < d->size; ++i) { \
outdata[i] = (double)datapt[i] / enc->factor; \
} \
bcif_data_free(d); \
d->type = BCIF_DATA_DOUBLE; \
d->data.float64 = outdata; \
}
/* Decode data using BinaryCIF FixedPoint encoding */
static bool decode_bcif_fixed_point(struct bcif_data *d,
struct bcif_encoding *enc,
struct ihm_error **err)
{
switch (d->type) {
case BCIF_DATA_INT8:
DECODE_BCIF_FIXED_POINT(d->data.int8);
break;
case BCIF_DATA_UINT8:
DECODE_BCIF_FIXED_POINT(d->data.uint8);
break;
case BCIF_DATA_INT16:
DECODE_BCIF_FIXED_POINT(d->data.int16);
break;
case BCIF_DATA_UINT16:
DECODE_BCIF_FIXED_POINT(d->data.uint16);
break;
case BCIF_DATA_INT32:
DECODE_BCIF_FIXED_POINT(d->data.int32);
break;
case BCIF_DATA_UINT32:
DECODE_BCIF_FIXED_POINT(d->data.uint32);
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"FixedPoint not given integers as input");
return false;
}
return true;
}
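/* Worked example (assumed input): with factor 100, the integers [1234, 505]
   decode to the doubles [12.34, 5.05]. */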
#define DECODE_BCIF_INTERVAL_QUANT(datapt) \
{ \
size_t i; \
/* We ignore srcType and always output double (not float) */ \
double *outdata = (double *)ihm_malloc(d->size * sizeof(double)); \
double delta = (enc->maxval - enc->minval) / (enc->numsteps - 1); \
for (i = 0; i < d->size; ++i) { \
outdata[i] = enc->minval + delta * datapt[i]; \
} \
bcif_data_free(d); \
d->type = BCIF_DATA_DOUBLE; \
d->data.float64 = outdata; \
}
/* Decode data using BinaryCIF IntervalQuantization encoding */
static bool decode_bcif_interval_quant(struct bcif_data *d,
struct bcif_encoding *enc,
struct ihm_error **err)
{
if (enc->numsteps < 2) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"IntervalQuantization num_steps (%d) must be at least 2",
enc->numsteps);
return false;
}
switch (d->type) {
case BCIF_DATA_INT8:
DECODE_BCIF_INTERVAL_QUANT(d->data.int8);
break;
case BCIF_DATA_UINT8:
DECODE_BCIF_INTERVAL_QUANT(d->data.uint8);
break;
case BCIF_DATA_INT16:
DECODE_BCIF_INTERVAL_QUANT(d->data.int16);
break;
case BCIF_DATA_UINT16:
DECODE_BCIF_INTERVAL_QUANT(d->data.uint16);
break;
case BCIF_DATA_INT32:
DECODE_BCIF_INTERVAL_QUANT(d->data.int32);
break;
case BCIF_DATA_UINT32:
DECODE_BCIF_INTERVAL_QUANT(d->data.uint32);
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"IntervalQuantization not given integers as input");
return false;
}
return true;
}
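/* Worked example (assumed input): with minval 0, maxval 1 and numsteps 3,
   delta is 0.5, so the integers [0, 1, 2] decode to [0.0, 0.5, 1.0]. */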
/* Return true iff the data type is int32, or another integer that
can be promoted to that type */
static bool require_bcif_data_is_int32(struct bcif_data *d)
{
switch(d->type) {
case BCIF_DATA_INT8:
case BCIF_DATA_UINT8:
case BCIF_DATA_INT16:
case BCIF_DATA_UINT16:
case BCIF_DATA_INT32:
return true;
default:
return false;
}
}
/* Get the ith element of the data array. It must be of a type that can be
promoted to int32_t (see require_bcif_data_is_int32) */
static int32_t get_int_data(struct bcif_data *d, int i)
{
switch(d->type) {
case BCIF_DATA_INT8:
return d->data.int8[i];
case BCIF_DATA_UINT8:
return d->data.uint8[i];
case BCIF_DATA_INT16:
return d->data.int16[i];
case BCIF_DATA_UINT16:
return d->data.uint16[i];
case BCIF_DATA_INT32:
return d->data.int32[i];
default:
return 0;
}
}
/* Decode data using BinaryCIF StringArray encoding */
static bool decode_bcif_string_array(struct bcif_data *d,
struct bcif_encoding *enc,
struct ihm_error **err)
{
char *newstring, **strarr;
int32_t stringsz;
size_t i;
int *starts, start;
if (!require_bcif_data_is_int32(d)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"StringArray not given integers as input");
return false;
}
if (!require_bcif_data_is_int32(&enc->offsets)) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"StringArray not given integers as offsets");
return false;
}
/* Make sure offsets are in range */
stringsz = strlen(enc->string_data);
for (i = 0; i < enc->offsets.size; ++i) {
if (get_int_data(&enc->offsets, i) < 0
|| get_int_data(&enc->offsets, i) > stringsz) {
      ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
                    "StringArray offset %d out of range 0-%d",
                    get_int_data(&enc->offsets, i), stringsz);
return false;
}
}
/* Add nulls to string_data so we can point directly into it */
stringsz = 0;
for (i = 0; i < enc->offsets.size - 1; ++i) {
stringsz += 1 + get_int_data(&enc->offsets, i + 1)
- get_int_data(&enc->offsets, i);
}
newstring = (char *)ihm_malloc(stringsz);
starts = (int *)ihm_malloc((enc->offsets.size - 1) * sizeof(int));
start = 0;
for (i = 0; i < enc->offsets.size - 1; ++i) {
stringsz = get_int_data(&enc->offsets, i + 1)
- get_int_data(&enc->offsets, i);
memcpy(newstring + start, enc->string_data + get_int_data(&enc->offsets, i),
stringsz);
newstring[start + stringsz] = '\0';
starts[i] = start;
start += stringsz + 1;
}
free(enc->string_data);
enc->string_data = newstring;
strarr = (char **)ihm_malloc(d->size * sizeof(char *));
for (i = 0; i < d->size; ++i) {
int32_t strnum = get_int_data(d, i);
    /* If strnum is out of range, return an empty string (this usually
       corresponds to masked data). Note that the pool holds
       offsets.size - 1 strings, so the last valid index is
       offsets.size - 2. */
    if (strnum < 0 || (size_t)strnum + 1 >= enc->offsets.size) {
strarr[i] = "";
} else {
strarr[i] = enc->string_data + starts[strnum];
}
}
free(starts);
bcif_data_free(d);
d->type = BCIF_DATA_STRING;
d->data.string = strarr;
return true;
}
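/* Worked example (assumed input): with stringData "aAB", offsets [0, 1, 3]
   and index data [0, 1, 0], the pool splits into the strings "a" (offset
   range 0-1) and "AB" (offset range 1-3), and the decoded column is
   ["a", "AB", "a"]. */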
/* Decode raw BinaryCIF data by using all encoders specified */
static bool decode_bcif_data(struct bcif_data *d, struct bcif_encoding *enc,
struct ihm_error **err)
{
while (enc) {
switch(enc->kind) {
case BCIF_ENC_BYTE_ARRAY:
if (!decode_bcif_byte_array(d, enc, err)) return false;
break;
case BCIF_ENC_INTEGER_PACKING:
if (!decode_bcif_integer_packing(d, enc, err)) return false;
break;
case BCIF_ENC_DELTA:
if (!decode_bcif_delta(d, enc, err)) return false;
break;
case BCIF_ENC_RUN_LENGTH:
if (!decode_bcif_run_length(d, enc, err)) return false;
break;
case BCIF_ENC_FIXED_POINT:
if (!decode_bcif_fixed_point(d, enc, err)) return false;
break;
case BCIF_ENC_INTERVAL_QUANT:
if (!decode_bcif_interval_quant(d, enc, err)) return false;
break;
case BCIF_ENC_STRING_ARRAY:
if (!decode_bcif_data(&enc->offsets, enc->first_offset_encoding,
err)) return false;
if (!decode_bcif_data(d, enc->first_data_encoding, err)) return false;
if (!decode_bcif_string_array(d, enc, err)) return false;
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Unhandled encoding type %d", enc->kind);
return false;
}
enc = enc->next;
}
return true;
}
/* Map BinaryCIF columns to ihm_keywords */
static bool check_bcif_columns(struct ihm_reader *reader,
struct bcif_category *cat,
struct ihm_category *ihm_cat,
struct ihm_error **err)
{
struct bcif_column *col;
/* Match columns to ihm_keywords; call back for any unknown */
for (col = cat->first_column; col; col = col->next) {
col->keyword = (struct ihm_keyword *)ihm_mapping_lookup(
ihm_cat->keyword_map, col->name);
if (!col->keyword && reader->unknown_keyword_callback) {
(*reader->unknown_keyword_callback)(reader, cat->name, col->name, 0,
reader->unknown_keyword_data, err);
if (*err) return false;
}
}
return true;
}
/* Decode and check the column's data */
static bool process_column_data(struct bcif_column *col,
struct ihm_error **err)
{
if (!decode_bcif_data(&col->data, col->first_encoding, err)) return false;
switch(col->data.type) {
case BCIF_DATA_INT32:
case BCIF_DATA_INT8:
case BCIF_DATA_UINT8:
case BCIF_DATA_INT16:
case BCIF_DATA_UINT16:
case BCIF_DATA_FLOAT:
case BCIF_DATA_DOUBLE:
case BCIF_DATA_STRING:
return true;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Unsupported column data type %d", col->data.type);
return false;
}
}
/* Decode and check the column's mask, if any */
static bool process_column_mask(struct bcif_column *col,
struct ihm_error **err)
{
if (col->mask_data.type == BCIF_DATA_NULL) {
return true;
}
if (!decode_bcif_data(&col->mask_data, col->first_mask_encoding,
err)) return false;
/* Masks are supposed to be uint8, but some of our decoders return int32
for simplicity; if so, map the data back to uint8 */
if (col->mask_data.type == BCIF_DATA_INT32) {
uint8_t *newdata;
size_t i;
newdata = (uint8_t *)ihm_malloc(col->mask_data.size * sizeof(uint8_t));
for (i = 0; i < col->mask_data.size; ++i) {
newdata[i] = (uint8_t)col->mask_data.data.int32[i];
}
free(col->mask_data.data.int32);
col->mask_data.data.uint8 = newdata;
col->mask_data.type = BCIF_DATA_UINT8;
}
if (col->mask_data.type != BCIF_DATA_UINT8) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Unsupported column mask data type %d", col->mask_data.type);
return false;
}
return true;
}
static void set_value_from_bcif_string(struct ihm_keyword *key, char *str,
struct ihm_error **err)
{
char *ch;
switch(key->type) {
case IHM_STRING:
/* In BinaryCIF the string is always owned by the file buffer,
not the keyword */
key->own_data = false;
key->data.str = str;
key->omitted = false;
break;
case IHM_INT:
key->data.ival = strtol(str, &ch, 10);
if (*ch) {
ihm_error_set(err, IHM_ERROR_VALUE,
"Cannot parse '%s' as integer in file", str);
} else {
key->omitted = false;
}
break;
case IHM_FLOAT:
key->data.fval = strtod(str, &ch);
if (*ch) {
ihm_error_set(err, IHM_ERROR_VALUE,
"Cannot parse '%s' as float in file", str);
} else {
key->omitted = false;
}
break;
case IHM_BOOL:
key->omitted = false;
if (strcasecmp(str, "YES") == 0) {
key->data.bval = true;
} else if (strcasecmp(str, "NO") == 0) {
key->data.bval = false;
} else {
key->omitted = true;
}
break;
}
if (!*err) {
key->in_file = true;
key->unknown = false;
}
}
static void set_value_from_bcif_double(struct ihm_keyword *key, double fval,
char *buffer)
{
key->omitted = key->unknown = false;
key->in_file = true;
switch(key->type) {
case IHM_STRING:
/* buffer is owned by the caller, not the keyword, so don't take ownership */
key->own_data = false;
sprintf(buffer, "%g", fval);
key->data.str = buffer;
break;
case IHM_INT:
/* Truncate float to int. This matches Python's behavior
of int(some_float) */
key->data.ival = (int)fval;
break;
case IHM_FLOAT:
key->data.fval = fval;
break;
case IHM_BOOL:
key->omitted = true;
break;
}
}
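/* For example, a file value of 3.7 stored into an IHM_INT keyword
   yields 3, matching Python's int(3.7) == 3 */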
static void set_value_from_bcif_int(struct ihm_keyword *key, int32_t ival,
char *buffer)
{
key->omitted = key->unknown = false;
key->in_file = true;
switch(key->type) {
case IHM_STRING:
/* buffer is owned by the caller, not the keyword, so don't take ownership */
key->own_data = false;
sprintf(buffer, "%d", ival);
key->data.str = buffer;
break;
case IHM_INT:
key->data.ival = ival;
break;
case IHM_FLOAT:
key->data.fval = ival;
break;
case IHM_BOOL:
key->omitted = true;
break;
}
}
/* Set the value of a given keyword from the given BinaryCIF data */
static void set_value_from_data(struct ihm_reader *reader,
struct ihm_category *category,
struct ihm_keyword *key, struct bcif_data *data,
size_t irow, char *buffer,
struct ihm_error **err)
{
/* If a key is duplicated, overwrite it with the new value */
if (key->in_file && key->type == IHM_STRING && key->own_data) {
free(key->data.str);
key->data.str = NULL;
}
/* BinaryCIF data is typed (unlike mmCIF, where every value is a string),
so we may need to convert it to the desired output type. */
switch(data->type) {
case BCIF_DATA_STRING:
set_value_from_bcif_string(key, data->data.string[irow], err);
break;
case BCIF_DATA_FLOAT:
/* promote to double */
set_value_from_bcif_double(key, data->data.float32[irow], buffer);
break;
case BCIF_DATA_DOUBLE:
set_value_from_bcif_double(key, data->data.float64[irow], buffer);
break;
case BCIF_DATA_INT8:
/* promote to int32 */
set_value_from_bcif_int(key, data->data.int8[irow], buffer);
break;
case BCIF_DATA_UINT8:
/* promote to int32 */
set_value_from_bcif_int(key, data->data.uint8[irow], buffer);
break;
case BCIF_DATA_INT16:
/* promote to int32 */
set_value_from_bcif_int(key, data->data.int16[irow], buffer);
break;
case BCIF_DATA_UINT16:
/* promote to int32 */
set_value_from_bcif_int(key, data->data.uint16[irow], buffer);
break;
case BCIF_DATA_INT32:
set_value_from_bcif_int(key, data->data.int32[irow], buffer);
break;
default:
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Unhandled data type %d", data->type);
break;
}
}
/* Send the data for one category row to the callback */
static bool process_bcif_row(struct ihm_reader *reader,
struct bcif_category *cat,
struct ihm_category *ihm_cat,
size_t irow, struct ihm_error **err)
{
struct bcif_column *col;
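/* Per the BinaryCIF format, mask values are 0 = value present,
   1 = omitted ('.'), 2 = unknown ('?') */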
for (col = cat->first_column; col; col = col->next) {
if (!col->keyword) continue;
if (col->mask_data.type == BCIF_DATA_UINT8
&& col->mask_data.data.uint8[irow] == 1) {
set_omitted_value(col->keyword);
} else if (col->mask_data.type == BCIF_DATA_UINT8
&& col->mask_data.data.uint8[irow] == 2) {
set_unknown_value(col->keyword);
} else {
set_value_from_data(reader, ihm_cat, col->keyword, &col->data, irow,
col->str, err);
if (*err) return false;
}
}
call_category(reader, ihm_cat, true, err);
if (*err) return false;
return true;
}
/* Check a read-in category, and send out the data via callbacks */
static bool process_bcif_category(struct ihm_reader *reader,
struct bcif_category *cat,
struct ihm_category *ihm_cat,
struct ihm_error **err)
{
struct bcif_column *col;
size_t i, n_rows = 0;
if (!ihm_cat) {
if (reader->unknown_category_callback) {
(*reader->unknown_category_callback)(
reader, cat->name, 0, reader->unknown_category_data, err);
if (*err) return false;
}
return true;
}
if (!check_bcif_columns(reader, cat, ihm_cat, err)) return false;
for (col = cat->first_column; col; col = col->next) {
if (!col->keyword) continue;
if (!process_column_data(col, err)
|| !process_column_mask(col, err)) return false;
/* Make buffer for value as a string; should be long enough to
store any int or double */
col->str = (char *)ihm_malloc(80);
if (n_rows == 0) {
n_rows = col->data.size;
} else if (col->data.size != n_rows) {
ihm_error_set(err, IHM_ERROR_FILE_FORMAT,
"Column size mismatch %lu != %lu in category %s",
(unsigned long)col->data.size, (unsigned long)n_rows, cat->name);
return false;
}
}
for (i = 0; i < n_rows; ++i) {
if (!process_bcif_row(reader, cat, ihm_cat, i, err)) return false;
}
if (ihm_cat->finalize_callback) {
(*ihm_cat->finalize_callback)(reader, reader->linenum, ihm_cat->data, err);
if (*err) return false;
}
return true;
}
/* Read all categories from a BinaryCIF file */
static bool read_bcif_categories(struct ihm_reader *reader,
struct ihm_error **err)
{
uint32_t ncat, icat;
if (!read_bcif_array(reader, &ncat, err)) return false;
for (icat = 0; icat < ncat; ++icat) {
struct bcif_category cat;
struct ihm_category *ihm_cat;
bcif_category_init(&cat);
if (!read_bcif_category(reader, &cat, &ihm_cat, err)
|| !process_bcif_category(reader, &cat, ihm_cat, err)) {
bcif_category_free(&cat);
return false;
} else {
bcif_category_free(&cat);
}
}
return true;
}
/* Read the next data block from a BinaryCIF file */
static bool read_bcif_block(struct ihm_reader *reader, struct ihm_error **err)
{
uint32_t map_size, i;
if (!read_bcif_map(reader, &map_size, err)) return false;
for (i = 0; i < map_size; ++i) {
bool match;
if (!read_bcif_exact_string(reader, "categories", &match,
err)) return false;
if (match) {
if (!read_bcif_categories(reader, err)) return false;
} else {
if (!skip_bcif_object(reader, err)) return false;
}
}
reader->num_blocks_left--;
return true;
}
/* Read a single data block from a BinaryCIF file, reading the file
header first on the initial call. */
static bool read_bcif_file(struct ihm_reader *reader, bool *more_data,
struct ihm_error **err)
{
*more_data = false;
sort_mappings(reader);
if (reader->num_blocks_left == -1) {
cmp_init(&reader->cmp, reader, bcif_cmp_read, bcif_cmp_skip, NULL);
if (!read_bcif_header(reader, err)) return false;
}
if (reader->num_blocks_left > 0) {
if (!read_bcif_block(reader, err)) return false;
}
*more_data = (reader->num_blocks_left > 0);
return true;
}
/* Read a data block from an mmCIF or BinaryCIF file. */
bool ihm_read_file(struct ihm_reader *reader, bool *more_data,
struct ihm_error **err)
{
if (reader->binary) {
return read_bcif_file(reader, more_data, err);
} else {
return read_mmcif_file(reader, more_data, err);
}
}
python-ihm-2.7/src/ihm_format.h 0000664 0000000 0000000 00000017615 15035733372 0016565 0 ustar 00root root 0000000 0000000 /** \file ihm_format.h Routines for handling mmCIF or BinaryCIF format files.
*
* The file is read sequentially. All values for desired keywords in
* desired categories are collected (other parts of the file are ignored).
*
* For mmCIF, at the end of the file and each save frame a callback function
* for each category is called to process the data. In the case of mmCIF
* loops, this callback will be called multiple times, once for each entry
* in the loop.
*
* For BinaryCIF, the category callback will be called as each category
* is encountered in the file, once per row.
*/
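/* Example usage (a minimal sketch, not part of the library; the category
   name "_struct", the keyword "title" and the input file are hypothetical,
   and error handling is abbreviated):

   #include <stdio.h>
   #include <fcntl.h>
   #include "ihm_format.h"

   static struct ihm_keyword *title_key;

   static void struct_cb(struct ihm_reader *reader, int linenum,
                         void *data, struct ihm_error **err)
   {
     if (title_key->in_file && !title_key->omitted && !title_key->unknown) {
       printf("title: %s\n", title_key->data.str);
     }
   }

   int main(void)
   {
     bool more_data;
     struct ihm_error *err = NULL;
     struct ihm_file *fh = ihm_file_new_from_fd(open("input.cif", O_RDONLY));
     struct ihm_reader *reader = ihm_reader_new(fh, false);
     struct ihm_category *cat = ihm_category_new(
         reader, "_struct", struct_cb, NULL, NULL, NULL, NULL);
     title_key = ihm_keyword_str_new(cat, "title");
     while (ihm_read_file(reader, &more_data, &err) && more_data) {}
     if (err) { fprintf(stderr, "%s\n", err->msg); ihm_error_free(err); }
     ihm_reader_free(reader);
     return 0;
   }
*/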
#ifndef IHM_FORMAT_H
#define IHM_FORMAT_H
#include <stddef.h> /* For size_t */
#if defined(_MSC_VER)
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#if _MSC_VER > 1800
#include <stdbool.h> /* For bool */
#else
typedef int bool;
#define true 1
#define false 0
#endif
#else
#include <stdbool.h> /* For bool */
#include <sys/types.h> /* For ssize_t */
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* IHM error types */
typedef enum {
IHM_ERROR_VALUE, /* Bad value */
IHM_ERROR_IO, /* Input/output error */
IHM_ERROR_FILE_FORMAT, /* File format error */
} IHMErrorCode;
/* Error reported by IHM functions. The caller is responsible for freeing
the memory used by this struct by calling ihm_error_free(). */
struct ihm_error {
/* The type of error */
IHMErrorCode code;
/* Human-readable error message */
char *msg;
};
/* Free the memory used by an ihm_error */
void ihm_error_free(struct ihm_error *err);
/* Set the error indicator */
void ihm_error_set(struct ihm_error **err, IHMErrorCode code,
const char *format, ...);
#ifndef SWIG
typedef enum {
IHM_STRING = 1,
IHM_INT,
IHM_FLOAT,
IHM_BOOL
} ihm_keyword_type;
/* A keyword in an mmCIF or BinaryCIF file. Holds a description of its
format and any value read from the file. */
struct ihm_keyword {
char *name;
/* Type of value (string, int, float) */
ihm_keyword_type type;
/* Last value read from the file */
union {
char *str;
int ival;
double fval;
bool bval;
} data;
/* If true, we own the memory for data */
bool own_data;
/* true iff this keyword is in the file (not necessarily with a value) */
bool in_file;
/* true iff the keyword is in the file but the value is omitted ('.') */
bool omitted;
/* true iff the keyword is in the file but the value is unknown ('?') */
bool unknown;
};
#endif
/* Opaque types */
struct ihm_reader;
struct ihm_category;
/* Callback for mmCIF/BinaryCIF category data. Should set err on failure */
typedef void (*ihm_category_callback)(struct ihm_reader *reader, int linenum,
void *data, struct ihm_error **err);
/* Callback for unknown mmCIF/BinaryCIF categories. Should set err on failure */
typedef void (*ihm_unknown_category_callback)(struct ihm_reader *reader,
const char *category, int linenum,
void *data,
struct ihm_error **err);
/* Callback for unknown mmCIF/BinaryCIF keywords. Should set err on failure */
typedef void (*ihm_unknown_keyword_callback)(struct ihm_reader *reader,
const char *category,
const char *keyword, int linenum,
void *data,
struct ihm_error **err);
/* Callback to free arbitrary data */
typedef void (*ihm_free_callback)(void *data);
/* Make a new struct ihm_category and add it to the reader. */
struct ihm_category *ihm_category_new(struct ihm_reader *reader,
const char *name,
ihm_category_callback data_callback,
ihm_category_callback end_frame_callback,
ihm_category_callback finalize_callback,
void *data, ihm_free_callback free_func);
/* Set a callback for unknown categories.
The given callback is called whenever a category is encountered in the
file that is not handled (by ihm_category_new).
*/
void ihm_reader_unknown_category_callback_set(struct ihm_reader *reader,
ihm_unknown_category_callback callback,
void *data, ihm_free_callback free_func);
/* Set a callback for unknown keywords.
The given callback is called whenever a keyword is encountered in the
file that is not handled (within a category that is handled by
ihm_category_new).
*/
void ihm_reader_unknown_keyword_callback_set(struct ihm_reader *reader,
ihm_unknown_keyword_callback callback,
void *data, ihm_free_callback free_func);
/* Remove all categories from the reader.
This also removes any unknown category or keyword callbacks.
*/
void ihm_reader_remove_all_categories(struct ihm_reader *reader);
/* Add a new integer ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_int_new(struct ihm_category *category,
const char *name);
/* Add a new floating-point ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_float_new(struct ihm_category *category,
const char *name);
/* Add a new boolean ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_bool_new(struct ihm_category *category,
const char *name);
/* Add a new string ihm_keyword to a category. */
struct ihm_keyword *ihm_keyword_str_new(struct ihm_category *category,
const char *name);
struct ihm_file;
struct ihm_string;
/* Read data into the ihm_file buffer.
Return the number of bytes read (0 on EOF), or -1 (and sets err) on failure.
*/
typedef ssize_t (*ihm_file_read_callback)(char *buffer, size_t buffer_len,
void *data, struct ihm_error **err);
/* Track a file (or filelike object) that the data is read from */
struct ihm_file {
/* Raw data read from the file */
struct ihm_string *buffer;
/* Offset into buffer of the start of the current line */
size_t line_start;
/* Offset into buffer of the start of the next line, or line_start if the
line hasn't been read yet */
size_t next_line_start;
/* Callback function to read more data into buffer */
ihm_file_read_callback read_callback;
/* Data to pass to callback function */
void *data;
/* Function to free callback_data (or NULL) */
ihm_free_callback free_func;
};
/* Make a new ihm_file, used to handle reading data from a file.
`read_callback` is used to read a chunk of data from the file;
`data` is arbitrary data that is passed to the read callback;
`free_func` is used to do any necessary cleanup of `data` when
the ihm_file structure is freed. */
struct ihm_file *ihm_file_new(ihm_file_read_callback read_callback,
void *data, ihm_free_callback free_func);
/* Make a new ihm_file that will read data from the given file descriptor */
struct ihm_file *ihm_file_new_from_fd(int fd);
/* Make a new struct ihm_reader.
To read an mmCIF file, set binary=false; to read BinaryCIF, set binary=true.
*/
struct ihm_reader *ihm_reader_new(struct ihm_file *fh, bool binary);
/* Free memory used by a struct ihm_reader.
Note that this does not close the
underlying file descriptor or object that is wrapped by ihm_file. */
void ihm_reader_free(struct ihm_reader *reader);
/* Read a data block from an mmCIF or BinaryCIF file.
*more_data is set true iff more data blocks are available after this one.
Return false and set err on error. */
bool ihm_read_file(struct ihm_reader *reader, bool *more_data,
struct ihm_error **err);
#ifdef __cplusplus
}
#endif
#endif /* IHM_FORMAT_H */
python-ihm-2.7/src/ihm_format.i 0000664 0000000 0000000 00000047063 15035733372 0016566 0 ustar 00root root 0000000 0000000 %module _format
%{
#include
#include <assert.h>
%}
/* Get simple return values */
%apply bool *OUTPUT { bool * };
%ignore ihm_keyword;
%ignore ihm_error_set;
/* Convert ihm_error to a Python exception */
%init {
file_format_error = PyErr_NewException("_format.FileFormatError", NULL, NULL);
Py_INCREF(file_format_error);
PyModule_AddObject(m, "FileFormatError", file_format_error);
}
%{
static PyObject *file_format_error;
static void handle_error(struct ihm_error *err)
{
PyObject *py_err_type = PyExc_IOError;
switch(err->code) {
case IHM_ERROR_FILE_FORMAT:
py_err_type = file_format_error;
break;
case IHM_ERROR_VALUE:
py_err_type = PyExc_ValueError;
break;
case IHM_ERROR_IO:
py_err_type = PyExc_IOError;
break;
}
/* Don't overwrite a Python exception already raised (e.g. by a callback) */
if (!PyErr_Occurred()) {
PyErr_SetString(py_err_type, err->msg);
}
ihm_error_free(err);
}
%}
%typemap(in, numinputs=0) struct ihm_error **err (struct ihm_error *temp) {
temp = NULL;
$1 = &temp;
}
%typemap(argout) struct ihm_error **err {
if (*$1) {
handle_error(*$1);
Py_DECREF(resultobj);
SWIG_fail;
}
}
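/* With the typemaps above, a wrapped function such as
   ihm_read_file(reader, &more_data, &err) is called from Python as
   _format.ihm_read_file(reader): the bool output parameter becomes the
   return value, and a non-NULL err is raised as IOError, ValueError or
   FileFormatError instead of being returned. */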
%{
/* Read data from a Python filelike object, in text mode */
static ssize_t pyfile_text_read_callback(char *buffer, size_t buffer_len,
void *data, struct ihm_error **err)
{
Py_ssize_t read_len;
char *read_str;
static char fmt[] = "(n)";
PyObject *bytes = NULL;
PyObject *read_method = data;
/* Note that we can read up to `buffer_len` *bytes*, but Python's read()
can return Unicode *characters*. One Unicode character can require up to
4 bytes to be represented with UTF-8, so limit the read accordingly.
(mmCIF files are supposed to be ASCII but we should be liberal in what
we accept.) */
PyObject *result = PyObject_CallFunction(read_method, fmt, buffer_len / 4);
if (!result) {
ihm_error_set(err, IHM_ERROR_VALUE, "read failed");
return -1;
}
if (PyUnicode_Check(result)) {
/* This returns const char * on Python 3.7 or later */
if (!(read_str = (char *)PyUnicode_AsUTF8AndSize(result, &read_len))) {
ihm_error_set(err, IHM_ERROR_VALUE, "string creation failed");
Py_DECREF(result);
return -1;
}
} else if (PyBytes_Check(result)) {
char *bytes_buffer;
Py_ssize_t bytes_length;
bytes = result;
/* Convert to Unicode. Since we don't know the encoding, choose something
permissive (latin-1). mmCIF files are supposed to be ASCII anyway. */
if (PyBytes_AsStringAndSize(bytes, &bytes_buffer, &bytes_length) < 0) {
Py_DECREF(bytes);
ihm_error_set(err, IHM_ERROR_VALUE, "PyBytes_AsStringAndSize failed");
return -1;
}
result = PyUnicode_DecodeLatin1(bytes_buffer, bytes_length, NULL);
Py_DECREF(bytes);
if (!result) {
ihm_error_set(err, IHM_ERROR_VALUE, "latin1 string creation failed");
return -1;
}
/* This returns const char * on Python 3.7 or later */
if (!(read_str = (char *)PyUnicode_AsUTF8AndSize(result, &read_len))) {
ihm_error_set(err, IHM_ERROR_VALUE, "string creation failed");
Py_DECREF(result);
return -1;
}
} else {
ihm_error_set(err, IHM_ERROR_VALUE, "read method should return a string");
Py_DECREF(result);
return -1;
}
if (read_len > buffer_len) {
ihm_error_set(err, IHM_ERROR_VALUE,
"Python read method returned too many bytes");
Py_DECREF(result);
return -1;
}
memcpy(buffer, read_str, read_len);
Py_DECREF(result);
return read_len;
}
/* Read data from a Python filelike object, in binary mode */
static ssize_t pyfile_binary_read_callback(char *buffer, size_t buffer_len,
void *data, struct ihm_error **err)
{
Py_ssize_t read_len;
char *read_str;
static char fmt[] = "(n)";
PyObject *read_method = data;
PyObject *result = PyObject_CallFunction(read_method, fmt, buffer_len);
if (!result) {
ihm_error_set(err, IHM_ERROR_VALUE, "read failed");
return -1;
}
if (PyBytes_Check(result)) {
if (PyBytes_AsStringAndSize(result, &read_str, &read_len) < 0) {
ihm_error_set(err, IHM_ERROR_VALUE, "PyBytes_AsStringAndSize failed");
Py_DECREF(result);
return -1;
}
} else {
ihm_error_set(err, IHM_ERROR_VALUE, "read method should return bytes");
Py_DECREF(result);
return -1;
}
if (read_len > buffer_len) {
ihm_error_set(err, IHM_ERROR_VALUE,
"Python read method returned too many bytes");
Py_DECREF(result);
return -1;
}
memcpy(buffer, read_str, read_len);
Py_DECREF(result);
return read_len;
}
/* Read data from a Python filelike object directly into the buffer */
static ssize_t pyfile_binary_readinto_callback(
char *buffer, size_t buffer_len,
void *data, struct ihm_error **err)
{
PyObject *readinto_method = data;
PyObject *memview, *result;
Py_ssize_t read_len;
memview = PyMemoryView_FromMemory(buffer, buffer_len, PyBUF_WRITE);
result = PyObject_CallFunctionObjArgs(readinto_method, memview, NULL);
Py_DECREF(memview);
if (!result) {
ihm_error_set(err, IHM_ERROR_VALUE, "Python readinto failed");
return -1;
}
if (!PyLong_Check(result)) {
ihm_error_set(err, IHM_ERROR_VALUE, "Python readinto did not return int");
Py_DECREF(result);
return -1;
}
if ((read_len = PyLong_AsSsize_t(result)) == -1 && PyErr_Occurred()) {
ihm_error_set(err, IHM_ERROR_VALUE, "Python readinto bad return");
Py_DECREF(result);
return -1;
}
Py_DECREF(result);
if (read_len > buffer_len) {
ihm_error_set(err, IHM_ERROR_VALUE,
"Python readinto method returned too many bytes");
return -1;
} else {
return read_len;
}
}
static void pyfile_free(void *data)
{
PyObject *read_method = data;
Py_DECREF(read_method);
}
static PyObject *get_optional_attr_str(PyObject *obj, const char *attr) {
PyObject *method = PyObject_GetAttrString(obj, attr);
if (!method) {
PyErr_Clear();
}
return method;
}
%}
%inline %{
/* Wrap a Python file object as an ihm_file */
struct ihm_file *ihm_file_new_from_python(PyObject *pyfile, bool binary,
struct ihm_error **err)
{
PyObject *read_method;
ihm_file_read_callback read_callback;
read_callback = binary ? pyfile_binary_read_callback
: pyfile_text_read_callback;
/* In binary mode, we can avoid a copy if the object supports readinto() */
if (binary && (read_method = get_optional_attr_str(pyfile, "readinto"))) {
read_callback = pyfile_binary_readinto_callback;
} else {
/* Look for a read() method and use that to read data */
if (!(read_method = PyObject_GetAttrString(pyfile, "read"))) {
ihm_error_set(err, IHM_ERROR_VALUE, "no read method");
return NULL;
}
}
return ihm_file_new(read_callback, read_method, pyfile_free);
}
%}
%{
struct category_handler_data {
/* The Python callable object that is given the data */
PyObject *callable;
/* Python value used for keywords not in the file (usually None) */
PyObject *not_in_file;
/* Python value used for keywords marked as omitted, '.' (usually None) */
PyObject *omitted;
/* Python value used for keywords marked as unknown, '?' (usually "?") */
PyObject *unknown;
/* The number of keywords in the category that we extract from the file */
int num_keywords;
/* Array of the keywords */
struct ihm_keyword **keywords;
};
static void category_handler_data_free(void *data)
{
struct category_handler_data *hd = data;
Py_DECREF(hd->callable);
Py_XDECREF(hd->not_in_file);
Py_XDECREF(hd->omitted);
Py_XDECREF(hd->unknown);
/* Don't need to free each hd->keywords[i] as the ihm_reader owns
these pointers */
free(hd->keywords);
free(hd);
}
/* Called for each category (or loop construct data line) with data */
static void handle_category_data(struct ihm_reader *reader, int linenum,
void *data, struct ihm_error **err)
{
int i;
struct category_handler_data *hd = data;
struct ihm_keyword **keys;
PyObject *ret, *tuple;
/* make a tuple of the data */
tuple = PyTuple_New(hd->num_keywords);
if (!tuple) {
ihm_error_set(err, IHM_ERROR_VALUE, "tuple creation failed");
return;
}
for (i = 0, keys = hd->keywords; i < hd->num_keywords; ++i, ++keys) {
PyObject *val;
if (!(*keys)->in_file) {
val = hd->not_in_file;
Py_INCREF(val);
} else if ((*keys)->omitted) {
val = hd->omitted;
Py_INCREF(val);
} else if ((*keys)->unknown) {
val = hd->unknown;
Py_INCREF(val);
} else {
switch((*keys)->type) {
case IHM_STRING:
val = PyUnicode_FromString((*keys)->data.str);
if (!val) {
ihm_error_set(err, IHM_ERROR_VALUE, "string creation failed");
Py_DECREF(tuple);
return;
}
break;
case IHM_INT:
val = PyLong_FromLong((*keys)->data.ival);
break;
case IHM_FLOAT:
val = PyFloat_FromDouble((*keys)->data.fval);
break;
case IHM_BOOL:
val = (*keys)->data.bval ? Py_True : Py_False;
Py_INCREF(val);
break;
}
}
/* Steals ref to val */
PyTuple_SET_ITEM(tuple, i, val);
}
/* pass the data to Python */
ret = PyObject_CallObject(hd->callable, tuple);
Py_DECREF(tuple);
if (ret) {
Py_DECREF(ret); /* discard return value */
} else {
/* Pass Python exception back to the original caller */
ihm_error_set(err, IHM_ERROR_VALUE, "Python error");
}
}
/* Called at the end of each save frame for each category */
static void end_frame_category(struct ihm_reader *reader, int linenum,
void *data, struct ihm_error **err)
{
PyObject *ret;
struct category_handler_data *hd = data;
ret = PyObject_CallMethod(hd->callable, "end_save_frame", NULL);
if (ret) {
Py_DECREF(ret); /* discard return value */
} else {
/* Pass Python exception back to the original caller */
ihm_error_set(err, IHM_ERROR_VALUE, "Python error");
}
}
static struct category_handler_data *do_add_handler(
struct ihm_reader *reader, char *name,
PyObject *keywords, PyObject *int_keywords,
PyObject *float_keywords,
PyObject *bool_keywords, PyObject *callable,
ihm_category_callback data_callback,
ihm_category_callback end_frame_callback,
ihm_category_callback finalize_callback,
struct ihm_error **err)
{
Py_ssize_t seqlen, i;
struct ihm_category *category;
struct category_handler_data *hd;
if (!PySequence_Check(keywords)) {
ihm_error_set(err, IHM_ERROR_VALUE, "'keywords' should be a sequence");
return NULL;
}
if (!PyAnySet_Check(int_keywords)) {
ihm_error_set(err, IHM_ERROR_VALUE, "'int_keywords' should be a set");
return NULL;
}
if (!PyAnySet_Check(float_keywords)) {
ihm_error_set(err, IHM_ERROR_VALUE, "'float_keywords' should be a set");
return NULL;
}
if (!PyAnySet_Check(bool_keywords)) {
ihm_error_set(err, IHM_ERROR_VALUE, "'bool_keywords' should be a set");
return NULL;
}
if (!PyCallable_Check(callable)) {
ihm_error_set(err, IHM_ERROR_VALUE,
"'callable' should be a callable object");
return NULL;
}
seqlen = PySequence_Length(keywords);
hd = malloc(sizeof(struct category_handler_data));
Py_INCREF(callable);
hd->callable = callable;
hd->not_in_file = NULL;
hd->omitted = NULL;
hd->unknown = NULL;
hd->num_keywords = seqlen;
hd->keywords = malloc(sizeof(struct ihm_keyword *) * seqlen);
category = ihm_category_new(reader, name, data_callback, end_frame_callback,
finalize_callback, hd,
category_handler_data_free);
if (!(hd->not_in_file = PyObject_GetAttrString(callable, "not_in_file"))
|| !(hd->omitted = PyObject_GetAttrString(callable, "omitted"))
|| !(hd->unknown = PyObject_GetAttrString(callable, "unknown"))) {
ihm_error_set(err, IHM_ERROR_VALUE, "missing attribute");
return NULL;
}
for (i = 0; i < seqlen; ++i) {
const char *key_name;
PyObject *o = PySequence_GetItem(keywords, i);
if (PyUnicode_Check(o)) {
key_name = PyUnicode_AsUTF8(o);
if (PySet_Contains(int_keywords, o) == 1) {
hd->keywords[i] = ihm_keyword_int_new(category, key_name);
} else if (PySet_Contains(float_keywords, o) == 1) {
hd->keywords[i] = ihm_keyword_float_new(category, key_name);
} else if (PySet_Contains(bool_keywords, o) == 1) {
hd->keywords[i] = ihm_keyword_bool_new(category, key_name);
} else {
hd->keywords[i] = ihm_keyword_str_new(category, key_name);
}
Py_DECREF(o);
} else {
Py_XDECREF(o);
ihm_error_set(err, IHM_ERROR_VALUE,
"keywords[%ld] should be a string", (long)i);
return NULL;
}
}
return hd;
}
/* Pass unknown category info to a Python callable */
static void unknown_category_python(struct ihm_reader *reader,
const char *category, int linenum,
void *data, struct ihm_error **err)
{
static char fmt[] = "(si)";
PyObject *callable = data;
PyObject *result = PyObject_CallFunction(callable, fmt, category, linenum);
if (!result) {
ihm_error_set(err, IHM_ERROR_VALUE, "Python error");
} else {
Py_DECREF(result);
}
}
/* Pass unknown keyword info to a Python callable */
static void unknown_keyword_python(struct ihm_reader *reader,
const char *category, const char *keyword,
int linenum, void *data,
struct ihm_error **err)
{
static char fmt[] = "(ssi)";
PyObject *callable = data;
PyObject *result = PyObject_CallFunction(callable, fmt, category, keyword,
linenum);
if (!result) {
ihm_error_set(err, IHM_ERROR_VALUE, "Python error");
} else {
Py_DECREF(result);
}
}
/* Treat data as a Python object, and decrease its refcount */
static void free_python_callable(void *data)
{
PyObject *obj = data;
Py_DECREF(obj);
}
%}
%inline %{
/* Add a handler for unknown categories */
void add_unknown_category_handler(struct ihm_reader *reader,
PyObject *callable, struct ihm_error **err)
{
if (!PyCallable_Check(callable)) {
ihm_error_set(err, IHM_ERROR_VALUE,
"'callable' should be a callable object");
return;
}
Py_INCREF(callable);
ihm_reader_unknown_category_callback_set(reader, unknown_category_python,
callable, free_python_callable);
}
/* Add a handler for unknown keywords */
void add_unknown_keyword_handler(struct ihm_reader *reader,
PyObject *callable, struct ihm_error **err)
{
if (!PyCallable_Check(callable)) {
ihm_error_set(err, IHM_ERROR_VALUE,
"'callable' should be a callable object");
return;
}
Py_INCREF(callable);
ihm_reader_unknown_keyword_callback_set(reader, unknown_keyword_python,
callable, free_python_callable);
}
/* Add a generic category handler which collects all specified keywords for
the given category and passes them to a Python callable */
void add_category_handler(struct ihm_reader *reader, char *name,
PyObject *keywords, PyObject *int_keywords,
PyObject *float_keywords, PyObject *bool_keywords,
PyObject *callable, struct ihm_error **err)
{
do_add_handler(reader, name, keywords, int_keywords, float_keywords,
bool_keywords, callable, handle_category_data,
end_frame_category, NULL, err);
}
%}
%{
/* Called for each _pdbx_poly_seq_scheme line */
static void handle_poly_seq_scheme_data(struct ihm_reader *reader, int linenum,
void *data, struct ihm_error **err)
{
int i, seq_id, pdb_seq_num, auth_seq_num;
char *seq_id_endptr, *pdb_seq_num_endptr, *auth_seq_num_endptr;
struct category_handler_data *hd = data;
struct ihm_keyword **keys;
/* If both asym_id (1st keyword) and pdb_strand_id (6th keyword) are
present, but different, call the Python handler */
if (hd->keywords[0]->in_file && hd->keywords[5]->in_file &&
!hd->keywords[0]->omitted && !hd->keywords[5]->omitted &&
!hd->keywords[0]->unknown && !hd->keywords[5]->unknown &&
strcmp(hd->keywords[0]->data.str, hd->keywords[5]->data.str) != 0) {
handle_category_data(reader, linenum, data, err);
return;
}
for (i = 0, keys = hd->keywords; i < 4; ++i, ++keys) {
/* Call Python handler if any of asym_id, seq_id, pdb_seq_num,
or auth_seq_num are missing */
if (!(*keys)->in_file || (*keys)->omitted || (*keys)->unknown) {
handle_category_data(reader, linenum, data, err);
return;
}
}
/* If seq_id (2nd keyword), pdb_seq_num (3rd keyword), and
auth_seq_num (4th keyword) are identical integers, and
pdb_ins_code (5th keyword) is blank or missing,
nothing needs to be done */
seq_id = strtol(hd->keywords[1]->data.str, &seq_id_endptr, 10);
pdb_seq_num = strtol(hd->keywords[2]->data.str, &pdb_seq_num_endptr, 10);
auth_seq_num = strtol(hd->keywords[3]->data.str, &auth_seq_num_endptr, 10);
if (!*seq_id_endptr && !*pdb_seq_num_endptr && !*auth_seq_num_endptr
&& seq_id == pdb_seq_num && seq_id == auth_seq_num
&& (!hd->keywords[4]->in_file || hd->keywords[4]->omitted
|| hd->keywords[4]->unknown)) {
return;
} else {
/* Otherwise, call the normal handler */
handle_category_data(reader, linenum, data, err);
}
}
%}
%inline %{
/* Add a handler specifically for the _pdbx_poly_seq_scheme table.
This speeds up processing by skipping the callback to Python in
the common case where seq_id==pdb_seq_num==auth_seq_num,
asym_id==pdb_strand_id, and pdb_ins_code is blank */
void add_poly_seq_scheme_handler(struct ihm_reader *reader, char *name,
PyObject *keywords, PyObject *int_keywords,
PyObject *float_keywords,
PyObject *bool_keywords, PyObject *callable,
struct ihm_error **err)
{
struct category_handler_data *hd;
hd = do_add_handler(reader, name, keywords, int_keywords, float_keywords,
bool_keywords, callable, handle_poly_seq_scheme_data,
NULL, NULL, err);
if (hd) {
/* Make sure the Python handler and the C handler agree on the order
of the keywords */
assert(hd->num_keywords >= 6);
assert(strcmp(hd->keywords[1]->name, "seq_id") == 0);
assert(strcmp(hd->keywords[2]->name, "pdb_seq_num") == 0);
assert(strcmp(hd->keywords[3]->name, "auth_seq_num") == 0);
assert(strcmp(hd->keywords[4]->name, "pdb_ins_code") == 0);
assert(strcmp(hd->keywords[5]->name, "pdb_strand_id") == 0);
}
}
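/* For example, a typical _pdbx_poly_seq_scheme row with asym_id "A",
   seq_id 5, pdb_seq_num 5, auth_seq_num 5, a blank pdb_ins_code, and
   pdb_strand_id "A" is consumed entirely by the C handler above,
   with no callback into Python. */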
/* Test function so we can make sure finalize callbacks work */
void _test_finalize_callback(struct ihm_reader *reader, char *name,
PyObject *keywords, PyObject *int_keywords,
PyObject *float_keywords, PyObject *bool_keywords,
PyObject *callable, struct ihm_error **err)
{
do_add_handler(reader, name, keywords, int_keywords, float_keywords,
bool_keywords, callable, handle_category_data, NULL,
handle_category_data, err);
}
%}
%include "ihm_format.h"
python-ihm-2.7/test/ 0000775 0000000 0000000 00000000000 15035733372 0014445 5 ustar 00root root 0000000 0000000 python-ihm-2.7/test/input/ 0000775 0000000 0000000 00000000000 15035733372 0015604 5 ustar 00root root 0000000 0000000 python-ihm-2.7/test/input/15133C.pdb 0000664 0000000 0000000 00000000117 15035733372 0017051 0 ustar 00root root 0000000 0000000 ATOM      2  CA  TYR A   7      -8.986  11.688  -5.817  1.00 91.82           C
python-ihm-2.7/test/input/6ep0.bcif.gz 0000664 0000000 0000000 00000325305 15035733372 0017632 0 ustar 00root root 0000000 0000000 [binary gzip-compressed BinaryCIF test data omitted]