14#ifndef MMTF_STRUCTURE_DATA_H
15#define MMTF_STRUCTURE_DATA_H
/// MMTF spec version which this library implements, split into its
/// major and minor components (both are 1 here).
34#define MMTF_SPEC_VERSION_MAJOR 1
35#define MMTF_SPEC_VERSION_MINOR 1
115 for(
size_t i = 16; i--;) {
/// Check consistency of structural data.
///
/// @param verbose  Defaults to false. The definition writes an
///                 "inconsistent ..." message to std::cout for each kind of
///                 failed check; presumably those messages are only emitted
///                 when this flag is set (the guarding condition is not
///                 visible in this excerpt -- TODO confirm).
/// @param chain_name_max_length  Upper bound (default 4) compared against the
///                 size of each chainIdList and chainNameList entry.
/// @return bool; NOTE(review): presumably true when every check passes --
///         the return statements are not visible in this excerpt.
228 bool hasConsistentData(
bool verbose=
false, uint32_t chain_name_max_length = 4)
const;
/// Read out the contents of this StructureData in a PDB-like fashion.
///
/// Each atom record starts with "HETATM" or "ATOM"; a "." is written for
/// fields that have no value to show.
///
/// @param delim  Separator written after each column (defaults to a tab).
/// @return The formatted text as a std::string.
237 std::string
print(std::string delim=
"\t")
const;
250 return !(*
this == c);
/// Copy every entry of `source` into `target`, key by key.
///
/// The definition iterates over `source` and assigns an intermediate object
/// to `target[key]`. NOTE(review): how that intermediate object is built is
/// not visible in this excerpt -- presumably a copy tied to this instance's
/// msgpack zone; confirm against the full definition.
///
/// @param target  Map that receives the entries (existing keys are overwritten).
/// @param source  Map whose entries are copied.
255 void copyMapData_(std::map<std::string, msgpack::object>& target,
256 const std::map<std::string, msgpack::object>& source);
/// Declaration of isDefaultValue for a string-keyed map of msgpack objects.
/// NOTE(review): the matching definition is not identifiable in this excerpt;
/// presumably an empty map counts as the default value -- confirm.
280inline bool isDefaultValue(
const std::map<std::string, msgpack::object>& value);
/// Check if chain is a polymer chain.
///
/// The definition looks for `chain_index` in the chainIndexList of each
/// entity and, for the entity that contains it, reports whether that
/// entity's type is "polymer" or "POLYMER".
///
/// @param chain_index  Index of the chain to look up.
/// @param entity_list  Entities to search.
/// @return true if the owning entity's type is "polymer"/"POLYMER".
/// NOTE(review): when no entity lists `chain_index`, an
/// "'is_polymer' unable to find chain_index" message is assembled; presumably
/// it is thrown as an exception -- the throw is not visible here, confirm.
298inline bool is_polymer(
const unsigned int chain_index,
299 const std::vector<Entity>& entity_list);
/// Overload of is_hetatm taking a chain index, the entity list and a group
/// type (a separate overload takes a `const char* type` and checks whether
/// that group type consists of HETATM atoms).
///
/// NOTE(review): the body of this overload is not visible in this excerpt;
/// presumably it combines the polymer status of the chain with the group's
/// chemCompType -- confirm against the full definition.
///
/// @param chain_index  Index of the chain the group belongs to.
/// @param entity_list  Entities used to classify the chain.
/// @param group_type   Group (residue) description to classify.
334inline bool is_hetatm(
const unsigned int chain_index,
335 const std::vector<Entity>& entity_list,
336 const GroupType& group_type);
348bool isValidDateFormatOptional(
const std::string& s) {
352 if (s.length() != 10)
return false;
354 if (s[4] !=
'-' || s[7] !=
'-')
return false;
356 std::istringstream is(s);
359 if (is >> y >> dash1 >> m >> dash2 >> d) {
360 return (dash1 ==
'-' && dash2 ==
'-');
368bool hasRightSizeOptional(
const std::vector<T>& v,
int exp_size) {
373template<
typename T,
typename Tnum>
374bool hasValidIndices(
const T* v,
size_t size, Tnum num) {
376 for (
size_t i = 0; i < size; ++i) {
377 if (v[i] < T(0) || v[i] >= tnum)
return false;
381template<
typename T,
typename Tnum>
382bool hasValidIndices(
const std::vector<T>& v, Tnum num) {
383 if (v.empty())
return true;
384 else return hasValidIndices(&v[0], v.size(), num);
392 std::stringstream version;
394 return version.str();
398 std::stringstream ss(version_string);
414 return value.empty();
418 return value.empty();
422 return value.empty();
433 const std::vector<Entity>& entity_list) {
434 for (std::size_t i = 0; i < entity_list.size(); ++i) {
435 if ( std::find(entity_list[i].chainIndexList.begin(),
436 entity_list[i].chainIndexList.end(),
438 != entity_list[i].chainIndexList.end()) {
439 return ( entity_list[i].type ==
"polymer"
440 || entity_list[i].type ==
"POLYMER");
443 std::stringstream err;
444 err <<
"'is_polymer' unable to find chain_index: " << chain_index
445 <<
" in entity list";
450 const char* hetatm_type[] = {
451 "D-BETA-PEPTIDE, C-GAMMA LINKING",
452 "D-GAMMA-PEPTIDE, C-DELTA LINKING",
453 "D-PEPTIDE COOH CARBOXY TERMINUS",
454 "D-PEPTIDE NH3 AMINO TERMINUS",
457 "D-SACCHARIDE 1,4 AND 1,4 LINKING",
458 "D-SACCHARIDE 1,4 AND 1,6 LINKING",
459 "DNA OH 3 PRIME TERMINUS",
460 "DNA OH 5 PRIME TERMINUS",
464 "L-BETA-PEPTIDE, C-GAMMA LINKING",
465 "L-GAMMA-PEPTIDE, C-DELTA LINKING",
466 "L-PEPTIDE COOH CARBOXY TERMINUS",
467 "L-PEPTIDE NH3 AMINO TERMINUS",
470 "L-SACCHARIDE 1,4 AND 1,4 LINKING",
471 "L-SACCHARIDE 1,4 AND 1,6 LINKING",
472 "RNA OH 3 PRIME TERMINUS",
473 "RNA OH 5 PRIME TERMINUS",
481 for (
int i=0; hetatm_type[i]; ++i) {
482 if (strcmp(type,hetatm_type[i]) == 0)
return true;
488 const std::vector<Entity>& entity_list,
510 mmtfProducer =
"mmtf-cpp library (github.com/rcsb/mmtf-cpp)";
518 if (
this != &obj) copyAllData_(obj);
573 std::vector<int8_t> allowed_bond_orders;
574 allowed_bond_orders.push_back(-1);
575 allowed_bond_orders.push_back(1);
576 allowed_bond_orders.push_back(2);
577 allowed_bond_orders.push_back(3);
578 allowed_bond_orders.push_back(4);
581 if (!hasRightSizeOptional(
unitCell, 6)) {
583 std::cout <<
"inconsistent unitCell (unitCell length != 6)" << std::endl;
590 std::cout <<
"inconsistent depositionDate (does not match 'YYYY-MM-DD' "
591 "or empty)" << std::endl;
597 std::cout <<
"inconsistent releaseDate (does not match 'YYYY-MM-DD' "
598 "or empty)" << std::endl;
606 std::cout <<
"inconsistent ncsOperatorList idx: " << i <<
" found size: "
619 std::cout <<
"inconsistent BioAssemby transform i j: " << i
620 <<
" " << j << std::endl;
626 for (
size_t i = 0; i <
entityList.size(); ++i) {
630 std::cout <<
"inconsistent entity idx: " << i << std::endl;
636 for (
size_t i = 0; i <
groupList.size(); ++i) {
641 std::cout <<
"inconsistent group::atomNameList size at idx: "
648 std::cout <<
"inconsistent group::elementList size at idx: "
656 std::cout <<
"inconsistent group::bondAtomList size: " <<
657 g.
bondAtomList.size() <<
" != group::bondOrderList size(*2): " <<
663 if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
666 std::cout <<
"Cannot have bond order of: " << (int)g.
bondOrderList[j]
667 <<
" allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << j << std::endl;
676 std::cout <<
"Cannot have bondResonanceList without both " <<
677 "bondOrderList and bondAtomList! at idx: " << i << std::endl;
683 std::cout <<
"inconsistent group::bondOrderSize size: " <<
684 g.
bondOrderList.size() <<
" != group::bondResonanceList size: " <<
691 std::cout <<
"inconsistent group::bondAtomList size: " <<
692 g.
bondAtomList.size() <<
" != group::bondResonanceList size(*2): " <<
701 std::cout <<
"group::bondResonanceList had a Resonance of: "
709 std::cout <<
"group::bondResonanceList had a Resonance of: "
711 << (
int)g.
bondOrderList[j] <<
" we require unknown bondOrders to have resonance"
720 std::cout <<
"inconsistent group::bondAtomList indices (not all in [0, "
721 << num_atoms - 1 <<
"]) at idx: " << i << std::endl;
730 std::cout <<
"inconsistent bondAtomList size: " <<
731 bondAtomList.size() <<
" != bondOrderList size(*2): " <<
737 if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
740 std::cout <<
"Cannot have bond order of: " << (int)
bondOrderList[i]
741 <<
" allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << i << std::endl;
750 std::cout <<
"Cannot have bondResonanceList without both " <<
751 "bondOrderList and bondAtomList!" << std::endl;
757 std::cout <<
"inconsistent bondAtomList size: " <<
758 bondAtomList.size() <<
" != bondResonanceList size(*2): " <<
766 std::cout <<
"bondResonanceList had a Resonance of: "
774 std::cout <<
"bondResonanceList had a Resonance of: "
776 << (
int)
bondOrderList[i] <<
" we require unknown bondOrders to have resonance"
785 std::cout <<
"inconsistent bondAtomList indices (not all in [0, "
786 <<
numAtoms - 1 <<
"])" << std::endl;
793 std::cout <<
"inconsistent xCoordList size" << std::endl;
799 std::cout <<
"inconsistent yCoordList size" << std::endl;
805 std::cout <<
"inconsistent zCoordList size" << std::endl;
811 std::cout <<
"inconsistent bFactorList size" << std::endl;
817 std::cout <<
"inconsistent atomIdList size" << std::endl;
823 std::cout <<
"inconsistent altLocList size" << std::endl;
829 std::cout <<
"inconsistent occupancyList size" << std::endl;
835 std::cout <<
"inconsistent groupIdList size" << std::endl;
841 std::cout <<
"inconsistent groupTypeList size" << std::endl;
847 std::cout <<
"inconsistent secStructList size" << std::endl;
853 std::cout <<
"inconsistent insCodeList size" << std::endl;
859 std::cout <<
"inconsistent sequenceIndexList size" << std::endl;
865 std::cout <<
"inconsistent chainIdList size" << std::endl;
871 std::cout <<
"inconsistent chainNameList size" << std::endl;
877 std::cout <<
"inconsistent groupsPerChain size" << std::endl;
883 std::cout <<
"inconsistent chainsPerModel size" << std::endl;
890 std::cout <<
"inconsistent groupTypeList indices (not all in [0, "
891 <<
groupList.size() - 1 <<
"])" << std::endl;
896 std::vector<int32_t> sequenceIndexSize(
numChains);
897 for (
size_t i = 0; i <
entityList.size(); ++i) {
904 int bond_count_from_atom = 0;
905 int bond_count_from_order = 0;
906 int bond_count_from_resonance = 0;
907 bool all_bond_orderLists_are_default =
true;
908 bool all_bond_resonanceLists_are_default =
true;
909 bool all_bond_atomLists_are_default =
true;
911 all_bond_orderLists_are_default =
false;
915 all_bond_resonanceLists_are_default =
false;
919 all_bond_atomLists_are_default =
false;
926 for (
int model_idx = 0; model_idx <
numModels; ++model_idx) {
928 for (
int j = 0; j <
chainsPerModel[model_idx]; ++j, ++chain_idx) {
930 if (
chainIdList[chain_idx].size() > chain_name_max_length) {
932 std::cout <<
"inconsistent chainIdList size at chain_idx: "
933 << chain_idx <<
" size: "
939 &&
chainNameList[chain_idx].size() > chain_name_max_length) {
941 std::cout <<
"inconsistent chainNameList size at chain_idx:"
942 << chain_idx <<
" size: "
948 for (
int k = 0; k <
groupsPerChain[chain_idx]; ++k, ++group_idx) {
953 if (idx < -1 || idx >= sequenceIndexSize[chain_idx]) {
955 std::cout <<
"inconsistent sequenceIndexSize at"
956 " chain_idx: " << chain_idx << std::endl;
966 all_bond_orderLists_are_default =
false;
970 all_bond_resonanceLists_are_default =
false;
974 all_bond_atomLists_are_default =
false;
982 if (!all_bond_orderLists_are_default) {
983 if (bond_count_from_order !=
numBonds) {
985 std::cout <<
"inconsistent numBonds vs bond order count" << std::endl;
990 if (!all_bond_resonanceLists_are_default) {
991 if (bond_count_from_resonance !=
numBonds) {
993 std::cout <<
"inconsistent numBonds vs bond resonance count" << std::endl;
998 if (!all_bond_atomLists_are_default) {
999 if (bond_count_from_atom !=
numBonds) {
1001 std::cout <<
"inconsistent numBonds vs bond atom list count" << std::endl;
1008 std::cout <<
"inconsistent numChains" << std::endl;
1014 std::cout <<
"inconsistent numGroups size" << std::endl;
1020 std::cout <<
"inconsistent numAtoms size" << std::endl;
1029 std::ostringstream out;
1036 for (
int i = 0; i <
numModels; i++, modelIndex++) {
1038 for (
int j = 0; j <
chainsPerModel[modelIndex]; j++, chainIndex++) {
1040 for (
int k = 0; k <
groupsPerChain[chainIndex]; k++, groupIndex++) {
1045 for (
int l = 0; l < groupAtomCount; l++, atomIndex++) {
1048 out <<
"HETATM" << delim;
1050 out <<
"ATOM" << delim;
1053 out << std::setfill(
'0') << std::internal << std::setw(6) <<
1054 std::right <<
atomIdList[atomIndex] << delim;
1055 }
else out <<
"." << delim;
1062 out <<
"." << delim;
1064 }
else out <<
"." << delim;
1072 }
else out <<
"." << delim;
1079 out <<
"." << delim;
1080 else out << int(
insCodeList[groupIndex]) << delim;
1083 out << std::fixed << std::setprecision(3);
1091 }
else out <<
"." << delim;
1095 }
else out <<
"." << delim;
1107inline void StructureData::copyMapData_(
1108 std::map<std::string, msgpack::object>& target,
1109 const std::map<std::string, msgpack::object>& source) {
1111 std::map<std::string, msgpack::object>::const_iterator it;
1112 for (it = source.begin(); it != source.end(); ++it) {
1114 target[it->first] = tmp_object;
1118inline void StructureData::copyAllData_(
const StructureData& obj) {
Exception thrown when failing during decoding.
Definition errors.hpp:23
Definition binary_decoder.hpp:25
void setDefaultValue(T &value)
Set default value to given type.
Definition structure_data.hpp:426
bool isVersionSupported(const std::string &version_string)
Check if version is supported (minor revisions ok, major ones not)
Definition structure_data.hpp:397
T getDefaultValue()
Get default value for given type.
Definition structure_data.hpp:406
std::string getVersionString()
Get string representation of MMTF spec version implemented here.
Definition structure_data.hpp:391
bool is_hetatm(const char *type)
Check if group type consists of HETATM atoms.
Definition structure_data.hpp:449
bool isDefaultValue(const T &value)
Definition structure_data.hpp:409
bool is_polymer(const unsigned int chain_index, const std::vector< Entity > &entity_list)
Check if chain is a polymer chain.
Definition structure_data.hpp:432
Data store for the biological assembly annotation.
Definition structure_data.hpp:132
std::string name
Definition structure_data.hpp:134
bool operator==(BioAssembly const &c) const
Definition structure_data.hpp:136
std::vector< Transform > transformList
Definition structure_data.hpp:133
MSGPACK_DEFINE_MAP(transformList, name)
Entity type.
Definition structure_data.hpp:83
std::string type
Definition structure_data.hpp:86
std::string sequence
Definition structure_data.hpp:87
std::string description
Definition structure_data.hpp:85
std::vector< int32_t > chainIndexList
Definition structure_data.hpp:84
MSGPACK_DEFINE_MAP(chainIndexList, description, type, sequence)
bool operator==(Entity const &c) const
Definition structure_data.hpp:89
Group (residue) level data store.
Definition structure_data.hpp:53
char singleLetterCode
Definition structure_data.hpp:61
std::string chemCompType
Definition structure_data.hpp:62
std::vector< std::string > elementList
Definition structure_data.hpp:56
std::vector< int32_t > formalChargeList
Definition structure_data.hpp:54
std::vector< std::string > atomNameList
Definition structure_data.hpp:55
std::vector< int8_t > bondOrderList
Definition structure_data.hpp:58
std::vector< int8_t > bondResonanceList
Definition structure_data.hpp:59
std::string groupName
Definition structure_data.hpp:60
std::vector< int32_t > bondAtomList
Definition structure_data.hpp:57
bool operator==(GroupType const &c) const
Definition structure_data.hpp:64
Top level MMTF data container.
Definition structure_data.hpp:157
int32_t numBonds
Definition structure_data.hpp:173
std::string mmtfVersion
Definition structure_data.hpp:158
float rFree
Definition structure_data.hpp:171
std::vector< int32_t > sequenceIndexList
Definition structure_data.hpp:193
std::vector< char > altLocList
Definition structure_data.hpp:187
std::vector< int8_t > bondOrderList
Definition structure_data.hpp:180
std::string structureId
Definition structure_data.hpp:162
std::vector< int8_t > bondResonanceList
Definition structure_data.hpp:181
int32_t numGroups
Definition structure_data.hpp:175
std::vector< float > zCoordList
Definition structure_data.hpp:184
std::map< std::string, msgpack::object > chainProperties
Definition structure_data.hpp:202
int32_t numModels
Definition structure_data.hpp:177
StructureData & operator=(const StructureData &f)
Overload for assignment operator.
Definition structure_data.hpp:517
std::vector< float > xCoordList
Definition structure_data.hpp:182
std::map< std::string, msgpack::object > groupProperties
Definition structure_data.hpp:201
std::vector< int32_t > groupsPerChain
Definition structure_data.hpp:196
std::string print(std::string delim="\t") const
Read out the contents of mmtf::StructureData in a PDB-like fashion. Columns are in order: ATOM/HETATM ...
Definition structure_data.hpp:1028
std::vector< float > bFactorList
Definition structure_data.hpp:185
StructureData()
Construct object with default values set.
Definition structure_data.hpp:497
std::vector< int32_t > atomIdList
Definition structure_data.hpp:186
int32_t numChains
Definition structure_data.hpp:176
std::vector< std::string > chainIdList
Definition structure_data.hpp:194
bool operator==(const StructureData &c) const
Compare two StructureData classes for equality.
Definition structure_data.hpp:522
std::string spaceGroup
Definition structure_data.hpp:161
std::vector< int32_t > groupIdList
Definition structure_data.hpp:189
std::string depositionDate
Definition structure_data.hpp:164
std::vector< int32_t > bondAtomList
Definition structure_data.hpp:179
std::map< std::string, msgpack::object > atomProperties
Definition structure_data.hpp:200
bool operator!=(const StructureData &c) const
Compare two StructureData classes for inequality.
Definition structure_data.hpp:249
std::vector< GroupType > groupList
Definition structure_data.hpp:178
std::map< std::string, msgpack::object > extraProperties
Definition structure_data.hpp:204
std::vector< char > insCodeList
Definition structure_data.hpp:192
bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length=4) const
Check consistency of structural data.
Definition structure_data.hpp:572
float rWork
Definition structure_data.hpp:172
float resolution
Definition structure_data.hpp:170
std::string mmtfProducer
Definition structure_data.hpp:159
std::vector< std::vector< float > > ncsOperatorList
Definition structure_data.hpp:166
msgpack::zone msgpack_zone
Definition structure_data.hpp:198
std::vector< int32_t > chainsPerModel
Definition structure_data.hpp:197
std::vector< int32_t > groupTypeList
Definition structure_data.hpp:190
std::vector< BioAssembly > bioAssemblyList
Definition structure_data.hpp:167
std::vector< std::string > chainNameList
Definition structure_data.hpp:195
std::string releaseDate
Definition structure_data.hpp:165
std::vector< float > unitCell
Definition structure_data.hpp:160
std::vector< int8_t > secStructList
Definition structure_data.hpp:191
std::vector< Entity > entityList
Definition structure_data.hpp:168
std::map< std::string, msgpack::object > bondProperties
Definition structure_data.hpp:199
int32_t numAtoms
Definition structure_data.hpp:174
std::vector< float > occupancyList
Definition structure_data.hpp:188
std::vector< float > yCoordList
Definition structure_data.hpp:183
std::map< std::string, msgpack::object > modelProperties
Definition structure_data.hpp:203
std::vector< std::string > experimentalMethods
Definition structure_data.hpp:169
std::string title
Definition structure_data.hpp:163
#define MMTF_SPEC_VERSION_MINOR
Definition structure_data.hpp:35
#define MMTF_SPEC_VERSION_MAJOR
MMTF spec version which this library implements.
Definition structure_data.hpp:34