MMTF-C++
The C++ language MMTF libraries
Loading...
Searching...
No Matches
structure_data.hpp
Go to the documentation of this file.
1// *************************************************************************
2//
3// Licensed under the MIT License (see accompanying LICENSE file).
4//
5// The authors of this code are: Gabriel Studer, Gerardo Tauriello,
6// Daniel Farrell.
7//
8// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
9// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
10// Gazal Kalyan, Alexander Rose.
11//
12// *************************************************************************
13
14#ifndef MMTF_STRUCTURE_DATA_H
15#define MMTF_STRUCTURE_DATA_H
16
17#include "errors.hpp"
18
19#include <string>
20#include <vector>
21#include <algorithm>
22#include <stdint.h>
23#include <sstream>
24#include <limits>
25#include <msgpack.hpp>
26#include <iostream>
27#include <iomanip>
28
29namespace mmtf {
30
34#define MMTF_SPEC_VERSION_MAJOR 1
35#define MMTF_SPEC_VERSION_MINOR 1
36
40inline std::string getVersionString();
41
46inline bool isVersionSupported(const std::string& version_string);
47
53struct GroupType { // NOTE: can't use MSGPACK_DEFINE_MAP due to char
54 std::vector<int32_t> formalChargeList;
55 std::vector<std::string> atomNameList;
56 std::vector<std::string> elementList;
57 std::vector<int32_t> bondAtomList;
58 std::vector<int8_t> bondOrderList;
59 std::vector<int8_t> bondResonanceList;
60 std::string groupName;
62 std::string chemCompType;
63
76};
77
83struct Entity {
84 std::vector<int32_t> chainIndexList;
85 std::string description;
86 std::string type;
87 std::string sequence;
88
89 bool operator==(Entity const & c) const {
90 return(
93 type == c.type &&
94 sequence == c.sequence);
95 }
96
100 type,
101 sequence);
102};
103
109struct Transform {
110 std::vector<int32_t> chainIndexList;
111 float matrix[16];
112
113 bool operator==(Transform const & c) const {
114 bool comp = true;
115 for(size_t i = 16; i--;) {
116 if ( matrix[i] != c.matrix[i] ) {
117 comp = false;
118 break;
119 }
120 }
121 return (chainIndexList == c.chainIndexList && comp);
122 }
123
125};
126
133 std::vector<Transform> transformList;
134 std::string name;
135
136 bool operator==(BioAssembly const & c) const {
137 return (
139 name == c.name);
140 }
141
143};
144
158 std::string mmtfVersion;
159 std::string mmtfProducer;
160 std::vector<float> unitCell;
161 std::string spaceGroup;
162 std::string structureId;
163 std::string title;
164 std::string depositionDate;
165 std::string releaseDate;
166 std::vector<std::vector<float> > ncsOperatorList;
167 std::vector<BioAssembly> bioAssemblyList;
168 std::vector<Entity> entityList;
169 std::vector<std::string> experimentalMethods;
171 float rFree;
172 float rWork;
173 int32_t numBonds;
174 int32_t numAtoms;
175 int32_t numGroups;
176 int32_t numChains;
177 int32_t numModels;
178 std::vector<GroupType> groupList;
179 std::vector<int32_t> bondAtomList;
180 std::vector<int8_t> bondOrderList;
181 std::vector<int8_t> bondResonanceList;
182 std::vector<float> xCoordList;
183 std::vector<float> yCoordList;
184 std::vector<float> zCoordList;
185 std::vector<float> bFactorList;
186 std::vector<int32_t> atomIdList;
187 std::vector<char> altLocList;
188 std::vector<float> occupancyList;
189 std::vector<int32_t> groupIdList;
190 std::vector<int32_t> groupTypeList;
191 std::vector<int8_t> secStructList;
192 std::vector<char> insCodeList;
193 std::vector<int32_t> sequenceIndexList;
194 std::vector<std::string> chainIdList;
195 std::vector<std::string> chainNameList;
196 std::vector<int32_t> groupsPerChain;
197 std::vector<int32_t> chainsPerModel;
198 mutable msgpack::zone msgpack_zone;
199 std::map<std::string, msgpack::object> bondProperties;
200 std::map<std::string, msgpack::object> atomProperties;
201 std::map<std::string, msgpack::object> groupProperties;
202 std::map<std::string, msgpack::object> chainProperties;
203 std::map<std::string, msgpack::object> modelProperties;
204 std::map<std::string, msgpack::object> extraProperties;
205
210
214 StructureData(const StructureData& obj);
215
220
228 bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length = 4) const;
229
237 std::string print(std::string delim="\t") const;
238
243 bool operator==(const StructureData& c) const;
244
249 bool operator!=(const StructureData& c) const {
250 return !(*this == c);
251 }
252
253private:
254 // Helper to copy map data
255 void copyMapData_(std::map<std::string, msgpack::object>& target,
256 const std::map<std::string, msgpack::object>& source);
257 // Helper to copy all data
258 void copyAllData_(const StructureData& obj);
259};
260
261
265template <typename T>
266inline T getDefaultValue();
267
268
273template <typename T>
274inline bool isDefaultValue(const T& value);
275template <typename T>
276inline bool isDefaultValue(const std::vector<T>& value);
277template <>
278inline bool isDefaultValue(const std::string& value);
279template <>
280inline bool isDefaultValue(const std::map<std::string, msgpack::object>& value);
281
282
287template <typename T>
288inline void setDefaultValue(T& value);
289
290
298inline bool is_polymer(const unsigned int chain_index,
299 const std::vector<Entity>& entity_list);
300
314inline bool is_hetatm(const char* type);
315
334inline bool is_hetatm(const unsigned int chain_index,
335 const std::vector<Entity>& entity_list,
336 const GroupType& group_type);
337
338
339// *************************************************************************
340// IMPLEMENTATION
341// *************************************************************************
342
343// helpers in anonymous namespace (only visible in this file)
344namespace {
345
346// check optional date string
347// -> either default or "YYYY-MM-DD" (only string format checked, not date)
348bool isValidDateFormatOptional(const std::string& s) {
349 // default?
350 if (isDefaultValue(s)) return true;
351 // check length
352 if (s.length() != 10) return false;
353 // check delimiters
354 if (s[4] != '-' || s[7] != '-') return false;
355 // check format
356 std::istringstream is(s);
357 int d, m, y;
358 char dash1, dash2;
359 if (is >> y >> dash1 >> m >> dash2 >> d) {
360 return (dash1 == '-' && dash2 == '-');
361 } else {
362 return false;
363 }
364}
365
366// check if optional vector has right size
367template<typename T>
368bool hasRightSizeOptional(const std::vector<T>& v, int exp_size) {
369 return (isDefaultValue(v) || (int)v.size() == exp_size);
370}
371
372// check if all indices in vector are in [0, num-1] (T = integer type)
373template<typename T, typename Tnum>
374bool hasValidIndices(const T* v, size_t size, Tnum num) {
375 T tnum = T(num);
376 for (size_t i = 0; i < size; ++i) {
377 if (v[i] < T(0) || v[i] >= tnum) return false;
378 }
379 return true;
380}
381template<typename T, typename Tnum>
382bool hasValidIndices(const std::vector<T>& v, Tnum num) {
383 if (v.empty()) return true;
384 else return hasValidIndices(&v[0], v.size(), num);
385}
386
387} // anon ns
388
389// VERSIONING
390
391inline std::string getVersionString() {
392 std::stringstream version;
394 return version.str();
395}
396
397inline bool isVersionSupported(const std::string& version_string) {
398 std::stringstream ss(version_string);
399 int major = -1;
400 return ((ss >> major) && (major <= MMTF_SPEC_VERSION_MAJOR));
401}
402
403// DEFAULT VALUES
404
405template <typename T>
406inline T getDefaultValue() { return std::numeric_limits<T>::max(); }
407
408template <typename T>
409inline bool isDefaultValue(const T& value) {
410 return (value == getDefaultValue<T>());
411}
412template <typename T>
413inline bool isDefaultValue(const std::vector<T>& value) {
414 return value.empty();
415}
416template <>
417inline bool isDefaultValue(const std::string& value) {
418 return value.empty();
419}
420template <>
421inline bool isDefaultValue(const std::map<std::string, msgpack::object>& value) {
422 return value.empty();
423}
424
425template <typename T>
426inline void setDefaultValue(T& value) {
427 value = getDefaultValue<T>();
428}
429
430// HELPERS
431
432bool is_polymer(const unsigned int chain_index,
433 const std::vector<Entity>& entity_list) {
434 for (std::size_t i = 0; i < entity_list.size(); ++i) {
435 if ( std::find(entity_list[i].chainIndexList.begin(),
436 entity_list[i].chainIndexList.end(),
437 chain_index)
438 != entity_list[i].chainIndexList.end()) {
439 return ( entity_list[i].type == "polymer"
440 || entity_list[i].type == "POLYMER");
441 }
442 }
443 std::stringstream err;
444 err << "'is_polymer' unable to find chain_index: " << chain_index
445 << " in entity list";
446 throw DecodeError(err.str());
447}
448
449bool is_hetatm(const char* type) {
450 const char* hetatm_type[] = {
451 "D-BETA-PEPTIDE, C-GAMMA LINKING",
452 "D-GAMMA-PEPTIDE, C-DELTA LINKING",
453 "D-PEPTIDE COOH CARBOXY TERMINUS",
454 "D-PEPTIDE NH3 AMINO TERMINUS",
455 "D-PEPTIDE LINKING",
456 "D-SACCHARIDE",
457 "D-SACCHARIDE 1,4 AND 1,4 LINKING",
458 "D-SACCHARIDE 1,4 AND 1,6 LINKING",
459 "DNA OH 3 PRIME TERMINUS",
460 "DNA OH 5 PRIME TERMINUS",
461 "DNA LINKING",
462 "L-DNA LINKING",
463 "L-RNA LINKING",
464 "L-BETA-PEPTIDE, C-GAMMA LINKING",
465 "L-GAMMA-PEPTIDE, C-DELTA LINKING",
466 "L-PEPTIDE COOH CARBOXY TERMINUS",
467 "L-PEPTIDE NH3 AMINO TERMINUS",
468 //"L-PEPTIDE LINKING", // All canonical L AA
469 "L-SACCHARIDE",
470 "L-SACCHARIDE 1,4 AND 1,4 LINKING",
471 "L-SACCHARIDE 1,4 AND 1,6 LINKING",
472 "RNA OH 3 PRIME TERMINUS",
473 "RNA OH 5 PRIME TERMINUS",
474 "RNA LINKING",
475 "NON-POLYMER",
476 "OTHER",
477 //"PEPTIDE LINKING", // GLY
478 "PEPTIDE-LIKE",
479 "SACCHARIDE",
480 0 };
481 for (int i=0; hetatm_type[i]; ++i) {
482 if (strcmp(type,hetatm_type[i]) == 0) return true;
483 }
484 return false;
485}
486
487bool is_hetatm(const unsigned int chain_index,
488 const std::vector<Entity>& entity_list,
489 const GroupType& group_type) {
490 return ( is_hetatm(group_type.chemCompType.c_str())
491 || !is_polymer(chain_index, entity_list));
492}
493
494
495// CLASS StructureData
496
498 // no need to do anything with strings and vectors
502 // numXX set to 0 to have consistent data
503 numBonds = 0;
504 numAtoms = 0;
505 numGroups = 0;
506 numChains = 0;
507 numModels = 0;
508 // set version and producer
510 mmtfProducer = "mmtf-cpp library (github.com/rcsb/mmtf-cpp)";
511}
512
514 copyAllData_(obj);
515}
516
518 if (this != &obj) copyAllData_(obj);
519 return *this;
520}
521
522inline bool StructureData::operator==(const StructureData& c) const {
523 return (
526 unitCell == c.unitCell &&
527 spaceGroup == c.spaceGroup &&
529 title == c.title &&
534 entityList == c.entityList &&
536 resolution == c.resolution &&
537 rFree == c.rFree &&
538 rWork == c.rWork &&
539 numBonds == c.numBonds &&
540 numAtoms == c.numAtoms &&
541 numGroups == c.numGroups &&
542 numChains == c.numChains &&
543 numModels == c.numModels &&
544 groupList == c.groupList &&
547 xCoordList == c.xCoordList &&
548 yCoordList == c.yCoordList &&
549 zCoordList == c.zCoordList &&
551 atomIdList == c.atomIdList &&
552 altLocList == c.altLocList &&
569}
570
571
572inline bool StructureData::hasConsistentData(bool verbose, uint32_t chain_name_max_length) const {
573 std::vector<int8_t> allowed_bond_orders;
574 allowed_bond_orders.push_back(-1);
575 allowed_bond_orders.push_back(1);
576 allowed_bond_orders.push_back(2);
577 allowed_bond_orders.push_back(3);
578 allowed_bond_orders.push_back(4);
579
580 // check unitCell: if given, must be of length 6
581 if (!hasRightSizeOptional(unitCell, 6)) {
582 if (verbose) {
583 std::cout << "inconsistent unitCell (unitCell length != 6)" << std::endl;
584 }
585 return false;
586 }
587 // check dates
588 if (!isValidDateFormatOptional(depositionDate)) {
589 if (verbose) {
590 std::cout << "inconsistent depositionDate (does not match 'YYYY-MM-DD' "
591 "or empty)" << std::endl;
592 }
593 return false;
594 }
595 if (!isValidDateFormatOptional(releaseDate)) {
596 if (verbose) {
597 std::cout << "inconsistent releaseDate (does not match 'YYYY-MM-DD' "
598 "or empty)" << std::endl;
599 }
600 return false;
601 }
602 // check ncsOperatorList: all elements must have length 16
603 for (size_t i = 0; i < ncsOperatorList.size(); ++i) {
604 if ((int)ncsOperatorList[i].size() != 16) {
605 if (verbose) {
606 std::cout << "inconsistent ncsOperatorList idx: " << i << " found size: "
607 << ncsOperatorList[i].size() << " != 16" << std::endl;
608 }
609 return false;
610 }
611 }
612 // check chain indices in bioAssembly-transforms and entities
613 for (size_t i = 0; i < bioAssemblyList.size(); ++i) {
614 const BioAssembly& ba = bioAssemblyList[i];
615 for (size_t j = 0; j < ba.transformList.size(); ++j) {
616 const Transform & t = ba.transformList[j];
617 if (!hasValidIndices(t.chainIndexList, numChains)) {
618 if (verbose) {
619 std::cout << "inconsistent BioAssemby transform i j: " << i
620 << " " << j << std::endl;
621 }
622 return false;
623 }
624 }
625 }
626 for (size_t i = 0; i < entityList.size(); ++i) {
627 const Entity& ent = entityList[i];
628 if (!hasValidIndices(ent.chainIndexList, numChains)) {
629 if (verbose) {
630 std::cout << "inconsistent entity idx: " << i << std::endl;
631 }
632 return false;
633 }
634 }
635 // check groups
636 for (size_t i = 0; i < groupList.size(); ++i) {
637 const GroupType& g = groupList[i];
638 const size_t num_atoms = g.formalChargeList.size();
639 if (g.atomNameList.size() != num_atoms) {
640 if (verbose) {
641 std::cout << "inconsistent group::atomNameList size at idx: "
642 << i << std::endl;
643 }
644 return false;
645 }
646 if (g.elementList.size() != num_atoms) {
647 if (verbose) {
648 std::cout << "inconsistent group::elementList size at idx: "
649 << i << std::endl;
650 }
651 return false;
652 }
654 if (g.bondAtomList.size() != g.bondOrderList.size() * 2) {
655 if (verbose) {
656 std::cout << "inconsistent group::bondAtomList size: " <<
657 g.bondAtomList.size() << " != group::bondOrderList size(*2): " <<
658 g.bondOrderList.size()*2 << " at idx: " << i << std::endl;
659 }
660 return false;
661 }
662 for (size_t j = 0; j < g.bondOrderList.size(); ++j) {
663 if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
664 g.bondOrderList[j]) == allowed_bond_orders.end()) {
665 if (verbose) {
666 std::cout << "Cannot have bond order of: " << (int)g.bondOrderList[j]
667 << " allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << j << std::endl;
668 }
669 return false;
670 }
671 }
672 }
675 if (verbose) {
676 std::cout << "Cannot have bondResonanceList without both " <<
677 "bondOrderList and bondAtomList! at idx: " << i << std::endl;
678 }
679 return false;
680 }
681 if (g.bondOrderList.size() != g.bondResonanceList.size()) {
682 if (verbose) {
683 std::cout << "inconsistent group::bondOrderSize size: " <<
684 g.bondOrderList.size() << " != group::bondResonanceList size: " <<
685 g.bondResonanceList.size() << " at idx: " << i << std::endl;
686 }
687 return false;
688 }
689 if (g.bondAtomList.size() != g.bondResonanceList.size() * 2) {
690 if (verbose) {
691 std::cout << "inconsistent group::bondAtomList size: " <<
692 g.bondAtomList.size() << " != group::bondResonanceList size(*2): " <<
693 g.bondResonanceList.size()*2 << " at idx: " << i << std::endl;
694 }
695 return false;
696 }
697 // Check bond resonance matches
698 for (size_t j = 0; j < g.bondResonanceList.size(); ++j) {
699 if (g.bondResonanceList[j] < -1 || g.bondResonanceList[j] > 1) {
700 if (verbose) {
701 std::cout << "group::bondResonanceList had a Resonance of: "
702 << (int)g.bondResonanceList[j] << " and only -1, 0, or 1 are allowed"
703 << std::endl;
704 }
705 return false;
706 }
707 if (g.bondOrderList[j] == -1 && g.bondResonanceList[j] != 1) {
708 if (verbose) {
709 std::cout << "group::bondResonanceList had a Resonance of: "
710 << (int)g.bondResonanceList[j] << " and group::bondOrderList had an order of "
711 << (int)g.bondOrderList[j] << " we require unknown bondOrders to have resonance"
712 << std::endl;
713 }
714 return false;
715 }
716 }
717 }
718 if (!hasValidIndices(g.bondAtomList, num_atoms)) {
719 if (verbose) {
720 std::cout << "inconsistent group::bondAtomList indices (not all in [0, "
721 << num_atoms - 1 << "]) at idx: " << i << std::endl;
722 }
723 return false;
724 }
725 }
726 // check global bonds
728 if (bondAtomList.size() != bondOrderList.size() * 2) {
729 if (verbose) {
730 std::cout << "inconsistent bondAtomList size: " <<
731 bondAtomList.size() << " != bondOrderList size(*2): " <<
732 bondOrderList.size()*2 << std::endl;
733 }
734 return false;
735 }
736 for (size_t i = 0; i < bondOrderList.size(); ++i) {
737 if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
738 bondOrderList[i]) == allowed_bond_orders.end()) {
739 if (verbose) {
740 std::cout << "Cannot have bond order of: " << (int)bondOrderList[i]
741 << " allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << i << std::endl;
742 }
743 return false;
744 }
745 }
746 }
749 if (verbose) {
750 std::cout << "Cannot have bondResonanceList without both " <<
751 "bondOrderList and bondAtomList!" << std::endl;
752 }
753 return false;
754 }
755 if (bondAtomList.size() != bondResonanceList.size() * 2) {
756 if (verbose) {
757 std::cout << "inconsistent bondAtomList size: " <<
758 bondAtomList.size() << " != bondResonanceList size(*2): " <<
759 bondResonanceList.size()*2 << std::endl;
760 }
761 return false;
762 }
763 for (size_t i = 0; i < bondResonanceList.size(); ++i) {
764 if (bondResonanceList[i] < -1 || bondResonanceList[i] > 1) {
765 if (verbose) {
766 std::cout << "bondResonanceList had a Resonance of: "
767 << (int)bondResonanceList[i] << " and only -1, 0, or 1 are allowed"
768 << std::endl;
769 }
770 return false;
771 }
772 if (bondOrderList[i] == -1 && bondResonanceList[i] != 1) {
773 if (verbose) {
774 std::cout << "bondResonanceList had a Resonance of: "
775 << (int)bondResonanceList[i] << " and bondOrderList had an order of "
776 << (int)bondOrderList[i] << " we require unknown bondOrders to have resonance"
777 << std::endl;
778 }
779 return false;
780 }
781 }
782 }
783 if (!hasValidIndices(bondAtomList, numAtoms)) {
784 if (verbose) {
785 std::cout << "inconsistent bondAtomList indices (not all in [0, "
786 << numAtoms - 1 << "])" << std::endl;
787 }
788 return false;
789 }
790 // check vector sizes
791 if ((int)xCoordList.size() != numAtoms) {
792 if (verbose) {
793 std::cout << "inconsistent xCoordList size" << std::endl;
794 }
795 return false;
796 }
797 if ((int)yCoordList.size() != numAtoms) {
798 if (verbose) {
799 std::cout << "inconsistent yCoordList size" << std::endl;
800 }
801 return false;
802 }
803 if ((int)zCoordList.size() != numAtoms) {
804 if (verbose) {
805 std::cout << "inconsistent zCoordList size" << std::endl;
806 }
807 return false;
808 }
809 if (!hasRightSizeOptional(bFactorList, numAtoms)) {
810 if (verbose) {
811 std::cout << "inconsistent bFactorList size" << std::endl;
812 }
813 return false;
814 }
815 if (!hasRightSizeOptional(atomIdList, numAtoms)) {
816 if (verbose) {
817 std::cout << "inconsistent atomIdList size" << std::endl;
818 }
819 return false;
820 }
821 if (!hasRightSizeOptional(altLocList, numAtoms)) {
822 if (verbose) {
823 std::cout << "inconsistent altLocList size" << std::endl;
824 }
825 return false;
826 }
827 if (!hasRightSizeOptional(occupancyList, numAtoms)) {
828 if (verbose) {
829 std::cout << "inconsistent occupancyList size" << std::endl;
830 }
831 return false;
832 }
833 if ((int)groupIdList.size() != numGroups) {
834 if (verbose) {
835 std::cout << "inconsistent groupIdList size" << std::endl;
836 }
837 return false;
838 }
839 if ((int)groupTypeList.size() != numGroups) {
840 if (verbose) {
841 std::cout << "inconsistent groupTypeList size" << std::endl;
842 }
843 return false;
844 }
845 if (!hasRightSizeOptional(secStructList, numGroups)) {
846 if (verbose) {
847 std::cout << "inconsistent secStructList size" << std::endl;
848 }
849 return false;
850 }
851 if (!hasRightSizeOptional(insCodeList, numGroups)) {
852 if (verbose) {
853 std::cout << "inconsistent insCodeList size" << std::endl;
854 }
855 return false;
856 }
857 if (!hasRightSizeOptional(sequenceIndexList, numGroups)) {
858 if (verbose) {
859 std::cout << "inconsistent sequenceIndexList size" << std::endl;
860 }
861 return false;
862 }
863 if ((int)chainIdList.size() != numChains) {
864 if (verbose) {
865 std::cout << "inconsistent chainIdList size" << std::endl;
866 }
867 return false;
868 }
869 if (!hasRightSizeOptional(chainNameList, numChains)) {
870 if (verbose) {
871 std::cout << "inconsistent chainNameList size" << std::endl;
872 }
873 return false;
874 }
875 if ((int)groupsPerChain.size() != numChains) {
876 if (verbose) {
877 std::cout << "inconsistent groupsPerChain size" << std::endl;
878 }
879 return false;
880 }
881 if ((int)chainsPerModel.size() != numModels) {
882 if (verbose) {
883 std::cout << "inconsistent chainsPerModel size" << std::endl;
884 }
885 return false;
886 }
887 // check indices
888 if (!hasValidIndices(groupTypeList, groupList.size())) {
889 if (verbose) {
890 std::cout << "inconsistent groupTypeList indices (not all in [0, "
891 << groupList.size() - 1 << "])" << std::endl;
892 }
893 return false;
894 }
895 // collect sequence lengths from entities and use to check
896 std::vector<int32_t> sequenceIndexSize(numChains);
897 for (size_t i = 0; i < entityList.size(); ++i) {
898 const Entity& ent = entityList[i];
899 for (size_t j = 0; j < ent.chainIndexList.size(); ++j) {
900 sequenceIndexSize[ent.chainIndexList[j]] = ent.sequence.length();
901 }
902 }
903 // traverse structure for more checks
904 int bond_count_from_atom = 0;
905 int bond_count_from_order = 0;
906 int bond_count_from_resonance = 0;
907 bool all_bond_orderLists_are_default = true;
908 bool all_bond_resonanceLists_are_default = true;
909 bool all_bond_atomLists_are_default = true;
911 all_bond_orderLists_are_default = false;
912 bond_count_from_order = bondOrderList.size();
913 }
915 all_bond_resonanceLists_are_default = false;
916 bond_count_from_resonance = bondOrderList.size();
917 }
919 all_bond_atomLists_are_default = false;
920 bond_count_from_atom = bondAtomList.size()/2;
921 }
922 int chain_idx = 0; // will be count at end of loop
923 int group_idx = 0; // will be count at end of loop
924 int atom_idx = 0; // will be count at end of loop
925 // traverse models
926 for (int model_idx = 0; model_idx < numModels; ++model_idx) {
927 // traverse chains
928 for (int j = 0; j < chainsPerModel[model_idx]; ++j, ++chain_idx) {
929 // check chain names (fixed length)
930 if (chainIdList[chain_idx].size() > chain_name_max_length) {
931 if (verbose) {
932 std::cout << "inconsistent chainIdList size at chain_idx: "
933 << chain_idx << " size: "
934 << chainIdList[chain_idx].size() << std::endl;
935 }
936 return false;
937 }
939 && chainNameList[chain_idx].size() > chain_name_max_length) {
940 if (verbose) {
941 std::cout << "inconsistent chainNameList size at chain_idx:"
942 << chain_idx << " size: "
943 << chainNameList[chain_idx].size() << std::endl;
944 }
945 return false;
946 }
947 // traverse groups
948 for (int k = 0; k < groupsPerChain[chain_idx]; ++k, ++group_idx) {
949 // check seq. idx
951 const int32_t idx = sequenceIndexList[group_idx];
952 // -1 is ok here
953 if (idx < -1 || idx >= sequenceIndexSize[chain_idx]) {
954 if (verbose) {
955 std::cout << "inconsistent sequenceIndexSize at"
956 " chain_idx: " << chain_idx << std::endl;
957 }
958 return false;
959 }
960 }
961 // count atoms
962 const GroupType& group = groupList[groupTypeList[group_idx]];
963 atom_idx += group.atomNameList.size();
964 // count bonds
965 if (!isDefaultValue(group.bondOrderList)) {
966 all_bond_orderLists_are_default = false;
967 bond_count_from_order += group.bondOrderList.size();
968 }
969 if (!isDefaultValue(group.bondResonanceList)) {
970 all_bond_resonanceLists_are_default = false;
971 bond_count_from_resonance += group.bondResonanceList.size();
972 }
973 if (!isDefaultValue(group.bondAtomList)) {
974 all_bond_atomLists_are_default = false;
975 bond_count_from_atom += group.bondAtomList.size()/2;
976 }
977
978 }
979 }
980 }
981 // check sizes
982 if (!all_bond_orderLists_are_default) {
983 if (bond_count_from_order != numBonds) {
984 if (verbose) {
985 std::cout << "inconsistent numBonds vs bond order count" << std::endl;
986 }
987 return false;
988 }
989 }
990 if (!all_bond_resonanceLists_are_default) {
991 if (bond_count_from_resonance != numBonds) {
992 if (verbose) {
993 std::cout << "inconsistent numBonds vs bond resonance count" << std::endl;
994 }
995 return false;
996 }
997 }
998 if (!all_bond_atomLists_are_default) {
999 if (bond_count_from_atom != numBonds) {
1000 if (verbose) {
1001 std::cout << "inconsistent numBonds vs bond atom list count" << std::endl;
1002 }
1003 return false;
1004 }
1005 }
1006 if (chain_idx != numChains) {
1007 if (verbose) {
1008 std::cout << "inconsistent numChains" << std::endl;
1009 }
1010 return false;
1011 }
1012 if (group_idx != numGroups) {
1013 if (verbose) {
1014 std::cout << "inconsistent numGroups size" << std::endl;
1015 }
1016 return false;
1017 }
1018 if (atom_idx != numAtoms) {
1019 if (verbose) {
1020 std::cout << "inconsistent numAtoms size" << std::endl;
1021 }
1022 return false;
1023 }
1024 // All looks good :)
1025 return true;
1026}
1027
1028inline std::string StructureData::print(std::string delim) const {
1029 std::ostringstream out;
1030 int modelIndex = 0;
1031 int chainIndex = 0;
1032 int groupIndex = 0;
1033 int atomIndex = 0;
1034
1035 // traverse models
1036 for (int i = 0; i < numModels; i++, modelIndex++) {
1037 // traverse chains
1038 for (int j = 0; j < chainsPerModel[modelIndex]; j++, chainIndex++) {
1039 // traverse groups
1040 for (int k = 0; k < groupsPerChain[chainIndex]; k++, groupIndex++) {
1041 const mmtf::GroupType& group =
1042 groupList[groupTypeList[groupIndex]];
1043 int groupAtomCount = group.atomNameList.size();
1044
1045 for (int l = 0; l < groupAtomCount; l++, atomIndex++) {
1046 // ATOM or HETATM
1047 if (is_hetatm(chainIndex, entityList, group))
1048 out << "HETATM" << delim;
1049 else
1050 out << "ATOM" << delim;
1051 // Atom serial
1053 out << std::setfill('0') << std::internal << std::setw(6) <<
1054 std::right << atomIdList[atomIndex] << delim;
1055 } else out << "." << delim;
1056 // Atom name
1057 out << group.atomNameList[l] << delim;
1058 // Alternate location
1060 if ( altLocList[atomIndex] == ' ' ||
1061 altLocList[atomIndex] == 0x00 )
1062 out << "." << delim;
1063 else out << altLocList[atomIndex] << delim;
1064 } else out << "." << delim;
1065 // Group name
1066 out << group.groupName << delim;
1067 // Chain
1068 out << chainIdList[chainIndex] << delim;
1070 out << chainNameList[chainIndex];
1071 out << delim;
1072 } else out << "." << delim;
1073 // Group serial
1074 out << groupIdList[groupIndex] << delim;
1075 // Insertion code
1077 if ( insCodeList[groupIndex] == ' ' ||
1078 insCodeList[groupIndex] == 0x00 )
1079 out << "." << delim;
1080 else out << int(insCodeList[groupIndex]) << delim;
1081 } else out << ". ";
1082 // x, y, z
1083 out << std::fixed << std::setprecision(3);
1084 out << xCoordList[atomIndex] << delim;
1085 out << yCoordList[atomIndex] << delim;
1086 out << zCoordList[atomIndex] << delim;
1087
1088 // B-factor
1090 out << bFactorList[atomIndex] << delim;
1091 } else out << "." << delim;
1092 // Occupancy
1094 out << occupancyList[atomIndex] << delim;
1095 } else out << "." << delim;
1096 // Element
1097 out << group.elementList[l] << delim;
1098 // Charge
1099 out << group.formalChargeList[l] << "\n";
1100 }
1101 }
1102 }
1103 }
1104 return out.str();
1105}
1106
1107inline void StructureData::copyMapData_(
1108 std::map<std::string, msgpack::object>& target,
1109 const std::map<std::string, msgpack::object>& source) {
1110 target.clear();
1111 std::map<std::string, msgpack::object>::const_iterator it;
1112 for (it = source.begin(); it != source.end(); ++it) {
1113 msgpack::object tmp_object(it->second, msgpack_zone);
1114 target[it->first] = tmp_object;
1115 }
1116}
1117
1118inline void StructureData::copyAllData_(const StructureData& obj) {
1119 mmtfVersion = obj.mmtfVersion;
1120 mmtfProducer = obj.mmtfProducer;
1121 unitCell = obj.unitCell;
1122 spaceGroup = obj.spaceGroup;
1123 structureId = obj.structureId;
1124 title = obj.title;
1125 depositionDate = obj.depositionDate;
1126 releaseDate = obj.releaseDate;
1127 ncsOperatorList = obj.ncsOperatorList;
1128 bioAssemblyList = obj.bioAssemblyList;
1129 entityList = obj.entityList;
1130 experimentalMethods = obj.experimentalMethods;
1131 resolution = obj.resolution;
1132 rFree = obj.rFree;
1133 rWork = obj.rWork;
1134 numBonds = obj.numBonds;
1135 numAtoms = obj.numAtoms;
1136 numGroups = obj.numGroups;
1137 numChains = obj.numChains;
1138 numModels = obj.numModels;
1139 groupList = obj.groupList;
1140 bondAtomList = obj.bondAtomList;
1141 bondOrderList = obj.bondOrderList;
1142 xCoordList = obj.xCoordList;
1143 yCoordList = obj.yCoordList;
1144 zCoordList = obj.zCoordList;
1145 bFactorList = obj.bFactorList;
1146 atomIdList = obj.atomIdList;
1147 altLocList = obj.altLocList;
1148 occupancyList = obj.occupancyList;
1149 groupIdList = obj.groupIdList;
1150 groupTypeList = obj.groupTypeList;
1151 secStructList = obj.secStructList;
1152 insCodeList = obj.insCodeList;
1153 sequenceIndexList = obj.sequenceIndexList;
1154 chainIdList = obj.chainIdList;
1155 chainNameList = obj.chainNameList;
1156 groupsPerChain = obj.groupsPerChain;
1157 chainsPerModel = obj.chainsPerModel;
1158 copyMapData_(bondProperties, obj.bondProperties);
1159 copyMapData_(atomProperties, obj.atomProperties);
1160 copyMapData_(groupProperties, obj.groupProperties);
1161 copyMapData_(chainProperties, obj.chainProperties);
1162 copyMapData_(modelProperties, obj.modelProperties);
1163 copyMapData_(extraProperties, obj.extraProperties);
1164}
1165
1166} // mmtf namespace
1167
1168#endif
1169
Exception thrown when failing during decoding.
Definition errors.hpp:23
Definition binary_decoder.hpp:25
void setDefaultValue(T &value)
Set default value to given type.
Definition structure_data.hpp:426
bool isVersionSupported(const std::string &version_string)
Check if version is supported (minor revisions ok, major ones not)
Definition structure_data.hpp:397
T getDefaultValue()
Get default value for given type.
Definition structure_data.hpp:406
std::string getVersionString()
Get string representation of MMTF spec version implemented here.
Definition structure_data.hpp:391
bool is_hetatm(const char *type)
Check if group type consists of HETATM atoms.
Definition structure_data.hpp:449
bool isDefaultValue(const T &value)
Definition structure_data.hpp:409
bool is_polymer(const unsigned int chain_index, const std::vector< Entity > &entity_list)
Check if chain is a polymer chain.
Definition structure_data.hpp:432
Data store for the biological assembly annotation.
Definition structure_data.hpp:132
std::string name
Definition structure_data.hpp:134
bool operator==(BioAssembly const &c) const
Definition structure_data.hpp:136
std::vector< Transform > transformList
Definition structure_data.hpp:133
MSGPACK_DEFINE_MAP(transformList, name)
Entity type.
Definition structure_data.hpp:83
std::string type
Definition structure_data.hpp:86
std::string sequence
Definition structure_data.hpp:87
std::string description
Definition structure_data.hpp:85
std::vector< int32_t > chainIndexList
Definition structure_data.hpp:84
MSGPACK_DEFINE_MAP(chainIndexList, description, type, sequence)
bool operator==(Entity const &c) const
Definition structure_data.hpp:89
Group (residue) level data store.
Definition structure_data.hpp:53
char singleLetterCode
Definition structure_data.hpp:61
std::string chemCompType
Definition structure_data.hpp:62
std::vector< std::string > elementList
Definition structure_data.hpp:56
std::vector< int32_t > formalChargeList
Definition structure_data.hpp:54
std::vector< std::string > atomNameList
Definition structure_data.hpp:55
std::vector< int8_t > bondOrderList
Definition structure_data.hpp:58
std::vector< int8_t > bondResonanceList
Definition structure_data.hpp:59
std::string groupName
Definition structure_data.hpp:60
std::vector< int32_t > bondAtomList
Definition structure_data.hpp:57
bool operator==(GroupType const &c) const
Definition structure_data.hpp:64
Top level MMTF data container.
Definition structure_data.hpp:157
int32_t numBonds
Definition structure_data.hpp:173
std::string mmtfVersion
Definition structure_data.hpp:158
float rFree
Definition structure_data.hpp:171
std::vector< int32_t > sequenceIndexList
Definition structure_data.hpp:193
std::vector< char > altLocList
Definition structure_data.hpp:187
std::vector< int8_t > bondOrderList
Definition structure_data.hpp:180
std::string structureId
Definition structure_data.hpp:162
std::vector< int8_t > bondResonanceList
Definition structure_data.hpp:181
int32_t numGroups
Definition structure_data.hpp:175
std::vector< float > zCoordList
Definition structure_data.hpp:184
std::map< std::string, msgpack::object > chainProperties
Definition structure_data.hpp:202
int32_t numModels
Definition structure_data.hpp:177
StructureData & operator=(const StructureData &f)
Overload for assignment operator.
Definition structure_data.hpp:517
std::vector< float > xCoordList
Definition structure_data.hpp:182
std::map< std::string, msgpack::object > groupProperties
Definition structure_data.hpp:201
std::vector< int32_t > groupsPerChain
Definition structure_data.hpp:196
std::string print(std::string delim="\t") const
Read out the contents of mmtf::StructureData in a PDB-like fashion Columns are in order: ATOM/HETATM ...
Definition structure_data.hpp:1028
std::vector< float > bFactorList
Definition structure_data.hpp:185
StructureData()
Construct object with default values set.
Definition structure_data.hpp:497
std::vector< int32_t > atomIdList
Definition structure_data.hpp:186
int32_t numChains
Definition structure_data.hpp:176
std::vector< std::string > chainIdList
Definition structure_data.hpp:194
bool operator==(const StructureData &c) const
Compare two StructureData classes for equality.
Definition structure_data.hpp:522
std::string spaceGroup
Definition structure_data.hpp:161
std::vector< int32_t > groupIdList
Definition structure_data.hpp:189
std::string depositionDate
Definition structure_data.hpp:164
std::vector< int32_t > bondAtomList
Definition structure_data.hpp:179
std::map< std::string, msgpack::object > atomProperties
Definition structure_data.hpp:200
bool operator!=(const StructureData &c) const
Compare two StructureData classes for inequality.
Definition structure_data.hpp:249
std::vector< GroupType > groupList
Definition structure_data.hpp:178
std::map< std::string, msgpack::object > extraProperties
Definition structure_data.hpp:204
std::vector< char > insCodeList
Definition structure_data.hpp:192
bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length=4) const
Check consistency of structural data.
Definition structure_data.hpp:572
float rWork
Definition structure_data.hpp:172
float resolution
Definition structure_data.hpp:170
std::string mmtfProducer
Definition structure_data.hpp:159
std::vector< std::vector< float > > ncsOperatorList
Definition structure_data.hpp:166
msgpack::zone msgpack_zone
Definition structure_data.hpp:198
std::vector< int32_t > chainsPerModel
Definition structure_data.hpp:197
std::vector< int32_t > groupTypeList
Definition structure_data.hpp:190
std::vector< BioAssembly > bioAssemblyList
Definition structure_data.hpp:167
std::vector< std::string > chainNameList
Definition structure_data.hpp:195
std::string releaseDate
Definition structure_data.hpp:165
std::vector< float > unitCell
Definition structure_data.hpp:160
std::vector< int8_t > secStructList
Definition structure_data.hpp:191
std::vector< Entity > entityList
Definition structure_data.hpp:168
std::map< std::string, msgpack::object > bondProperties
Definition structure_data.hpp:199
int32_t numAtoms
Definition structure_data.hpp:174
std::vector< float > occupancyList
Definition structure_data.hpp:188
std::vector< float > yCoordList
Definition structure_data.hpp:183
std::map< std::string, msgpack::object > modelProperties
Definition structure_data.hpp:203
std::vector< std::string > experimentalMethods
Definition structure_data.hpp:169
std::string title
Definition structure_data.hpp:163
Transformation definition for a set of chains.
Definition structure_data.hpp:109
MSGPACK_DEFINE_MAP(chainIndexList, matrix)
std::vector< int32_t > chainIndexList
Definition structure_data.hpp:110
bool operator==(Transform const &c) const
Definition structure_data.hpp:113
float matrix[16]
Definition structure_data.hpp:111
#define MMTF_SPEC_VERSION_MINOR
Definition structure_data.hpp:35
#define MMTF_SPEC_VERSION_MAJOR
MMTF spec version which this library implements.
Definition structure_data.hpp:34