├── .readthedocs.yaml
├── LICENSE
├── README.md
├── docs
    ├── examples
    │   ├── ACRDIN11.json
    │   ├── ACRDIN12.json
    │   ├── ACSALA24.json
    │   ├── ACSALA32.json
    │   ├── ACSALA35.json
    │   └── example_structures_filter_data.json
    └── source
    │   ├── analysis.rst
    │   ├── conf.py
    │   ├── features.rst
    │   ├── index.rst
    │   ├── installation.rst
    │   ├── procedure.rst
    │   └── requirements.txt
├── input_files
    └── input_data_extraction.txt
├── source_code
    ├── Crystal_Math_CSA.ipynb
    ├── create_reference_fragments.py
    ├── csd_data_extraction.py
    ├── csd_operations.py
    ├── generate_molecule_fragments.py
    ├── get_analysis_data.py
    ├── get_structure_data.py
    ├── get_structures_list.py
    ├── input_checks.py
    ├── io_operations.py
    ├── maths.py
    ├── space_group_operations.py
    ├── structure_operations.py
    ├── utilities.py
    └── visualize.py
└── source_data
    ├── atomic_properties.json
    ├── close_contacts_properties.json
    ├── fragment_list.json
    ├── fragments_geometry_data.txt
    ├── reference_fragment_list.json
    ├── space_group_properties.json
    └── variables.json


/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file for Sphinx projects
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version and other tools you might need
 8 | build:
 9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "3.9"
12 | 
13 | # Build documentation in the "docs/" directory with Sphinx
14 | sphinx:
15 |   configuration: docs/source/conf.py
16 |   
17 | # Optionally build your docs in additional formats such as PDF and ePub
18 | formats:
19 |   - pdf
20 |   - epub
21 | 
22 | # Python requirements
23 | python:
24 |   install:
25 |     - requirements: docs/source/requirements.txt
26 | 
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2024, Nikolaos Galanakis
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CrystalMath 
 2 | 
 3 | ## Tools for systematic exploration of the molecular structures in the CSD database towards a topology-based CSP
 4 | 
 5 | CrystalMath provides a comprehensive statistical analysis of molecular crystal structures from the CSD database and custom structures in the *.cif format. It offers deep insights into molecular packing trends, intermolecular interactions, and the topological nuances that dictate these patterns.
 6 | 
 7 | The algorithm begins with a systematic exploration of the CSD, extracting and analyzing topological and geometrical data. This method integrates a fundamental understanding that molecular crystals conform to specific geometrical constraints and topological patterns. Through statistical analysis, CrystalMath derives logical rules and predictive models that enhance our understanding of molecular structures.
 8 | 
 9 | **For comprehensive documentation, including installation, usage, and examples, please visit the** [**CrystalMath documentation on Read the Docs**](https://crystal-math.readthedocs.io/en/latest/index.html)
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/docs/examples/example_structures_filter_data.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "ACRDIN11": {
  3 |         "space_group": "P21/n",
  4 |         "z_crystal": 4.0,
  5 |         "z_prime": 1.0,
  6 |         "species": ["C","H","N"],
  7 |         "fragments": ["acridin"],
  8 |         "contact_pairs": [
  9 |             ["C","C","vdW",true],
 10 |             ["C","H","vdW",true],
 11 |             ["H","C","vdW",true],
 12 |             ["H","H","vdW",false],
 13 |             ["C","N","vdW",false],
 14 |             ["N","C","vdW",false],
 15 |             ["C","H","vdW",false],
 16 |             ["H","C","vdW",false],
 17 |             ["H","N","vdW",true],
 18 |             ["N","H","vdW",true],
 19 |             ["C","C","vdW",false],
 20 |             ["H","H","vdW",true],
 21 |             ["C","N","vdW",true],
 22 |             ["N","C","vdW",true]
 23 |         ],
 24 |         "contact_central_fragments": [
 25 |             ["acridin","vdW",true],
 26 |             ["acridin","vdW",false]
 27 |         ],
 28 |         "contact_fragment_pairs": [
 29 |             ["acridin","acridin","vdW",true],
 30 |             ["acridin","acridin","vdW",false]
 31 |         ]
 32 |     },
 33 |     "ACRDIN12": {
 34 |         "space_group": "P21/n",
 35 |         "z_crystal": 4.0,
 36 |         "z_prime": 1.0,
 37 |         "species": ["C","H","N"],
 38 |         "fragments": ["acridin"],
 39 |         "contact_pairs": [
 40 |             ["C","C","vdW",true],
 41 |             ["C","C","vdW",false],
 42 |             ["H","C","vdW",true],
 43 |             ["C","H","vdW",true],
 44 |             ["H","C","vdW",false],
 45 |             ["C","H","vdW",false],
 46 |             ["H","H","vdW",false],
 47 |             ["H","H","vdW",true],
 48 |             ["N","C","vdW",false],
 49 |             ["C","N","vdW",false],
 50 |             ["N","H","vdW",true],
 51 |             ["H","N","vdW",true],
 52 |             ["N","C","vdW",true],
 53 |             ["C","N","vdW",true]
 54 |         ],
 55 |         "contact_central_fragments": [
 56 |             ["acridin","vdW",true],
 57 |             ["acridin","vdW",false]
 58 |         ],
 59 |         "contact_fragment_pairs": [
 60 |             ["acridin","acridin","vdW",true],
 61 |             ["acridin","acridin","vdW",false]
 62 |         ]
 63 |     },
 64 |     "ACSALA24": {
 65 |         "space_group": "P21/c",
 66 |         "z_crystal": 8.0,
 67 |         "z_prime": 2.0,
 68 |         "species": ["C","H","O"],
 69 |         "fragments": ["benzene","carboxylic_acid","ester_aromatic-aliphatic"],
 70 |         "contact_pairs": [
 71 |             ["C","C","vdW",true],
 72 |             ["C","O","vdW",false],
 73 |             ["O","C","vdW",false],
 74 |             ["C","H","vdW",false],
 75 |             ["H","C","vdW",false],
 76 |             ["C","C","vdW",false],
 77 |             ["C","O","vdW",true],
 78 |             ["O","C","vdW",true],
 79 |             ["O","O","vdW",false],
 80 |             ["O","H","vdW",true],
 81 |             ["H","O","vdW",true],
 82 |             ["O","O","vdW",true],
 83 |             ["C","H","vdW",true],
 84 |             ["H","C","vdW",true],
 85 |             ["H","H","vdW",false],
 86 |             ["O","O","hbond",false],
 87 |             ["O","H","vdW",false],
 88 |             ["H","O","vdW",false],
 89 |             ["O","H","hbond",true],
 90 |             ["H","O","hbond",true],
 91 |             ["H","H","vdW",true]
 92 |         ],
 93 |         "contact_central_fragments": [
 94 |             ["benzene","vdW",true],
 95 |             ["carboxylic_acid","vdW",true],
 96 |             ["ester_aromatic-aliphatic","vdW",true],
 97 |             ["benzene","vdW",false],
 98 |             ["carboxylic_acid","vdW",false],
 99 |             ["ester_aromatic-aliphatic","vdW",false],
100 |             ["carboxylic_acid","hbond",false],
101 |             ["carboxylic_acid","hbond",true]
102 |         ],
103 |         "contact_fragment_pairs": [
104 |             ["benzene","benzene","vdW",true],
105 |             ["benzene","ester_aromatic-aliphatic","vdW",true],
106 |             ["carboxylic_acid","benzene","vdW",true],
107 |             ["carboxylic_acid","ester_aromatic-aliphatic","vdW",true],
108 |             ["ester_aromatic-aliphatic","benzene","vdW",true],
109 |             ["benzene","carboxylic_acid","vdW",true],
110 |             ["ester_aromatic-aliphatic","carboxylic_acid","vdW",true],
111 |             ["benzene","ester_aromatic-aliphatic","vdW",false],
112 |             ["carboxylic_acid","ester_aromatic-aliphatic","vdW",false],
113 |             ["ester_aromatic-aliphatic","benzene","vdW",false],
114 |             ["ester_aromatic-aliphatic","carboxylic_acid","vdW",false],
115 |             ["benzene","benzene","vdW",false],
116 |             ["carboxylic_acid","benzene","vdW",false],
117 |             ["benzene","carboxylic_acid","vdW",false],
118 |             ["ester_aromatic-aliphatic","ester_aromatic-aliphatic","vdW",false],
119 |             ["carboxylic_acid","carboxylic_acid","vdW",true],
120 |             ["carboxylic_acid","carboxylic_acid","vdW",false],
121 |             ["ester_aromatic-aliphatic","ester_aromatic-aliphatic","vdW",true],
122 |             ["carboxylic_acid","carboxylic_acid","hbond",false],
123 |             ["carboxylic_acid","carboxylic_acid","hbond",true]
124 |         ]
125 |     },
126 |     "ACSALA32": {
127 |         "space_group": "P21/c",
128 |         "z_crystal": 4.0,
129 |         "z_prime": 1.0,
130 |         "species": ["C","H","O"],
131 |         "fragments": ["benzene","carboxylic_acid","ester_aromatic-aliphatic"],
132 |         "contact_pairs": [
133 |             ["O","C","vdW",true],
134 |             ["C","O","vdW",true],
135 |             ["O","H","vdW",false],
136 |             ["H","O","vdW",false],
137 |             ["H","C","vdW",false],
138 |             ["C","H","vdW",false],
139 |             ["O","C","vdW",false],
140 |             ["C","O","vdW",false],
141 |             ["O","H","vdW",true],
142 |             ["H","O","vdW",true],
143 |             ["C","C","vdW",false],
144 |             ["C","C","vdW",true],
145 |             ["C","H","vdW",true],
146 |             ["H","C","vdW",true],
147 |             ["H","H","vdW",false],
148 |             ["H","H","vdW",true],
149 |             ["O","O","vdW",false],
150 |             ["O","O","hbond",false],
151 |             ["H","O","hbond",true],
152 |             ["O","H","hbond",true]
153 |         ],
154 |         "contact_central_fragments": [
155 |             ["carboxylic_acid","vdW",true],
156 |             ["benzene","vdW",true],
157 |             ["carboxylic_acid","vdW",false],
158 |             ["benzene","vdW",false],
159 |             ["ester_aromatic-aliphatic","vdW",false],
160 |             ["ester_aromatic-aliphatic","vdW",true],
161 |             ["carboxylic_acid","hbond",false],
162 |             ["carboxylic_acid","hbond",true]
163 |         ],
164 |         "contact_fragment_pairs": [
165 |             ["carboxylic_acid","benzene","vdW",true],
166 |             ["benzene","carboxylic_acid","vdW",true],
167 |             ["carboxylic_acid","benzene","vdW",false],
168 |             ["benzene","carboxylic_acid","vdW",false],
169 |             ["ester_aromatic-aliphatic","benzene","vdW",false],
170 |             ["benzene","ester_aromatic-aliphatic","vdW",false],
171 |             ["ester_aromatic-aliphatic","benzene","vdW",true],
172 |             ["benzene","ester_aromatic-aliphatic","vdW",true],
173 |             ["ester_aromatic-aliphatic","ester_aromatic-aliphatic","vdW",true],
174 |             ["benzene","benzene","vdW",false],
175 |             ["benzene","benzene","vdW",true],
176 |             ["ester_aromatic-aliphatic","ester_aromatic-aliphatic","vdW",false],
177 |             ["carboxylic_acid","carboxylic_acid","vdW",false],
178 |             ["carboxylic_acid","carboxylic_acid","hbond",false],
179 |             ["carboxylic_acid","carboxylic_acid","vdW",true],
180 |             ["carboxylic_acid","carboxylic_acid","hbond",true],
181 |             ["carboxylic_acid","ester_aromatic-aliphatic","vdW",true],
182 |             ["ester_aromatic-aliphatic","carboxylic_acid","vdW",true],
183 |             ["ester_aromatic-aliphatic","carboxylic_acid","vdW",false],
184 |             ["carboxylic_acid","ester_aromatic-aliphatic","vdW",false]
185 |         ]
186 |     },
187 |     "ACSALA35": {
188 |         "space_group": "P21/c",
189 |         "z_crystal": 4.0,
190 |         "z_prime": 1.0,
191 |         "species": ["C","H","O"],
192 |         "fragments": ["benzene","carboxylic_acid","ester_aromatic-aliphatic"],
193 |         "contact_pairs": [
194 |             ["C","O","vdW",false],
195 |             ["O","C","vdW",false],
196 |             ["C","H","vdW",false],
197 |             ["H","C","vdW",false],
198 |             ["C","C","vdW",true],
199 |             ["H","O","vdW",true],
200 |             ["O","H","vdW",true],
201 |             ["C","O","vdW",true],
202 |             ["O","C","vdW",true],
203 |             ["C","H","vdW",true],
204 |             ["H","C","vdW",true],
205 |             ["C","C","vdW",false],
206 |             ["H","O","vdW",false],
207 |             ["O","H","vdW",false],
208 |             ["H","H","vdW",true],
209 |             ["O","O","vdW",false],
210 |             ["O","O","hbond",false],
211 |             ["H","O","hbond",true],
212 |             ["O","H","hbond",true]
213 |         ],
214 |         "contact_central_fragments": [
215 |             ["benzene","vdW",false],
216 |             ["carboxylic_acid","vdW",false],
217 |             ["benzene","vdW",true],
218 |             ["carboxylic_acid","vdW",true],
219 |             ["ester_aromatic-aliphatic","vdW",false],
220 |             ["ester_aromatic-aliphatic","vdW",true],
221 |             ["carboxylic_acid","hbond",false],
222 |             ["carboxylic_acid","hbond",true]
223 |         ],
224 |         "contact_fragment_pairs": [
225 |             ["benzene","carboxylic_acid","vdW",false],
226 |             ["carboxylic_acid","benzene","vdW",false],
227 |             ["benzene","carboxylic_acid","vdW",true],
228 |             ["carboxylic_acid","benzene","vdW",true],
229 |             ["benzene","ester_aromatic-aliphatic","vdW",false],
230 |             ["ester_aromatic-aliphatic","benzene","vdW",false],
231 |             ["benzene","ester_aromatic-aliphatic","vdW",true],
232 |             ["ester_aromatic-aliphatic","benzene","vdW",true],
233 |             ["benzene","benzene","vdW",false],
234 |             ["benzene","benzene","vdW",true],
235 |             ["ester_aromatic-aliphatic","ester_aromatic-aliphatic","vdW",false],
236 |             ["ester_aromatic-aliphatic","ester_aromatic-aliphatic","vdW",true],
237 |             ["carboxylic_acid","carboxylic_acid","vdW",true],
238 |             ["carboxylic_acid","ester_aromatic-aliphatic","vdW",true],
239 |             ["ester_aromatic-aliphatic","carboxylic_acid","vdW",true],
240 |             ["carboxylic_acid","carboxylic_acid","vdW",false],
241 |             ["carboxylic_acid","carboxylic_acid","hbond",false],
242 |             ["carboxylic_acid","carboxylic_acid","hbond",true],
243 |             ["ester_aromatic-aliphatic","carboxylic_acid","vdW",false],
244 |             ["carboxylic_acid","ester_aromatic-aliphatic","vdW",false]
245 |         ]
246 |     }
247 | }


--------------------------------------------------------------------------------
/docs/source/analysis.rst:
--------------------------------------------------------------------------------
  1 | Post extraction analysis
  2 | ========================
  3 | This section outlines the default post-extraction analysis tools. 
  4 | The purpose of this tool is to perform qualitative and quantitative analysis of the structure, fragment, contact and hydrogen bond data for the selected group of structures.
  5 | The tool is designed to create scatter plots for pairs of parameters and histograms for the parameters extracted during the data extraction process.
  6 | 
  7 | For the scatter plots, the algorithm calculates the correlation coefficients for the selected set of variables, while for the histograms, it offers the option to fit distributions to the selected data, and report the characteristics of the fitted curve.
  8 | 
  9 | The Data Analysis Input File
 10 | ----------------------------
 11 | The first step is to modify the ``input_data_analysis.txt`` file based on the required criteria. The general format of the file and descriptions of each parameter are as follows:
 12 | 
 13 | Input File Format
 14 | ^^^^^^^^^^^^^^^^^
 15 | The configuration should be specified in JSON format as shown below:
 16 | 
 17 | .. code-block:: json
 18 | 
 19 |     {
 20 |          "plots_directory": "../csd_db_analysis/visualize/",
 21 |          "data_directory": "../csd_db_analysis/db_data/",
 22 |          "data_prefix": "homomolecular",
 23 |          "folder": "contacts_carboxylic-acid_carboxylic-acid_OH_hb_Zprime_1",
 24 |          "figure_size": [5,3.75],
 25 |          "save_figs": false,
 26 |          "data_filters": {
 27 |              "space_group": {
 28 |                  "is_active": false,
 29 |                  "type": "single",
 30 |                  "values": ["P21/c","P21/n"],
 31 |                  "operator": "or",
 32 |                  "refine_data": false
 33 |              }, 
 34 |              "z_crystal": {
 35 |                  "is_active": false,
 36 |                  "type": "single",
 37 |                  "values": [4,8],
 38 |                  "operator": "or",
 39 |                  "refine_data": false
 40 |              }, 
 41 |              "z_prime": {
 42 |                  "is_active": true,
 43 |                  "type": "single",
 44 |                  "values": [1],
 45 |                  "operator": "or",
 46 |                  "refine_data": false
 47 |              },
 48 |              "species": {
 49 |                  "is_active": false,
 50 |                  "type": "multiple",
 51 |                  "values": ["C","H","N","O"],
 52 |                  "operator": "or",
 53 |                  "refine_data": false
 54 |              },
 55 |              "fragments": {
 56 |                  "is_active": true,
 57 |                  "type": "multiple",
 58 |                  "values": [
 59 |                      "carboxylic_acid",
 60 |                      // ...
 61 |                      ],
 62 |                  "operator": "and",
 63 |                  "refine_data": true
 64 |              },
 65 |              "contact_pairs": {
 66 |                  "is_active": true,
 67 |                  "type": "multiple_lists",
 68 |                  "values": [
 69 |                      ["O","H","hbond",true],
 70 |                      // ...
 71 |                      ],
 72 |                  "operator": "or",
 73 |                  "refine_data": true
 74 |              },
 75 |              "contact_central_fragments": {
 76 |                  "is_active": true,
 77 |                  "type": "multiple_lists",
 78 |                  "values": [
 79 |                      ["carboxylic_acid","hbond",true],
 80 |                      // ...
 81 |                      ],
 82 |                  "operator": "or",
 83 |                  "refine_data": true
 84 |              },
 85 |              "contact_fragment_pairs": {
 86 |                  "is_active": true,
 87 |                  "type": "multiple_lists",
 88 |                  "values": [
 89 |                      ["carboxylic_acid","carboxylic_acid","hbond",true],
 90 |                      // ...
 91 |                      ],
 92 |                  "operator": "and",
 93 |                  "refine_data": true
 94 |              }
 95 |          },
 96 |          "plot_data_options": {
 97 |             "individual_space_groups_plots": true,
 98 |             "interactive": true,
 99 |             "percentiles": [[10,25,50,75,90],true,true,true],
100 |             "2D_scatter": [
101 |                 ["cell_length_b_sc","cell_length_c_sc",null],
102 |                 // ...
103 |         		],
104 |             "2D_scatter_marker": "o",
105 |             "2D_scatter_facecolor": "whitesmoke",
106 |             "2D_scatter_edgecolor": "black",
107 |             "2D_scatter_opacity": 1.0,
108 |             "3D_scatter": [
109 |                 ["cc_contact_atom_ref_bv_x","cc_contact_atom_ref_bv_y","cc_contact_atom_ref_bv_z",null],
110 |                 // ...
111 |                 ],
112 |             "3D_scatter_marker": "o",
113 |             "3D_scatter_facecolor": "whitesmoke",
114 |             "3D_scatter_edgecolor": "black",
115 |             "3D_scatter_opacity": 1.0,
116 |             "histogram": [
117 |                 ["cc_length",null,false],
118 |                 // ...
119 |                 ],
120 |             "histogram_density": false,
121 |             "titles": false
122 |          }
123 |     }
124 | 
125 | 
126 | Key Descriptions
127 | ^^^^^^^^^^^^^^^^
128 | - ``plots_directory``
129 |     Specifies the directory where plots will be saved. Using the default option is recommended.
130 | - ``data_directory``
131 |     The directory where the extracted data is stored. It must match the ``"save_directory"`` specified in the ``input_data_extraction.txt`` file.
132 | - ``data_prefix``
133 |     A prefix applied to output files to facilitate their identification. This must be consistent with the ``"data_prefix"`` in the ``input_data_extraction.txt`` file.
134 | - ``figure_size``
135 |     Defines the dimensions of exported figures in inches, formatted as :math:`(W \times H)`. The default Matplotlib size is :math:`(6.4 \times 4.8)`. To place two figures side by side in a 12-inch wide document using an 11pt font, the optimal size is :math:`(5.0 \times 3.75)`. Adjust dimensions according to your document's specific requirements.
136 | - ``data_filters``
137 |     Details for filtering structures for the analysis. Structures can be filtered based on: 
138 | 
139 |     - **Space group**
140 |         The space group of the structure.
141 |     - :math:`Z` **value**
142 |         The total number of molecules in the unit cell, equal to (Number of symmetry operations) :math:`\times` (Number of molecules in the asymmetric unit).
143 |     - :math:`Z^{\prime}` **value**
144 |         The number of molecules in the asymmetric unit.
145 |     - **Atomic species**
146 |         The different atomic species found in the structure.
147 |     - **Fragments**
148 |         The different fragments found in the structure.
149 |     - **Contact atomic pairs**    
150 |         The different atomic pairs found for the contacts in the structure.
151 |     - **Contact central fragments**
152 |         The different central fragments for the contacts in the structure.
153 |     - **Contact fragment pairs**
154 |         The different fragment pairs found for the contacts in the structure.
155 |     
156 |     Each filter has 5 options:
157 | 
158 |     - ``is_active``
159 |         Set to ``true`` to activate the filter. Setting to ``false`` will deactivate the filter.
160 |     - ``type``
161 |         The type of the filter. The available options are 
162 |     
163 |         - ``single``
164 |             A structure is characterized by a single specific value for the variable (for example the space group).
165 |         - ``multiple``
166 |             A structure is characterized by a list of values for the specific variable (for example the atomic species in the structure).
167 |         - ``multiple_lists``
168 |             A structure is characterized by a list of values for the specific variable, but each value is now a list (for example the contact pairs in the structure, where each contact pair is characterized by the species of the central atom, the species of the contact atom, the type of the contact and a boolean that states if the contact is in line of sight).
169 |     
170 |     - ``values``
171 |         A list (or a list of lists) for the allowed values.
172 |     - ``operator``
173 |         The available options are
174 |     
175 |         - ``"or"``
176 |             The filter will check for structures that have **any** of the declared values.
177 |         - ``"and"``
178 |             The filter will check for structures that have **all** the declared values.
179 |         
180 |     - ``refine_data``
181 |         Set to ``true`` to refine the data for all the components in the structure based on the values of the filter. 
182 | 
183 | - ``plot_data_options`` 
184 |     Details the plotting options:
185 | 
186 |     - ``individual_space_groups_plots``
187 |         Set to ``true`` to create plots across all space groups and for each space group separately.
188 | 
189 |     - ``interactive``
190 |         Set to ``true`` to create interactive ``*.html`` plots with the plotly package. (Currently this is the only option supported. A routine to generate publication-ready ``*.png`` plots is under development.)
191 | 
192 |     - ``percentiles``
193 |         The options to calculate the kde density for the 2D and 3D scatter plots. The format for the values includes a list of integers (or floats) representing the desired percentiles followed by 3 booleans. The booleans activate, respectively, the creation of the lowest percentile (in the example the 10%), the middle percentiles (25%, 50%, 75%), and the top percentile (90%). For the interactive ``*.html`` plots, it is recommended to set all options to ``true`` as the interactive plots allow to toggle on/off the different percentiles. For static ``*.png`` images, the booleans should be adjusted to include the desired percentiles in the plots.
194 | 
195 |     - ``2D_scatter``/``3D_scatter``
196 |         A list of the requested 2D/3D scatter plots to be generated. Each entry has the format ``[variable_1, variable_2, group_variable]``/``[variable_1, variable_2, variable_3, group_variable]``. The ``variable_1``, ``variable_2`` and ``variable_3`` are the variables used for the scatter plots. The entry ``group_variable`` declares the variable to group data and plot them separately based on the values of the group variable. Setting ``group_variable`` to ``null`` generates a single plot for the full set of selected data. The group variable can take different values depending on the nature of  ``variable_1``, ``variable_2``, ``variable_3``.
197 | 
198 |     - ``2D_scatter_marker``/``3D_scatter_marker``
199 |         The marker for the data points (static images only). For the available options please refer to the `official matplotlib documentation <https://matplotlib.org/stable/api/markers_api.html>`_. 
200 |     
201 |     - ``2D_scatter_facecolor``/``3D_scatter_facecolor``
202 |         The marker face color for the data points (static images only). For the available options please refer to the `official matplotlib documentation <https://matplotlib.org/stable/gallery/color/named_colors.html>`_. 
203 |     
204 |     - ``2D_scatter_edgecolor``/``3D_scatter_edgecolor``
205 |         The marker edge color for the data points (static images only). For the available options please refer to the `official matplotlib documentation <https://matplotlib.org/stable/gallery/color/named_colors.html>`_. 
206 |     
207 |     - ``2D_scatter_opacity``/``3D_scatter_opacity``
208 |         The marker opacity for the data points (static images only). Can take a value in the range :math:`[0,1]`.
209 | 
210 |     - ``histogram``
211 |         A list of the requested histograms to be generated. Each entry has the format ``[variable, group_variable, fit_kde_curve]``. The ``group_variable`` works in a similar way as for the 2D/3D scatter plots. The ``fit_kde_curve`` entry can be set to ``true`` to fit a kde curve to the histogram data.
212 | 
213 |     - ``histogram_density``
214 |         Setting to ``false`` will plot on the ``y`` axis the occurrences. Setting to ``true`` will plot the frequency.
215 | 
216 | List of available variables
217 | ---------------------------
218 | 
219 | The available variables are included in the file ``variables.json`` located in the ``source_data`` folder. Currently, the algorithm supports 127 different variables grouped into 5 families (See details below). Details for each variable can be found in the `Data Extraction Procedure section <https://crystal-math.readthedocs.io/en/latest/procedure.html>`_. Each variable is described using a dictionary entry in the following format.
220 | 
221 | .. code-block :: json
222 | 
223 |     "variable_name": {
224 |         "latex_name": string,
225 |         "html_name": string,
226 |         "family": string,
227 |     	"path": [list of strings],
228 |     	"position_symmetry": [boolean,boolean,boolean,integer]
229 |       }
230 | 
231 | 
232 | Key Descriptions
233 | ^^^^^^^^^^^^^^^^
234 | 
235 | - ``variable_name``
236 |     The name of the variable. Currently 127 variables are supported.
237 | 
238 | - ``latex_name``
239 |     The name of the variable in LaTeX format used to render static ``*.png`` images.
240 | 
241 | - ``html_name``
242 |     The name of the variable in html format used to render interactive ``*.html`` plots.
243 | 
244 | - ``family``
245 |     The family of the variable. Currently the available variables are grouped into 5 different families based on the nature of the variable:
246 | 
247 |     - ``structure`` variable family (27 variables)
248 | 
249 |         Includes all the variables related to the general characteristics of the structure.
250 | 
251 |         - ``str_id``
252 |         - ``space_group``
253 |         - ``z_crystal``
254 |         - ``z_prime``
255 |         - ``formula``
256 |         - ``species``
257 |         - ``cell_length_a``
258 |         - ``cell_length_b``
259 |         - ``cell_length_c``
260 |         - ``cell_length_a_sc``
261 |         - ``cell_length_b_sc``
262 |         - ``cell_length_c_sc``
263 |         - ``cell_angle_alpha``
264 |         - ``cell_angle_beta``
265 |         - ``cell_angle_gamma``
266 |         - ``cell_volume``
267 |         - ``cell_density``
268 |         - ``vdWFV``
269 |         - ``SAS``
270 |         - ``E_tot``
271 |         - ``E_el``
272 |         - ``E_vdW``
273 |         - ``E_vdW_at``
274 |         - ``E_vdW_rep``
275 |         - ``E_hb``
276 |         - ``E_hb_at``
277 |         - ``E_hb_rep``
278 | 
279 |     - ``fragment`` variable family (52 variables)
280 | 
281 |         Includes all the variables related to the general characteristics of the fragments in the structure.
282 | 
283 |         - ``fragment``
284 |         - ``fragment_x``
285 |         - ``fragment_y``
286 |         - ``fragment_z``
287 |         - ``fragment_u``
288 |         - ``fragment_v``
289 |         - ``fragment_w``
290 |         - ``fragment_e1_x``
291 |         - ``fragment_e1_y``
292 |         - ``fragment_e1_z``
293 |         - ``fragment_e1_u``
294 |         - ``fragment_e1_v``
295 |         - ``fragment_e1_w``
296 |         - ``fragment_w11_u``
297 |         - ``fragment_w11_v``
298 |         - ``fragment_w11_w``
299 |         - ``fragment_w12_u``
300 |         - ``fragment_w12_v``
301 |         - ``fragment_w12_w``
302 |         - ``fragment_w1_angle_1``
303 |         - ``fragment_w1_angle_2``
304 |         - ``fragment_e1_d_min``
305 |         - ``fragment_e2_x``
306 |         - ``fragment_e2_y``
307 |         - ``fragment_e2_z``
308 |         - ``fragment_e2_u``
309 |         - ``fragment_e2_v``
310 |         - ``fragment_e2_w``
311 |         - ``fragment_w21_u``
312 |         - ``fragment_w21_v``
313 |         - ``fragment_w21_w``
314 |         - ``fragment_w22_u``
315 |         - ``fragment_w22_v``
316 |         - ``fragment_w22_w``
317 |         - ``fragment_w2_angle_1``
318 |         - ``fragment_w2_angle_2``
319 |         - ``fragment_e2_d_min``
320 |         - ``fragment_e3_x``
321 |         - ``fragment_e3_y``
322 |         - ``fragment_e3_z``
323 |         - ``fragment_e3_u``
324 |         - ``fragment_e3_v``
325 |         - ``fragment_e3_w``
326 |         - ``fragment_w31_u``
327 |         - ``fragment_w31_v``
328 |         - ``fragment_w31_w``
329 |         - ``fragment_w32_u``
330 |         - ``fragment_w32_v``
331 |         - ``fragment_w32_w``
332 |         - ``fragment_w3_angle_1``
333 |         - ``fragment_w3_angle_2``
334 |         - ``fragment_e3_d_min``
335 | 
336 |     - ``fragment_atom`` variable family (14 variables)
337 | 
338 |         Includes all the variables related to the characteristics of the atoms in each fragment.
339 | 
340 |         - ``fragment_atom_species``
341 |         - ``fragment_atom_x``
342 |         - ``fragment_atom_y``
343 |         - ``fragment_atom_z``
344 |         - ``fragment_atom_u``
345 |         - ``fragment_atom_v``
346 |         - ``fragment_atom_w``
347 |         - ``fragment_atom_bv_x``
348 |         - ``fragment_atom_bv_y``
349 |         - ``fragment_atom_bv_z``
350 |         - ``fragment_atom_bv_u``
351 |         - ``fragment_atom_bv_v``
352 |         - ``fragment_atom_bv_w``
353 |         - ``fragment_atom_dzzp_min``
354 | 
355 |     - ``contact`` variable family (3 variables)
356 | 
357 |         Includes all the variables related to the general characteristics of the close contacts in the structure.
358 | 
359 |         - ``cc_length``
360 |         - ``cc_type``
361 |         - ``cc_is_in_los``
362 |    
363 |     - ``contact_atom`` variable family (31 variables)
364 | 
365 |         Includes all the variables related to the atoms forming the close contacts in the structure.
366 | 
367 |         - ``cc_central_atom_species``
368 |         - ``cc_central_atom_fragment``
369 |         - ``cc_central_atom_x``
370 |         - ``cc_central_atom_y``
371 |         - ``cc_central_atom_z``
372 |         - ``cc_central_atom_u``
373 |         - ``cc_central_atom_v``
374 |         - ``cc_central_atom_w``
375 |         - ``cc_central_atom_bv_x``
376 |         - ``cc_central_atom_bv_y``
377 |         - ``cc_central_atom_bv_z``
378 |         - ``cc_central_atom_ref_bv_x``
379 |         - ``cc_central_atom_ref_bv_y``
380 |         - ``cc_central_atom_ref_bv_z``
381 |         - ``cc_contact_atom_species``
382 |         - ``cc_contact_atom_fragment``
383 |         - ``cc_contact_atom_x``
384 |         - ``cc_contact_atom_y``
385 |         - ``cc_contact_atom_z``
386 |         - ``cc_contact_atom_u``
387 |         - ``cc_contact_atom_v``
388 |         - ``cc_contact_atom_w``
389 |         - ``cc_contact_atom_bv_x``
390 |         - ``cc_contact_atom_bv_y``
391 |         - ``cc_contact_atom_bv_z``
392 |         - ``cc_contact_atom_ref_bv_x``
393 |         - ``cc_contact_atom_ref_bv_y``
394 |         - ``cc_contact_atom_ref_bv_z``
395 |         - ``cc_contact_atom_ref_bv_r``
396 |         - ``cc_contact_atom_ref_bv_theta``
397 |         - ``cc_contact_atom_ref_bv_phi``
398 | - ``path``
399 |     List of strings pointing to the location of the value for each variable within each structure dictionary.
400 | 
401 | - ``position_symmetry``
402 |     The symmetry operations that are applied to get the complete set of values for a crystal. The first boolean declares if a rotation operation is applied to the variable and is ``true`` only for :math:`(x,y,z)` or :math:`(u,v,w)` related coordinates. The second boolean is ``true`` when translational symmetry is applied and the third is ``true`` for variables that are restricted within the limits of the unit cell (such as the fractional atomic coordinates). The fourth entry in the list is an integer declaring the group ID for each variable. If set to ``-1``, the variable is not part of a group. If set to ``0``, the variable is a member of the structure geometry variables :math:`(a,b,c,\alpha,\beta,\gamma,\Omega)` that are required to apply coordinate transformations to any positional variable. If set to an integer :math:`>0`, the variable is part of a specific group of connected positional variables, such as the coordinates of an atom. There are 24 groups of variables:
403 | 
404 |     - ``1``. ``['cc_central_atom_x', 'cc_central_atom_y', 'cc_central_atom_z']``
405 |     - ``2``. ``['cc_central_atom_u', 'cc_central_atom_v', 'cc_central_atom_w']``
406 |     - ``3``. ``['cc_central_atom_bv_x', 'cc_central_atom_bv_y', 'cc_central_atom_bv_z']``
407 |     - ``4``. ``['cc_contact_atom_x', 'cc_contact_atom_y', 'cc_contact_atom_z']``
408 |     - ``5``. ``['cc_contact_atom_u', 'cc_contact_atom_v', 'cc_contact_atom_w']``
409 |     - ``6``. ``['cc_contact_atom_bv_x', 'cc_contact_atom_bv_y', 'cc_contact_atom_bv_z']``
410 |     - ``7``. ``['fragment_x', 'fragment_y', 'fragment_z']``
411 |     - ``8``. ``['fragment_u', 'fragment_v', 'fragment_w']``
412 |     - ``9``. ``['fragment_e1_x', 'fragment_e1_y', 'fragment_e1_z']``
413 |     - ``10``. ``['fragment_e1_u', 'fragment_e1_v', 'fragment_e1_w']``
414 |     - ``11``. ``['fragment_w11_u', 'fragment_w11_v', 'fragment_w11_w']``
415 |     - ``12``. ``['fragment_w12_u', 'fragment_w12_v', 'fragment_w12_w']``
416 |     - ``13``. ``['fragment_e2_x', 'fragment_e2_y', 'fragment_e2_z']``
417 |     - ``14``. ``['fragment_e2_u', 'fragment_e2_v', 'fragment_e2_w']``
418 |     - ``15``. ``['fragment_w21_u', 'fragment_w21_v', 'fragment_w21_w']``
419 |     - ``16``. ``['fragment_w22_u', 'fragment_w22_v', 'fragment_w22_w']``
420 |     - ``17``. ``['fragment_e3_x', 'fragment_e3_y', 'fragment_e3_z']``
421 |     - ``18``. ``['fragment_e3_u', 'fragment_e3_v', 'fragment_e3_w']``
422 |     - ``19``. ``['fragment_w31_u', 'fragment_w31_v', 'fragment_w31_w']``
423 |     - ``20``. ``['fragment_w32_u', 'fragment_w32_v', 'fragment_w32_w']``
424 |     - ``21``. ``['fragment_atom_x', 'fragment_atom_y', 'fragment_atom_z']``
425 |     - ``22``. ``['fragment_atom_u', 'fragment_atom_v', 'fragment_atom_w']``
426 |     - ``23``. ``['fragment_atom_bv_x', 'fragment_atom_bv_y', 'fragment_atom_bv_z']``
427 |     - ``24``. ``['fragment_atom_bv_u', 'fragment_atom_bv_v', 'fragment_atom_bv_w']``
428 | 
429 |     In case a positional variable from the above lists is selected to be displayed in any 2D/3D scatter plot, the algorithm adds the values for all the variables in the same group as well as the variables in group ``0`` to the analysis data to be able to perform the necessary coordinate transformations. 
430 | 
431 | Example usage of the filters
432 | ----------------------------
433 | 
434 | The filters for the analysis are designed in a way to facilitate detailed analysis of any of the available variables in refined sets of data consistent with the needs of every user. The correct combination of the filters is crucial in order to analyze the correct set of data. Below we provide examples on how to use the filters in different scenarios:  
435 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'CrystalMath'
21 | copyright = '2024, Nikolaos Galanakis'
22 | author = 'Nikolaos Galanakis'
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Sphinx extension module names, as strings: either extensions shipped with
28 | # Sphinx (named 'sphinx.ext.*') or custom ones. Only 'sphinx.ext.mathjax' is
29 | # enabled in this list.
30 | extensions = [
31 |     'sphinx.ext.mathjax'
32 | ]
33 | 
34 | # Add any paths that contain templates here, relative to this directory.
35 | templates_path = ['_templates']
36 | 
37 | # List of patterns, relative to source directory, that match files and
38 | # directories to ignore when looking for source files.
39 | # This pattern also affects html_static_path and html_extra_path.
40 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
41 | 
42 | 
43 | # -- Options for HTML output -------------------------------------------------
44 | 
45 | # The theme to use for HTML and HTML Help pages.  See the documentation for
46 | # a list of builtin themes.
47 | #
48 | html_theme = 'sphinx_rtd_theme'
49 | 
50 | # Paths with custom static files (such as style sheets), relative to this
51 | # directory; copied after the builtin static files, so "default.css" here
52 | # overrides the builtin one. NOTE(review): confirm a '_static' folder exists.
53 | html_static_path = ['_static']


--------------------------------------------------------------------------------
/docs/source/features.rst:
--------------------------------------------------------------------------------
 1 | Features
 2 | ========
 3 | 
 4 | CrystalMath provides a comprehensive statistical analysis of molecular crystal structures from the CSD database and custom structures in the ``*.cif`` format. It offers deep insights into molecular packing trends, intermolecular interactions, and the topological nuances that dictate these patterns.
 5 | 
 6 | The algorithm begins with a systematic exploration of the CSD, extracting and analyzing topological and geometrical data. This method integrates a fundamental understanding that molecular crystals conform to specific geometrical constraints and topological patterns. Through statistical analysis, CrystalMath derives logical rules and predictive models that enhance our understanding of molecular structures.
 7 | 
 8 | This section outlines the main features of the Crystal Math software, which include analysis of existing structures within the CSD and predictions of molecular crystal structures.
 9 | 
10 | Analysis of Existing Structures within the CSD
11 | ----------------------------------------------
12 | This feature represents the algorithm's investigative aspect, wherein it meticulously explores the repository of the CSD to extract and analyze structural data. The process employs a sophisticated fragment-based approach to assess molecular geometry, allowing it to discern subtle nuances and patterns within the crystal structures.
13 | 
14 | This computational process is not merely a data retrieval mechanism. It involves the calculation of crucial geometrical and topological properties, including:
15 | 
16 | - **Relative orientation**
17 | - **Plane intersections with unit cell vertices**
18 | - **Close contacts**
19 | - **Hydrogen bonds**
20 | - **Void analysis in the unit cell**
21 | 
22 | These computations are invaluable, forming the bedrock of the dataset that the subsequent predictive stage will utilize. The adaptability of this feature allows researchers to set specific criteria, enabling the algorithm to target structures that bear direct relevance to their studies, thereby ensuring a customized, relevant, and rich analytical output.
23 | 
24 | Prediction of Molecular Crystal Structures
25 | ------------------------------------------
26 | Building upon the robust foundation laid by the analytical phase, the prediction feature marks the algorithm’s leap into the realm of prospective crystallography. This innovative function does not merely extrapolate from existing data but employs a rigorous mathematical, geometrical, and topological framework to envision and predict feasible crystal structures.
27 | 
28 | Bypassing traditional methods that rely heavily on force fields and energy calculations, this feature stands out due to its unique approach, essentially rewriting the rules of crystal structure prediction. By utilizing the detailed insights gleaned from the analysis of existing CSD structures, the algorithm assesses countless possibilities and predicts structures that are not just theoretically plausible but ripe for synthesis and experimental verification.
29 | 
30 | Detailed Analysis of Existing CSD Structures
31 | --------------------------------------------
32 | The algorithm delves into the CSD, applying user-defined criteria to identify and analyze structures pertinent to your research. These criteria could range from the atomic species present in the crystal to more complex attributes such as:
33 | 
34 | - Space group
35 | - :math:`Z^{\prime}` value
36 | - Molecular weight for the components within the asymmetric unit
37 | 
38 | Fragment-Based Analysis
39 | ^^^^^^^^^^^^^^^^^^^^^^^
40 | The script communicates with the CSD database, seeking structures that align with specific user-defined rules. Upon identifying the relevant structures, the algorithm proceeds to extract critical data, focusing particularly on geometric and topological properties that inform the subsequent prediction phase.
41 | 
42 | A pivotal aspect of the CSP Algorithm's analytical prowess hinges on its geometric interpretation of intermolecular forces, by extracting properties for the close contacts and hydrogen bonding within crystal structures. These interactions are not merely physical constraints but are insightful topological and energetic indicators that guide the strategic assembly of molecular crystals.
43 | 
44 | Geometrical and Topological Properties Analysis
45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
46 | 
47 | The extracted data encompasses several key molecular aspects, with calculations and analyses including, but not limited to:
48 | 
49 | - **Orientation Relative to Inertia Frame**: Assessing molecular and fragmentary orientation within the unit cell, referenced against their inertia frames. This analysis goes beyond simple spatial representation; it is a profound exploration of the positional relationship between molecular fragments and the encompassing lattice geometry. The algorithm calculates the orientations by establishing a molecule's inertia frame, a defined coordinate system based on the molecule's moment of inertia. This frame serves as a reference point, allowing for a standardized comparison of molecular orientations. With this approach, the algorithm can systematically analyze how different fragments within a molecule orient themselves relative to each other and their collective orientation within the unit cell.
50 | - **Relative positions of principal planes of inertia**: The algorithm computes the distances of certain points in a unit cell to the principal planes of inertia (planes perpendicular to the principal axes of inertia, passing through the center of mass of each fragment). This calculation is instrumental in understanding the molecule's spatial orientation and placement.
51 | - **Inter-Fragment Correlations**: By observing the relative orientations of fragments within a molecule, the algorithm unveils potential correlations in geometric conformations. These insights are crucial for understanding the molecule's structural dynamics, offering clues about its stability, reactivity, or interactions with neighboring entities.
52 | - **Molecule-Unit Cell Interplay**: Expand the analysis to explore how the molecule fits and orients itself within the unit cell. This exploration can reveal critical insights into whether the molecule's orientation is influenced by the unit cell's geometric constraints, contributing to a deeper understanding of the crystal packing phenomena.
53 | - **Predictive Insights for New Structures**: By identifying trends and correlations between molecular orientation and unit cell geometry, the algorithm can hypothesize about probable orientations for molecules in novel crystal structures, providing a reliable foundation for anticipating the behavior of molecules in uncharted configurations.
54 | 
55 | In essence, the orientation analysis relative to the inertia frame is not a mere calculation but a holistic examination of the molecule's spatial narrative. It provides contextual insights that are indispensable for predicting how new molecular assemblies might accommodate themselves within various lattice frameworks, essentially influencing the design strategy for new materials with desired properties.
56 | 
57 | - **Close Contacts**: Traditional analysis of close contacts often stops at identifying distances shorter than the sum of van der Waals radii. However, the CSP Algorithm delves deeper, recognizing that the strength of these contacts is an extremely important topological property, intimately tied to the interaction energy's minimum. By examining a comprehensive matrix of atomic species pairs and their distribution across various space groups, the algorithm calculates the optimal strength of close contacts. In addition, it analyzes the spatial distribution of the contacts with respect to the center of mass for each fragment. This analysis provides a benchmark for constructing molecular crystals with judicious interatomic interactions, ensuring structural stability without compromising the lattice's integrity. These calculated parameters are instrumental during the prediction phase, where the algorithm utilizes this statistical backbone to forecast interaction energies, guiding the assembly of molecules within the crystal lattice in a manner that's energetically favorable.
58 | - **Hydrogen Bonds**: The analysis of the hydrogen bonds within the crystal matrix provides insights into their geometric configuration, which is tied to their energetic profile. This understanding is crucial because hydrogen bonds impart significant directional character to molecular arrangements in crystal lattices, influencing both structure and properties. The CSP Algorithm evaluates the geometry of potential hydrogen bonds, ensuring not only geometric precision but also the right balance of strength and directionality in these interactions. This information is vital for constructing viable hydrogen-bonded networks, especially in complex molecular crystals where these interactions dictate structural feasibility and stability.
59 | - **Voids in Unit Cell**: Analyzing the van der Waals free volume and solvent-accessible surface within the crystal lattice provides insights into the potential for molecular movement, stability under pressure, or where guest molecules might reside.
60 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. Crystal_Math documentation master file, created by
 2 |    sphinx-quickstart on Tue Apr 16 08:52:30 2024.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to Crystal_Math's documentation!
 7 | ========================================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 |    
13 |    features
14 |    installation
15 |    procedure
16 |    analysis
17 | 
18 | 


--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | We highly recommend using **Anaconda** for its ease of package management and environment handling, as it includes numerous scientific computing packages that facilitate a smoother setup process.
 5 | 
 6 | Download and Install Anaconda
 7 | -----------------------------
 8 | 
 9 | Visit the `Anaconda Distribution page <https://www.anaconda.com/products/distribution>`_ to download and install the distribution. Please ensure you download the version that includes ``Python 3.9`` or higher.
10 | 
11 | Required Python Packages
12 | ------------------------
13 | The following Python packages are necessary for running Crystal Math:
14 | 
15 | - ``ast``
16 | - ``datetime``
17 | - ``itertools``
18 | - ``json``
19 | - ``matplotlib``
20 | - ``networkx``
21 | - ``numpy``
22 | - ``os``
23 | - ``scipy``
24 | - ``re``
25 | - ``time``
26 | 
27 | These can be installed using the following command:
28 | 
29 | .. code-block:: bash
30 | 
31 |     pip install matplotlib networkx numpy scipy
32 | 
33 | Note that some packages (``ast``, ``datetime``, ``itertools``, ``json``, ``os``, ``re``, ``time``) are part of the Python Standard Library and do not need installation via pip.
34 | 
35 | Installing the CSD Python API
36 | -----------------------------
37 | The current version requires the installation of the CSD Python API, which is crucial for the statistical analysis phase and for retrieving molecular structure data. Due to specific installation instructions and licensing, please refer to the `official installation notes <https://downloads.ccdc.cam.ac.uk/documentation/API/installation_notes.html>`_ for detailed guidance. Adhere strictly to their guidelines to ensure full functionality within the CSP algorithm environment.
38 | 
39 | Installing the code
40 | -------------------
41 | The code itself requires **no installation** of additional software packages or libraries, other than Git for obtaining the code. Simply follow the steps below to clone the repository to your local machine and run the code directly.
42 | 
43 | #. **Git**: Git is a version control system that lets you manage and keep track of your source code history. If you don't already have Git installed, you can download it from `the Git website <https://git-scm.com/downloads>`_.
44 | 
45 | Cloning the Repository
46 | ^^^^^^^^^^^^^^^^^^^^^^
47 | 
48 | Cloning a repository means making a copy of the code on your local machine. This is done via Git. To clone the repository, follow these steps:
49 | 
50 | 1. Open a terminal window. On Windows, you can search for ``CMD`` or ``Command Prompt`` in your start menu. On macOS, you can open the Terminal from your Applications folder under Utilities.
51 | 
52 | 2. Use the following command to clone the repository:
53 |    
54 |    .. code-block:: bash
55 |    
56 |        git clone https://github.com/nigalanakis/Crystal_Math
57 | 
58 | 3. After the cloning process is complete, navigate to the newly created directory:
59 | 
60 |    .. code-block:: bash
61 |    
62 |        cd Crystal_Math


--------------------------------------------------------------------------------
/docs/source/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx>=5.0
2 | sphinx_rtd_theme==1.0.0
3 | 
4 | 


--------------------------------------------------------------------------------
/input_files/input_data_extraction.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "data_directory": "../csd_db_analysis/db_data/",
 3 |   "data_prefix": "homomolecular",
 4 |   "get_refcode_families": false,
 5 |   "cluster_refcode_families": false,
 6 |   "get_unique_structures": false,
 7 |   "get_structure_data": false,
 8 |   "get_structure_filter_data": false,
 9 |   "unique_structures_clustering_method": "vdWFV",
10 |   "structure_list": ["csd-unique", "all"],
11 |   "structures_to_exclude": ["BALDUP", "CEMVAS", "DAGRIN", "DAHKUV", "FADGEW", "HUPCUT", "JIKXOT", "LUQDAE", "PEVLOR", "TEVYAV", "VIRLOY", "ZEPDAZ04"],
12 |   "crystal_type": ["homomolecular"],
13 |   "target_species": ["C", "H", "N", "O", "F", "Cl", "Br", "I", "P", "S"],
14 |   "add_symmetric_positions": true,
15 |   "target_space_groups": ["P1", "P-1", "P21", "C2", "Pc", "Cc", "P21/m", "C2/m", "P2/c", "P21/c", "P21/n", "C2/c", "P21212", "P212121", "Pca21", "Pna21", "Pbcn", "Pbca", "Pnma", "R-3", "I41/a"],
16 |   "target_z_prime_values": [1, 2, 3, 4, 5],
17 |   "target_fragments": [],
18 |   "molecule_weight_limit": 500.0,
19 |   "molecule_formal_charges": [0],
20 |   "center_molecule": true,
21 |   "add_full_component": true,
22 |   "fragments_to_check_alignment": [],
23 |   "proposed_vectors_n_max": 5
24 | }
25 | 


--------------------------------------------------------------------------------
/source_code/create_reference_fragments.py:
--------------------------------------------------------------------------------
 1 | import json 
 2 | import numpy as np
 3 | 
 4 | from maths import calculate_inertia
 5 | from maths import center_of_mass
 6 | from maths import ensure_right_handed_coordinate_system
 7 | from maths import sort_eigenvectors
 8 | 
 9 | def create_reference_fragments():
10 |     """ 
11 |     Converts the input fragment list into a space fixed list of fragments.
12 |     
13 |     Parameters
14 |     ----------
15 |     
16 |     Returns
17 |     -------
18 |     reference_fragment_list : dict
19 |         A dictionary with the space fixed reference fragments.
20 |     """
21 |     with open("../source_data/fragment_list.json","r") as f:
22 |         fragment_list = json.load(f)
23 |     
24 |     reference_fragment_list = {}
25 |     for fragment in fragment_list:
26 |         fragment_atoms_mass = np.array(fragment_list[fragment]["mass"])
27 |         fragment_atoms_pos = np.array(fragment_list[fragment]["coordinates"])
28 |         fragment_com = center_of_mass(fragment_atoms_mass,fragment_atoms_pos)
29 |         fragment_atoms_bv = fragment_atoms_pos - fragment_com
30 |     
31 |         inertia_eigenvalues, inertia_eigenvectors = calculate_inertia(fragment_atoms_mass,
32 |                                                                       fragment_atoms_bv)
33 |             
34 |         inertia_eigenvalues, inertia_eigenvectors = sort_eigenvectors(inertia_eigenvalues, 
35 |                                                                       inertia_eigenvectors)
36 |         
37 |         inertia_eigenvectors = ensure_right_handed_coordinate_system(inertia_eigenvectors)
38 |         
39 |         fragment_atoms_sfc = np.matmul(fragment_atoms_bv, 
40 |                                         inertia_eigenvectors)
41 |         
42 |         fragment_atoms_sfc = np.round(fragment_atoms_sfc, decimals=4)
43 |     
44 |         reference_fragment_list[fragment] = {"smarts": fragment_list[fragment]["smarts"],
45 |                                              "species": fragment_list[fragment]["species"],
46 |                                              "coordinates_sf": fragment_atoms_sfc.tolist(),
47 |                                              "mass": fragment_list[fragment]["mass"],
48 |                                              "atoms_to_align": fragment_list[fragment]["atoms_to_align"]}
49 |     
50 |     # Write the reference fragment to json file
51 |     with open('../source_data/reference_fragment_list.json', 'w') as f:
52 |         json.dump(reference_fragment_list, f, indent=4)  
53 |         
54 |     return reference_fragment_list
55 |     
56 | 
57 | 


--------------------------------------------------------------------------------
/source_code/csd_data_extraction.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from datetime import datetime
 3 | from time import process_time as timer   
 4 | 
 5 | from csd_operations import cluster_refcode_families
 6 | from csd_operations import get_refcode_families
 7 | from csd_operations import get_unique_structures
 8 | from generate_molecule_fragments import create_reference_fragments
 9 | from get_structure_data import get_structure_data, get_structure_filter_data
10 | from utilities import convert_seconds_to_hms
11 | 
12 | def main(input_file):
13 |     # Load execution parameters 
14 |     with open('input_files/' + input_file) as f:
15 |         input_parameters = json.load(f)
16 |     
17 |     # Get the refcode families
18 |     if input_parameters["get_refcode_families"]:
19 |         print('Getting the CSD refcode families and the structures in each family.')
20 |         get_refcode_families(input_parameters)
21 |         
22 |     # Cluster refcode families based on structure similarity.
23 |     if input_parameters["cluster_refcode_families"]:
24 |         print('Filter structures based on the user defined criteria and clustering refcode families members based on packing similarity.')
25 |         cluster_refcode_families(input_parameters)
26 |         
27 |     # Get unique structures
28 |     if input_parameters["get_unique_structures"]:
29 |         print('Getting unique structures.')
30 |         get_unique_structures(input_parameters)
31 |         
32 |     # Get structure data
33 |     if input_parameters["get_structure_data"]:
34 |         print('Getting structure data.')
35 |         get_structure_data(input_parameters)
36 |         
37 |     # Get structure filter data
38 |     if input_parameters["get_structure_filter_data"]:
39 |         print('Getting structure filter data.')
40 |         get_structure_filter_data(input_parameters)
41 | 
if __name__ == "__main__":
    # Parameter file; main() resolves it relative to the input_files/ folder.
    input_file = "input_data_extraction.txt"

    # Print the program banner together with the start time.
    now = datetime.now()
    print('#' * 80)
    print('Crystal Math')
    print('A Mathematical and Geometrical Crystal Structure Analysis Protocol')
    print('-' * 80)
    print('Nikos Galanakis')
    print('Research Scientist')
    print('The Tuckerman Group')
    print('New York University')
    print('ng1807@nyu.edu')
    print('=' * 80)
    print("Process started at ", now.strftime("%Y-%m-%d %H:%M:%S"))
    print('-' * 80)

    # Run the workflow and measure the elapsed time around it.
    start = timer()
    main(input_file)
    elapsed_time = timer() - start

    # Report the completion time and the total duration.
    hours, minutes, seconds = convert_seconds_to_hms(elapsed_time)
    now = datetime.now()
    print("Process completed at ", now.strftime("%Y-%m-%d %H:%M:%S"))
    print(f"Total computation time: {hours}h {minutes}m {seconds:.2f}s")


--------------------------------------------------------------------------------
/source_code/csd_operations.py:
--------------------------------------------------------------------------------
  1 | import ast 
  2 | import json
  3 | import numpy as np
  4 | import os 
  5 | import ccdc.search
  6 | from ccdc import io
  7 | from ccdc.crystal import PackingSimilarity
  8 | from ccdc.morphology import VisualHabit
  9 | 
 10 | from create_reference_fragments import create_reference_fragments
 11 | from maths import calculate_inertia
 12 | from maths import ensure_right_handed_coordinate_system
 13 | from maths import sort_eigenvectors
 14 | from structure_operations import get_lattice_vectors 
 15 | from structure_operations import get_unique_species 
 16 | from structure_operations import similarity_check
 17 | 
 18 | def structure_check(input_parameters,crystal,molecule):
 19 |     ''' 
 20 |     Performs a check to see if a structure is consistent with the used defined
 21 |     requirements.
 22 |     
 23 |     Parameters
 24 |     ----------
 25 |     input_parameters : dict
 26 |         A dictionary with the input parameters for the search.
 27 |     crystal : csd object
 28 |         The csd crystal for the structure.
 29 |     molecule : csd_object
 30 |         The csd molecule for the structure.
 31 |     
 32 |     Returns
 33 |     -------
 34 |     True if a structure is accepted, None otherwise.
 35 |     '''
 36 |     # Discard structures with based on the Z prime value
 37 |     if crystal.z_prime not in input_parameters['target_z_prime_values']:
 38 |         return None
 39 | 
 40 |     # Discard structures with unwanted space group 
 41 |     if input_parameters['target_space_groups'] != [] and crystal.spacegroup_symbol not in input_parameters['target_space_groups']:
 42 |         return None
 43 |     
 44 |     # Assign unknow bond types, add missing hydrogens and assign 
 45 |     # partial charges to atoms
 46 |     try:
 47 |         molecule.assign_bond_types()
 48 |         molecule.add_hydrogens(mode='missing')
 49 |         molecule.assign_partial_charges()
 50 |     except Exception:
 51 |         return None
 52 |     
 53 |     # Generate atoms
 54 |     try:
 55 |         atoms = molecule.atoms 
 56 |     except Exception:
 57 |         return None
 58 |     
 59 |     # Discard structures with no atoms in the crystal
 60 |     if len(atoms) == 0:
 61 |         return None
 62 |     
 63 |     # Discard structures with missing coordinates:
 64 |     for at in atoms:
 65 |         if at.coordinates == None:
 66 |             return None 
 67 |         
 68 |     # Discard structures based on the their type (homomolecular, co-crystals, hydrates)
 69 |     components = [c.formula for c in molecule.components]
 70 |     if all(item == components[0] for item in components):
 71 |         crystal_type = 'homomolecular'
 72 |     else:
 73 |         if 'H2 O1' in components:
 74 |             crystal_type = 'hydrate'
 75 |         else:
 76 |             crystal_type = 'co-crystal'    
 77 |     if crystal_type not in input_parameters['crystal_type']:
 78 |         return None
 79 |     
 80 |     # Discard structures based on formal charge of molecules
 81 |     for component in molecule.components:
 82 |         if crystal_type == 'homomolecular' and component.formal_charge not in input_parameters['molecule_formal_charges']:
 83 |             return None
 84 |     
 85 |     # Discard structures with out-of-range molecular weight
 86 |     for component in molecule.components:
 87 |         if component.molecular_weight > input_parameters['molecule_weight_limit']:
 88 |             return None
 89 | 
 90 |     # Discard structures with unwanted atomic species
 91 |     if input_parameters['target_species'] != []:
 92 |         for s in get_unique_species(crystal.formula):
 93 |             if s not in input_parameters['target_species']:
 94 |                 return None
 95 |                 
 96 |     return True
 97 | 
 98 | def get_refcode_families(input_parameters):
 99 |     '''
100 |     Reads the CSD database and returns the refcode families and the structures
101 |     for each family.
102 |     
103 |     Parameters
104 |     ----------
105 |     input_parameters : dict
106 |         A dictionary with the user defined input parameters.
107 |     
108 |     Returns
109 |     -------
110 |     refcode_families : dict
111 |         A dictionaty with the refcode families and the structures for each
112 |         family.
113 |     '''
114 |     # Initialize the reader for the CSD
115 |     reader = io.EntryReader('CSD')
116 |    
117 |     # List to hold the matching Refcodes
118 |     refcode_families = {}
119 |    
120 |     # Iterate through all entries in the CSD
121 |     family_i = ''
122 |     for entry in reader: 
123 |         family_j = entry.identifier[:6]
124 |         if family_j != family_i:
125 |             refcode_families[family_j] = [entry.identifier]
126 |         else:
127 |             refcode_families[family_j].append(entry.identifier)
128 |         family_i = family_j
129 |         
130 |     # Specify the filename you want to write to
131 |     filename = '../csd_db_analysis/db_data/' + input_parameters['data_prefix'] + '_csd_refcode_families.json'
132 |     
133 |     # Writing the dictionary to a file in JSON format
134 |     with open(filename, 'w') as f:
135 |         json.dump(refcode_families, f, indent=4)  
136 | 
137 |     return refcode_families
138 | 
def cluster_refcode_families(input_parameters):
    '''
    Reads the csd families dictionary and returns a new dictionary for the
    refcode families where the structures are grouped based on their
    similarity. Only structures consistent with the user defined criteria are
    included in the clustered refcode families. The result is also written to
    '../csd_db_analysis/db_data/<data_prefix>_csd_refcode_families_clustered.json'.

    Parameters
    ----------
    input_parameters : dict
        A dictionary with the input parameters for the search.

    Returns
    -------
    families_clustered : dict
        A dictionary with the refcode families and the structures for each
        family grouped based on similarity.

    Raises
    ------
    FileNotFoundError
        If the refcode families file does not exist.
    '''
    # Locate the refcode families file.
    refcode_families_f = '../csd_db_analysis/db_data/' + input_parameters['data_prefix'] + '_csd_refcode_families.json'
    if not os.path.exists(refcode_families_f):
        raise FileNotFoundError(f'The file {refcode_families_f} does not exist.')

    # Set the checking similarity engine
    similarity_engine = PackingSimilarity()
    similarity_engine.settings.distance_tolerance = 0.2
    similarity_engine.settings.angle_tolerance = 20.
    similarity_engine.settings.ignore_bond_types = True
    similarity_engine.settings.ignore_hydrogen_counts = True
    similarity_engine.settings.ignore_hydrogen_positions = True
    similarity_engine.settings.packing_shell_size = 15

    # Get the families and member structures. The file is written with
    # json.dump, so it is read back with the matching JSON parser.
    with open(refcode_families_f) as f:
        refcode_families = json.load(f)

    csd_entries = io.EntryReader('CSD')
    families_clustered = {}
    for family, members in refcode_families.items():
        if len(members) > 1:
            # Keep only the structures that satisfy the search criteria.
            structures_to_check = []
            for structure in members:
                entry = csd_entries.entry(structure)
                crystal = entry.crystal
                molecule = entry.molecule

                if structure_check(input_parameters,crystal,molecule) is None:
                    continue

                structures_to_check.append([structure,crystal])

            # Get similar structures
            similar_structure_groups = similarity_check(structures_to_check,similarity_engine)

            # Store the groups of similar structures, each group sorted
            if len(similar_structure_groups) > 0:
                families_clustered[family] = [sorted(group) for group in similar_structure_groups]
        else:
            # Families with a single structure need no similarity check.
            structure = members[0]
            entry = csd_entries.entry(structure)
            crystal = entry.crystal
            molecule = entry.molecule

            # Check if structure is valid according to search criteria.
            if structure_check(input_parameters,crystal,molecule) is None:
                continue

            families_clustered[family] = [[structure]]

    # Specify the filename for the clustered families
    filename = '../csd_db_analysis/db_data/' + input_parameters['data_prefix'] + '_csd_refcode_families_clustered.json'

    # Writing the dictionary to a file in JSON format
    with open(filename, 'w') as f:
        json.dump(families_clustered, f, indent=4)

    return families_clustered
224 | 
def get_unique_structures(input_parameters):
    '''
    Goes through the clustered refcode families and returns a single structure
    for each group of similar structures. The structure kept for a group is
    the one with the lowest value of the user selected ranking quantity. The
    result is also written to
    '../csd_db_analysis/db_data/<data_prefix>_csd_refcode_families_unique_structures.json'.

    Parameters
    ----------
    input_parameters : dict
        A dictionary with the input parameters for the search. The keys read
        here are 'unique_structures_clustering_method' ('energy' or 'vdWFV'),
        'structures_to_exclude' and 'data_prefix'.

    Returns
    -------
    unique_structures : dict
        A dictionary with the unique polymorphs for each refcode family.

    Raises
    ------
    FileNotFoundError
        If the clustered refcode families file does not exist.
    '''
    # Set the unique structures clustering method
    unique_structures_clustering_method = input_parameters['unique_structures_clustering_method']
    if unique_structures_clustering_method == 'energy':
        visualhabit_settings = VisualHabit.Settings()

    # Locate the refcode families clusters file.
    refcode_families_clusters_f = '../csd_db_analysis/db_data/' + input_parameters['data_prefix'] + '_csd_refcode_families_clustered.json'
    if not os.path.exists(refcode_families_clusters_f):
        raise FileNotFoundError(f'The file {refcode_families_clusters_f} does not exist.')

    # Get the families and member structures. The file is written with
    # json.dump, so it is read back with the matching JSON parser.
    with open(refcode_families_clusters_f) as f:
        families_clustered = json.load(f)

    # Loop over refcode family clusters.
    csd_entries = io.EntryReader('CSD')
    unique_structures = {}
    for family, polymorphs in families_clustered.items():
        selected_structures = []

        # Loop over the polymorphs (groups of similar structures)
        for similar_structures in polymorphs:
            # If the polymorph has only one structure deposited, keep it
            # unless it is explicitly excluded.
            if len(similar_structures) == 1:
                if similar_structures[0] not in input_parameters['structures_to_exclude']:
                    selected_structures.append(similar_structures[0])
                continue

            # Otherwise keep the structure with the minimum ranking value.
            minimum_value = np.inf
            minimum_value_structure = ''
            for structure in similar_structures:
                if structure in input_parameters['structures_to_exclude']:
                    continue

                entry = csd_entries.entry(structure)
                crystal = entry.crystal

                if unique_structures_clustering_method == 'energy':
                    # Structures whose energy calculation fails are skipped.
                    try:
                        results = VisualHabit(settings=visualhabit_settings).calculate(crystal)
                    except Exception:
                        continue
                    ranking_value = results.lattice_energy.total
                elif unique_structures_clustering_method == 'vdWFV':
                    ranking_value = 1.0 - crystal.packing_coefficient
                else:
                    # Any other method ranks nothing, so no structure is
                    # selected for this group.
                    continue

                if ranking_value < minimum_value:
                    minimum_value = ranking_value
                    minimum_value_structure = structure

            if minimum_value_structure != '':
                selected_structures.append(minimum_value_structure)

        unique_structures[family] = sorted(selected_structures)

    # Specify the filename for the unique structures
    filename = '../csd_db_analysis/db_data/' + input_parameters['data_prefix'] + '_csd_refcode_families_unique_structures.json'

    # Writing the dictionary to a file in JSON format
    with open(filename, 'w') as f:
        json.dump(unique_structures, f, indent=4)

    return unique_structures
314 | 
def check_for_target_fragments(input_parameters,molecule):
    '''
    Checks whether a molecule contains all the target fragments.

    Only the fragments that appear both in the reference fragment list and in
    input_parameters['target_fragments'] are searched for.

    Parameters
    ----------
    input_parameters : dict
        A dictionary with the user defined input parameters. Only the
        'target_fragments' key is read here.
    molecule : csd object
        The csd molecule in which the fragments are searched.

    Returns
    -------
    True if every searched fragment is found in the molecule, None otherwise.
    '''
    fragment_list = create_reference_fragments()

    # Check for target fragments
    for fragment in fragment_list:
        if fragment not in input_parameters['target_fragments']:
            continue

        csd_fragment = ccdc.search.SMARTSSubstructure(fragment_list[fragment]['smarts'])
        fragmentSearch = ccdc.search.SubstructureSearch()
        fragmentSearch.add_substructure(csd_fragment)
        hits = fragmentSearch.search(molecule)

        # Test the number of hits rather than comparing with a list literal:
        # an empty result that is not a list would not compare equal to [].
        if len(hits) == 0:
            return None

    return True
332 |         
def get_csd_atom_and_molecule_properties(crystal,molecule,atoms):
    '''
    Collects the atomic and molecular properties for a CSD entry.

    Parameters
    ----------
    crystal : csd obj
        The CSD crystal object of the structure (not used in this function).
    molecule : csd obj
        The CSD molecule object of the structure.
    atoms : csd obj
        The CSD atoms object of the structure.

    Returns
    -------
    structure_molecule : dict
        A dictionary with the per-atom properties (charges, labels, masses,
        species, van der Waals radii, fractional and cartesian coordinates),
        the number of atoms, the mass-weighted centre of the molecule in
        fractional and cartesian coordinates, the molecular volume, the
        vectors from that centre to every atom and the bonded label pairs.
    '''
    axes = range(3)

    # Per-atom quantities; numerical values are rounded to 4 decimals.
    charges = np.array([atom.partial_charge for atom in atoms])
    labels = [atom.label for atom in atoms]
    masses = np.round(np.array([atom.atomic_weight for atom in atoms]),4)
    species = [atom.atomic_symbol for atom in atoms]
    vdw_radii = np.round(np.array([atom.vdw_radius for atom in atoms]),4)
    fractional = np.round(np.array([[atom.fractional_coordinates[k] for k in axes] for atom in atoms]),4)
    cartesian = np.round(np.array([[atom.coordinates[k] for k in axes] for atom in atoms]),4)
    n_atoms = len(atoms)

    # Mass-weighted centre of the molecule in both coordinate systems.
    mass_column = masses.reshape(n_atoms,1)
    center_f = np.round(np.sum(mass_column * fractional,axis=0) / np.sum(masses),4)
    center_c = np.round(np.sum(mass_column * cartesian,axis=0) / np.sum(masses),4)

    structure_molecule = {
        'atoms_charge': charges,
        'atoms_labels': labels,
        'atoms_mass': masses,
        'atoms_species': species,
        'atoms_vdW_radius': vdw_radii,
        'atoms_coordinates_f': fractional,
        'atoms_coordinates_c': cartesian,
        'n_atoms': n_atoms,
        'coordinates_f': center_f,
        'coordinates_c': center_c,
        'volume': np.round(molecule.molecular_volume,4),
        # Vectors pointing from the centre to each atom.
        'atoms_bond_vectors_f': np.round(fractional - center_f,4),
        'atoms_bond_vectors_c': np.round(cartesian - center_c,4),
        'bonds': [[bond.atoms[0].label, bond.atoms[1].label] for bond in molecule.bonds],
    }

    return structure_molecule
370 | 
def get_csd_crystal_properties(crystal):
    '''
    Extracts and returns the crystal properties for a CSD entry.

    Parameters
    ----------
    crystal : csd obj
        The CSD crystal object of the structure.

    Returns
    -------
    crystal_properties : dict
        A dictionary with the crystal properties. When the lattice energy
        calculation fails, every term of 'lattice_energy' is reported as 0.0.
    '''
    # Run the energy calculation; a failed calculation is tolerated and
    # reported as zero energies below.
    visualhabit_settings = VisualHabit.Settings()
    visualhabit_settings.potential = 'gavezzotti'
    try:
        energy = VisualHabit(settings=visualhabit_settings).calculate(crystal)
    except Exception:
        energy = None

    # Unrounded cell lengths; the scaled lengths are relative to the first one.
    raw_cell_lengths = np.array(list(crystal.cell_lengths))

    crystal_properties = {}
    crystal_properties['ID'] = crystal.identifier
    crystal_properties['formula'] = crystal.formula
    crystal_properties['species'] = get_unique_species(crystal.formula)
    crystal_properties['space_group'] = crystal.spacegroup_symbol
    crystal_properties['z_crystal'] = crystal.z_value
    crystal_properties['z_prime'] = crystal.z_prime
    crystal_properties['cell_lengths'] = np.round(raw_cell_lengths,4)
    crystal_properties['scaled_cell_lengths'] = np.round(raw_cell_lengths/crystal.cell_lengths[0],4)
    crystal_properties['cell_angles'] = np.round(np.array(list(crystal.cell_angles)),2)
    crystal_properties['cell_volume'] = np.round(crystal.cell_volume,4)
    crystal_properties['cell_density'] = np.round(crystal.calculated_density,4)
    crystal_properties['vdWFV'] = np.round(1.0 - crystal.packing_coefficient,4)
    crystal_properties['SAS'] = np.round(crystal.void_volume(probe_radius=1.2,grid_spacing=0.2,mode='accessible'),4)
    crystal_properties['lattice_vectors'] = np.round(get_lattice_vectors(crystal_properties['cell_lengths'],crystal_properties['cell_angles'],crystal_properties['cell_volume']),4)
    crystal_properties['inverse_lattice_vectors'] = np.round(get_lattice_vectors(crystal_properties['cell_lengths'],crystal_properties['cell_angles'],crystal_properties['cell_volume'],inverse=True),4)
    crystal_properties['close_contacts'] = crystal.contacts(intermolecular='Intermolecular',distance_range=(-3.0, 0.50))
    crystal_properties['hbonds'] = crystal.hbonds(intermolecular='Intermolecular')

    # Output key -> attribute of the lattice energy object.
    energy_terms = (
        ('total', 'total'),
        ('electrostatic', 'electrostatic'),
        ('vdW', 'vdw'),
        ('vdW_attraction', 'vdw_attraction'),
        ('vdW_repulsion', 'vdw_repulsion'),
        ('h-bond', 'h_bond'),
        ('h-bond_attraction', 'h_bond_attraction'),
        ('h-bond_repulsion', 'h_bond_repulsion'),
    )
    if energy is not None:
        lattice_energy = energy.lattice_energy
        crystal_properties['lattice_energy'] = {
            key: np.round(getattr(lattice_energy, attribute),4)
            for key, attribute in energy_terms
        }
    else:
        crystal_properties['lattice_energy'] = {key: 0.0 for key, _ in energy_terms}
    return crystal_properties
435 | 
def get_csd_structure_fragments(input_parameters,structure,molecule):
    '''
    Identifies and returns the fragments in a molecule.

    Every reference fragment is searched in the molecule by its SMARTS
    pattern. Hits whose atoms are a subset of the atoms of a hit of a
    different fragment are discarded, and the remaining hits are renumbered.
    Optionally, one extra entry per molecular component is added.

    Parameters
    ----------
    input_parameters : dict
        A dictionary with the user defined input parameters. Only the
        'add_full_component' key is read here.
    structure : dict
        A dictionary with the data for the structure. The entries
        structure['molecule']['atoms_labels'],
        structure['molecule']['atoms_coordinates_c'] and
        structure['molecule']['atoms_coordinates_f'] are read here.
    molecule : object
        The csd molecule object for the structure.

    Returns
    -------
    str_fragments : dict
        A dictionary with the identified fragments in the molecule. Keys are
        'Fnn.<fragment name>' for the matched fragments and
        'FMC.component_<i>' for the full components.
    '''
    def _mass_weighted_center(masses, coordinates, n_atoms):
        # Mass-weighted mean position, rounded to 4 decimals.
        return np.round(np.sum(masses.reshape(n_atoms,1) * coordinates,axis=0) / np.sum(masses),4)

    # Update the reference fragment list
    fragment_list = create_reference_fragments()

    molecule_atoms_labels = structure['molecule']['atoms_labels']

    # Get the fragments for the structure
    fragments = {}
    i_hit = 0
    for fragment in fragment_list:
        reference = fragment_list[fragment]
        csd_fragment = ccdc.search.SMARTSSubstructure(reference['smarts'])
        fragmentSearch = ccdc.search.SubstructureSearch()
        fragmentSearch.add_substructure(csd_fragment)
        hits = fragmentSearch.search(molecule)
        for hit in hits:
            i_hit += 1
            key = 'F' + str(i_hit).zfill(2) + '.' + fragment
            hit_atoms = []
            hit_atoms_species = []
            hit_atoms_labels = []
            for at in hit.match_atoms():
                hit_atoms.append(molecule_atoms_labels.index(at.label))
                hit_atoms_species.append(at.atomic_symbol)
                hit_atoms_labels.append(at.label)

            # NOTE(review): the reference masses are assumed to be listed in
            # the same order as the matched atoms - confirm against the
            # fragment list.
            data = {}
            data['smarts'] = reference['smarts']
            data['atoms'] = hit_atoms
            data['atoms_species'] = hit_atoms_species
            data['atoms_labels'] = hit_atoms_labels
            data['atoms_mass'] = np.round(np.array(reference['mass']),4)
            data['n_atoms'] = len(hit_atoms)
            data['atoms_coordinates_c'] = np.round(np.array(structure['molecule']['atoms_coordinates_c'][hit_atoms]),4)
            data['atoms_coordinates_f'] = np.round(np.array(structure['molecule']['atoms_coordinates_f'][hit_atoms]),4)
            data['atoms_coordinates_sf'] = np.round(np.array(reference['coordinates_sf']),4)
            data['atoms_to_align'] = reference['atoms_to_align']
            data['coordinates_c'] = _mass_weighted_center(data['atoms_mass'],data['atoms_coordinates_c'],data['n_atoms'])
            data['coordinates_f'] = _mass_weighted_center(data['atoms_mass'],data['atoms_coordinates_f'],data['n_atoms'])
            data['atoms_bond_vectors_c'] = np.round(data['atoms_coordinates_c'] - data['coordinates_c'],4)
            data['atoms_bond_vectors_f'] = np.round(data['atoms_coordinates_f'] - data['coordinates_f'],4)
            fragments[key] = data

    # Remove subsets (sub-fragments)
    entries_to_remove = set()

    # Compare all pairs of keys; hits of the same fragment type are never
    # removed in favour of each other.
    for key1 in fragments:
        for key2 in fragments:
            if key1 != key2 and key1 not in entries_to_remove and key2 not in entries_to_remove:
                if fragments[key1]['smarts'] == fragments[key2]['smarts']:
                    continue

                atoms1 = set(fragments[key1]['atoms_labels'])
                atoms2 = set(fragments[key2]['atoms_labels'])

                # Check if atoms of entry1 are subset of entry2
                if atoms1.issubset(atoms2):
                    entries_to_remove.add(key1)
                elif atoms2.issubset(atoms1):
                    entries_to_remove.add(key2)

    # Remove identified keys from the dictionary
    for key in entries_to_remove:
        del fragments[key]

    # Add a fragment number ID. The fragment name is everything after the
    # first '.', which stays correct when the hit counter has more than two
    # digits (a fixed-width slice would not).
    str_fragments = {}
    for i, key in enumerate(fragments):
        fragment_name = key.split('.', 1)[1]
        new_key = 'F' + str(i + 1).zfill(2) + '.' + fragment_name
        str_fragments[new_key] = fragments[key]

    # Add fragments for full components
    if input_parameters['add_full_component']:
        for i, component in enumerate(molecule.components):
            key = 'FMC.component_' + str(i + 1)
            component_atoms = component.atoms
            data = {}
            data['atoms_labels'] = [at.label for at in component_atoms]
            data['atoms'] = [molecule_atoms_labels.index(at.label) for at in component_atoms]
            data['atoms_species'] = [at.atomic_symbol for at in component_atoms]
            data['atoms_mass'] = np.round(np.array([at.atomic_weight for at in component_atoms]),4)
            data['n_atoms'] = len(component_atoms)
            data['atoms_coordinates_c'] = np.round(np.array([at.coordinates for at in component_atoms]),4)
            data['atoms_coordinates_f'] = np.round(np.array([at.fractional_coordinates for at in component_atoms]),4)
            data['atoms_to_align'] = 'all'
            data['coordinates_c'] = _mass_weighted_center(data['atoms_mass'],data['atoms_coordinates_c'],data['n_atoms'])
            data['coordinates_f'] = _mass_weighted_center(data['atoms_mass'],data['atoms_coordinates_f'],data['n_atoms'])
            data['atoms_bond_vectors_c'] = np.round(data['atoms_coordinates_c'] - data['coordinates_c'],4)
            # Fractional bond vectors are built from the fractional data.
            data['atoms_bond_vectors_f'] = np.round(data['atoms_coordinates_f'] - data['coordinates_f'],4)

            # Set the rotation of the full component: express the cartesian
            # bond vectors in the frame of the inertia eigenvectors.
            inertia_eigenvalues, inertia_eigenvectors = calculate_inertia(data['atoms_mass'],data['atoms_bond_vectors_c'])
            inertia_eigenvalues, inertia_eigenvectors = sort_eigenvectors(inertia_eigenvalues,inertia_eigenvectors)
            inertia_eigenvectors = ensure_right_handed_coordinate_system(inertia_eigenvectors)

            data['atoms_coordinates_sf'] = np.round(np.matmul(data['atoms_bond_vectors_c'],inertia_eigenvectors),4)
            str_fragments[key] = data

    return str_fragments
546 | 


--------------------------------------------------------------------------------
/source_code/generate_molecule_fragments.py:
--------------------------------------------------------------------------------
  1 | import ast 
  2 | import json 
  3 | import numpy as np
  4 | 
  5 | from maths import calculate_inertia
  6 | from maths import center_of_mass
  7 | from maths import ensure_right_handed_coordinate_system
  8 | from maths import sort_eigenvectors
  9 | from structure_operations import get_atoms_from_formula
 10 | 
 11 | def create_reference_fragments():
 12 |     """ 
 13 |     Converts the input fragment list into a space fixed list of fragments.
 14 |     
 15 |     Parameters
 16 |     ----------
 17 |     
 18 |     Returns
 19 |     -------
 20 |     reference_fragment_list : dict
 21 |         A dictionary with the space fixed reference fragments.
 22 |     """
 23 |     with open("../source_data/fragment_list.json","r") as f:
 24 |         fragment_list = json.load(f)
 25 |     
 26 |     reference_fragment_list = {}
 27 |     for fragment in fragment_list:
 28 |         fragment_atoms_mass = np.array(fragment_list[fragment]["mass"])
 29 |         fragment_atoms_pos = np.array(fragment_list[fragment]["coordinates"])
 30 |         fragment_com = center_of_mass(fragment_atoms_mass,fragment_atoms_pos)
 31 |         fragment_atoms_bv = fragment_atoms_pos - fragment_com
 32 |     
 33 |         inertia_eigenvalues, inertia_eigenvectors = calculate_inertia(fragment_atoms_mass,
 34 |                                                                       fragment_atoms_bv)
 35 |             
 36 |         inertia_eigenvalues, inertia_eigenvectors = sort_eigenvectors(inertia_eigenvalues, 
 37 |                                                                       inertia_eigenvectors)
 38 |         
 39 |         inertia_eigenvectors = ensure_right_handed_coordinate_system(inertia_eigenvectors)
 40 |         
 41 |         fragment_atoms_sfc = np.matmul(fragment_atoms_bv, 
 42 |                                         inertia_eigenvectors)
 43 |         
 44 |         fragment_atoms_sfc = np.round(fragment_atoms_sfc, decimals=4)
 45 |     
 46 |         reference_fragment_list[fragment] = {"smarts": fragment_list[fragment]["smarts"],
 47 |                                              "species": fragment_list[fragment]["species"],
 48 |                                              "coordinates_sf": fragment_atoms_sfc.tolist(),
 49 |                                              "mass": fragment_list[fragment]["mass"],
 50 |                                              "atoms_to_align": fragment_list[fragment]["atoms_to_align"]}
 51 |     
 52 |     # Write the reference fragment to json file
 53 |     with open('../source_data/reference_fragment_list.json', 'w') as f:
 54 |         json.dump(reference_fragment_list, f, indent=4)  
 55 |         
 56 |     return 
 57 | 
 58 | def get_reference_fragment_list():
 59 |     '''
 60 |     Returns the reference fragment list.
 61 |     '''    
 62 |     with open('../source_data/reference_fragment_list.json','r') as f:
 63 |         reference_fragment_list = json.load(f)
 64 |     return reference_fragment_list
 65 | 
 66 | def get_molecule_fragments(input_fragments,reference_fragment_list):
 67 |     '''
 68 |     Returns the fragments for the reference molecule.
 69 |     
 70 |     Parameters
 71 |     ----------
 72 |     input_fragments : list
 73 |         A list with the fragments for the compound.
 74 |     reference_fragment_list : dict
 75 |         A dictionary with the reference fragments used to build the molecules.
 76 |         
 77 |     Returns
 78 |     -------
 79 |     int
 80 |         The number of fragments.
 81 |     molecule_fragments : dict
 82 |         A dictionary with the properties of the fragments.
 83 |             
 84 |     '''
 85 |     molecule_fragments = {}
 86 |     fragment_count = {}  # Keep track of how many times each fragment has appeared
 87 |       
 88 |     for fragment in input_fragments:
 89 |         if fragment in fragment_count:
 90 |             # If the fragment has appeared before, increment its count
 91 |             fragment_count[fragment] += 1
 92 |             # Use the fragment name and its count to create a unique key
 93 |             unique_key = f"{fragment}_{fragment_count[fragment]}"
 94 |         else:
 95 |             # If it's the first time the fragment appears, initialize its count
 96 |             fragment_count[fragment] = 1
 97 |             unique_key = fragment
 98 |             
 99 |         # Use the unique key to store the fragment in the molecule_fragments dictionary
100 |         molecule_fragments[unique_key] = reference_fragment_list[fragment]
101 |     
102 |     return len(molecule_fragments), molecule_fragments
103 | 
def calculate_molecular_volume(formula,compound_rings,atomic_properties):
    '''
    Estimates the molecular van der Waals volume from the formula
        J. Org. Chem. 2003, 68, 19, 7368–7373

    The volume is the sum of the atomic vdW spheres minus a correction per
    bond and per aromatic / aliphatic ring.

    Parameters
    ----------
    formula : str
        The molecular formula.
    compound_rings : dictionary
        The number of rings under the keys 'aromatic' and 'aliphatic'.
    atomic_properties : dict
        The atomic properties; 'van_der_waals_radius' is read per species.

    Returns
    -------
    molecular_volume : float
        The molecular vdW volume.
    '''
    # Ring counts
    n_aromatic = compound_rings['aromatic']
    n_aliphatic = compound_rings['aliphatic']

    # Atoms per species and total number of atoms
    species_counts, n_atoms, _ = get_atoms_from_formula(formula)

    # Number of bonds
    n_bonds = n_atoms - 1 + n_aromatic + n_aliphatic

    # Volume of the atomic vdW spheres, weighted by the species count
    sphere_volumes = []
    for species in species_counts:
        count = species_counts[species]
        radius = atomic_properties[species]['van_der_waals_radius']
        sphere_volumes.append(count * (4.0 * np.pi * radius**3 / 3))
    atomic_vdW_volume = np.sum(sphere_volumes)

    # Subtract the bond and ring corrections
    return atomic_vdW_volume - 5.92 * n_bonds - 14.7 * n_aromatic - 3.8 * n_aliphatic
139 | 
def calculate_molecular_weight(formula,atomic_properties):
    '''
    Sums the atomic masses of all atoms in the formula.

    Parameters
    ----------
    formula : str
        The molecular formula.
    atomic_properties : dict
        The atomic properties; 'atomic_mass' is read per species.

    Returns
    -------
    molecular_weight : float
        The molecular weight
    '''
    # Only the per-species counts are needed here
    species_counts, _, _ = get_atoms_from_formula(formula)

    # Mass contribution of every species
    species_masses = []
    for atom in species_counts:
        species_masses.append(species_counts[atom]*atomic_properties[atom]['atomic_mass'])

    return np.sum(species_masses)
163 | 
def generate_fragments(input_parameters,atomic_properties):
    '''
    Builds the fragments and the reference molecule for the compound.

    Parameters
    ----------
    input_parameters : dict
        The input parameters; the keys 'fragments', 'formula' and 'rings'
        are read.
    atomic_properties : dict
        The atomic properties, passed on to the volume and weight
        calculations.

    Returns
    -------
    n_fragments : int
        The number of fragments in the molecule.
    fragments : dict
        A dictionary with the fragment properties.
    reference_molecule : dict
        The reference molecule with the keys 'formula', 'volume' and
        'weight'.
    '''
    # Fragments of the molecule, taken from the stored reference fragments
    n_fragments, fragments = get_molecule_fragments(input_parameters['fragments'],
                                                    get_reference_fragment_list())

    # Reference molecule: formula, vdW volume and molecular weight
    reference_molecule = {
        'formula': input_parameters['formula'],
        'volume': calculate_molecular_volume(input_parameters['formula'],
                                             input_parameters['rings'],
                                             atomic_properties),
        'weight': calculate_molecular_weight(input_parameters['formula'],
                                             atomic_properties),
    }

    return n_fragments, fragments, reference_molecule


--------------------------------------------------------------------------------
/source_code/get_analysis_data.py:
--------------------------------------------------------------------------------
  1 | import copy 
  2 | import json 
  3 | import numpy as np
  4 | from collections import OrderedDict
  5 | from space_group_operations import apply_symmetry_operations
  6 | from structure_operations import get_lattice_vectors
  7 | 
  8 | def get_user_variables(input_parameters,variables):
  9 |     '''
 10 |     Gets the variables for the analysis/plotting
 11 |     
 12 |     Parameters
 13 |     ----------    
 14 |     input_parameters : dict
 15 |         The user defined input file
 16 |     variables : dict
 17 |         A dictionary with the available variables
 18 |         
 19 |     Returns
 20 |     -------
 21 |     A list with the unique variables for the analysis/plotting
 22 |     '''
 23 |     # Get the variables for the analysis
 24 |     user_variables = []
 25 |     if input_parameters['histograms_options']['variables'] != 'all':
 26 |         user_variables.extend([var[0][0] for var in input_parameters['histograms_options']['variables']])
 27 |     else:
 28 |         for key in variables:
 29 |             for var in variables[key]:
 30 |                 user_variables.append(var)
 31 |     if input_parameters['2D_scatter_plots_options']['variables'] != 'all':
 32 |         user_variables.extend([var[0][0] for var in input_parameters['2D_scatter_plots_options']['variables']])
 33 |         user_variables.extend([var[0][1] for var in input_parameters['2D_scatter_plots_options']['variables']])
 34 |     else:
 35 |         for key in variables:
 36 |             for var in variables[key]:
 37 |                 user_variables.append(var)
 38 |     if input_parameters['3D_scatter_plots_options']['variables'] != 'all':
 39 |         user_variables.extend([var[0][0] for var in input_parameters['3D_scatter_plots_options']['variables']])
 40 |         user_variables.extend([var[0][1] for var in input_parameters['3D_scatter_plots_options']['variables']])
 41 |         user_variables.extend([var[0][2] for var in input_parameters['3D_scatter_plots_options']['variables']])
 42 |     else:
 43 |         for key in variables:
 44 |             for var in variables[key]:
 45 |                 user_variables.append(var)
 46 |         
 47 |     return sorted(list(set(user_variables)))
 48 | 
 49 | def get_value(data, path):
 50 |     '''
 51 |     Gets value for a specific variable
 52 | 
 53 |     Parameters
 54 |     ----------
 55 |     data : dict
 56 |         A dictionary with the structure data.
 57 |     path : str
 58 |         The path to read the variable value from the structure data dictionary.
 59 | 
 60 |     Returns
 61 |     -------
 62 |     data : float, str or bool
 63 |         The value for the specific variable in the structure.
 64 |     '''
 65 |     for key in path:
 66 |         data = data[key]
 67 |     return data
 68 | 
 69 | def get_analysis_structures_list(input_parameters):
 70 |     # Get the structures list based on the user defined filters
 71 |     structures_list = {}
 72 |     structures_filter_data_filename = input_parameters['data_directory'] + input_parameters['data_prefix'] + '_structures_filter_data.json'
 73 | 
 74 |     filter_groups = {
 75 |         'single': {
 76 |             'target_space_groups': 'space_group',
 77 |             'target_z_crystal_values': 'z_crystal',
 78 |             'target_z_prime_values': 'z_prime',
 79 |             'target_species': 'species'
 80 |         },
 81 |         'single_combinations': {
 82 |             'target_structure_fragments': 'fragments',
 83 |             'target_contact_central_fragments': 'contact_central_fragments'
 84 |         },
 85 |         'multiple_combinations': {
 86 |             'target_contact_pairs': 'contact_pairs',
 87 |             'target_contact_fragment_pairs': 'contact_fragment_pairs'
 88 |         }
 89 |     }
 90 |             
 91 |     with open(structures_filter_data_filename) as f:
 92 |         structures_filter_data = json.load(f)
 93 |         
 94 |         for structure, values in zip(structures_filter_data.keys(),structures_filter_data.values()):
 95 |             accept_structure = True
 96 | 
 97 |             if input_parameters['target_families'] != None and structure[:6] not in input_parameters['target_families']:
 98 |                 accept_structure = False
 99 | 
100 |             if input_parameters['target_structures'] != None and structure not in input_parameters['target_structures']:
101 |                 accept_structure = False
102 | 
103 |             for filter, property in zip(filter_groups['single'].keys(),filter_groups['single'].values()):
104 |                 if input_parameters[filter] != None and values[property] not in input_parameters[filter]:
105 |                     accept_structure = False
106 | 
107 |             for filter, property in zip(filter_groups['single_combinations'].keys(),filter_groups['single_combinations'].values()):
108 |                 if input_parameters[filter] != None:
109 |                     if input_parameters[filter][1] == 'or':
110 |                         if not any(item in set(values[property]) for item in input_parameters[filter][0]):
111 |                             accept_structure = False
112 |                     elif input_parameters[filter][1] == 'and':
113 |                         if not set(input_parameters[filter][0]).issubset(set(values[property])):
114 |                             accept_structure = False
115 | 
116 |             for filter, property in zip(filter_groups['multiple_combinations'].keys(),filter_groups['multiple_combinations'].values()):
117 |                 if input_parameters[filter] != None:
118 |                     if input_parameters[filter][1] == 'or':
119 |                         if not any(tuple(item) in set(tuple(x) for x in values[property]) for item in input_parameters[filter][0]):
120 |                             accept_structure = False
121 |                     elif input_parameters[filter][1] == 'and':
122 |                         if not set(tuple(x) for x in input_parameters[filter][0]).issubset(set(tuple(x) for x in values[property])):
123 |                             accept_structure = False
124 | 
125 |             if not accept_structure:
126 |                 continue 
127 |             
128 |             space_group = structures_filter_data[structure]['space_group']
129 |             if space_group != 'R-3':
130 |                 if space_group not in structures_list:
131 |                     structures_list[space_group] = []
132 |                 structures_list[space_group].append(structure)
133 |     return structures_list
134 |     
def get_analysis_data(input_parameters,variables):
    '''
    Gets the data for the plots.

    The values of the requested variables are read from the individual
    structure json files, grouped by variable family and space group, and
    the positional variables are then expanded with the symmetry operations
    of the space group.

    Parameters
    ----------
    input_parameters : dict
        A dictionary with the user defined input parameters.
    variables : dict
        A dictionary with the available variables and their properties
        ('family', 'path' and 'position_symmetry' are read here).

    Returns
    -------
    data : dict
        A dictionary with the required data to create the user defined plots,
        organised as data[variable_family][space_group][variable] -> list
        of values.

    '''
    
    # Get the variables for the analysis
    user_variables = get_user_variables(input_parameters,variables)
    
    # Get the structures list for the analysis based on the user defined filters
    # (a dictionary: space group -> list of structure names)
    structures_list = get_analysis_structures_list(input_parameters)

    # Set the structures folder
    structure_files_folder = input_parameters['data_directory'] + input_parameters['data_prefix'] + '_structures/'

    # Get the user variables families
    variable_families = [variables[var]['family'] for var in user_variables]
    variable_families = sorted(list(set(variable_families)))

    # Initialize the data dictionary for the analysis
    data = {key: {space_group: {} for space_group in structures_list} for key in variable_families}

    # Set the data filter dependencies (variables that should be added for filtering data)
    filter_dependencies = {
        'structure': ['z_crystal','z_prime'],
        'fragment': ['z_crystal','z_prime','fragment'],
        'contact': ['z_crystal','z_prime','cc_central_atom_fragment','cc_contact_atom_fragment','cc_central_atom_species','cc_contact_atom_species','cc_type','cc_is_in_los'],
        'fragment_atom': ['z_crystal','z_prime','fragment','fragment_atom_species'],
        'contact_atom': ['z_crystal','z_prime','cc_central_atom_fragment','cc_contact_atom_fragment','cc_central_atom_species','cc_contact_atom_species','cc_type','cc_is_in_los','cc_length']}

    # Add variables to the data dictionary for data filter dependencies
    for variable_family in variable_families:
        for space_group in structures_list:
            for denepdency in filter_dependencies[variable_family]:
                data[variable_family][space_group][denepdency] = []

    # Add user variables to data dictionary
    for var in user_variables:
        for space_group in structures_list:
            data[variables[var]['family']][space_group][var] = []

    # Check if additional positional variables should be added to calculate coordinate transformations from fractional to cartesian and vice versa.
    # The variables are grouped by the 4th entry of their 'position_symmetry' property (the group id, stored as a string key).
    variable_groups = {}
    for var in variables:
        variable_group = variables[var]['position_symmetry'][3]
        if str(variable_group) not in variable_groups:
            variable_groups[str(variable_group)] = []
        
        variable_groups[str(variable_group)].append(var)

    for variable_family in data:
        for space_group in structures_list:
            add_zero = False
            # Snapshot of the keys, because the dictionary is extended inside the loop
            family_variables = [key for key in data[variable_family][space_group]]
            for var in family_variables:
                variable_group = variables[var]['position_symmetry'][3]
                # A coordinate component (group id > 0) pulls in all other variables of its group
                if variable_group > 0 and var[-2:] in ['_x','_y','_z','_u','_v','_w']:
                    add_zero = True
                    for group_var in variable_groups[str(variable_group)]:
                        if group_var not in data[variable_family][space_group]:
                            data[variable_family][space_group][group_var] = [] 
            if add_zero:
                # Every variable of group 0 is added as well
                # (presumably the cell parameters used for the cartesian/fractional conversion below - confirm against variables.json)
                for var in variable_groups['0']:
                    data[variable_family][space_group][var] = [] 

    # Get data from individual structure files grouped by space group
    for space_group in structures_list:
        for structure in structures_list[space_group]:
            with open(structure_files_folder + structure + '.json') as f:
                # Read the current structure data
                structure_data = json.load(f)

            # Add structure data to the data dictionary for the analysis.
            # A variable can belong to a different family than the one it is stored under (e.g. a filter dependency);
            # its value is then appended once per fragment / atom / contact of the structure.
            # The path is deep-copied because its placeholder entries are overwritten below.
            for variable_family in variable_families:
                for var in data[variable_family][space_group]:
                    
                    if variable_family == 'structure': 
                        path = copy.deepcopy(variables[var]['path']) 
                        value = get_value(structure_data, path)
                        data[variable_family][space_group][var].append(value)
                    
                    if variable_family == 'fragment': 
                        if variables[var]['family'] == 'fragment':
                            for fragment_key in structure_data['fragments']:
                                path = copy.deepcopy(variables[var]['path'])
                                # The 2nd path entry is replaced by the fragment key
                                path[1] = fragment_key
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value)
                        if variables[var]['family'] == 'structure':
                            # Structure level value, appended once per fragment
                            for fragment_key in structure_data['fragments']:
                                path = copy.deepcopy(variables[var]['path'])
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value)
                        
                    if variable_family == 'fragment_atom': 
                        if variables[var]['family'] == 'fragment_atom':
                            for fragment_key in structure_data['fragments']:
                                path = copy.deepcopy(variables[var]['path'])
                                path[1] = fragment_key
                                for atom_key in structure_data['fragments'][fragment_key]['atoms']:
                                    # The 4th path entry is replaced by the atom key
                                    path[3] = atom_key
                                    value = get_value(structure_data, path)
                                    data[variable_family][space_group][var].append(value)
                        if variables[var]['family'] == 'fragment':
                            # Fragment level value, appended once per atom of the fragment
                            for fragment_key in structure_data['fragments']:
                                for atom_key in structure_data['fragments'][fragment_key]['atoms']:
                                    path = copy.deepcopy(variables[var]['path'])
                                    path[1] = fragment_key
                                    value = get_value(structure_data, path)
                                    data[variable_family][space_group][var].append(value)
                        if variables[var]['family'] == 'structure':
                            # Structure level value, appended once per atom of every fragment
                            for fragment_key in structure_data['fragments']:
                                for atom_key in structure_data['fragments'][fragment_key]['atoms']:
                                    path = copy.deepcopy(variables[var]['path'])
                                    value = get_value(structure_data, path)
                                    data[variable_family][space_group][var].append(value)
                                
                    if variable_family == 'contact':
                        if variables[var]['family'] == 'contact':
                            for pair_key in structure_data['crystal']['close_contacts']:
                                path = copy.deepcopy(variables[var]['path']) 
                                # The 3rd path entry is replaced by the contact pair key
                                path[2] = pair_key
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value)  
                        if variables[var]['family'] == 'contact_atom' and var in ['cc_central_atom_fragment','cc_contact_atom_fragment','cc_central_atom_species','cc_contact_atom_species']:
                            for pair_key in structure_data['crystal']['close_contacts']:
                                path = copy.deepcopy(variables[var]['path']) 
                                path[2] = pair_key
                                # The first 15 characters of the variable name ('cc_central_atom' or 'cc_contact_atom') select the atom of the pair
                                path[3] = var[:15]
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value)
                        if variables[var]['family'] == 'structure':
                            # Structure level value, appended once per close contact
                            for pair_key in structure_data['crystal']['close_contacts']:
                                path = copy.deepcopy(variables[var]['path'])
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value)
                                
                    if variable_family == 'contact_atom':
                        if variables[var]['family'] == 'contact_atom':
                            for pair_key in structure_data['crystal']['close_contacts']:
                                path = copy.deepcopy(variables[var]['path']) 
                                path[2] = pair_key
                                # First 15 characters of the variable name select the atom of the pair
                                path[3] = var[:15]
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value) 
                        if variables[var]['family'] == 'contact':
                            for pair_key in structure_data['crystal']['close_contacts']:
                                path = copy.deepcopy(variables[var]['path']) 
                                path[2] = pair_key
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value)  
                        if variables[var]['family'] == 'structure':
                            # Structure level value, appended once per close contact
                            for pair_key in structure_data['crystal']['close_contacts']:
                                path = copy.deepcopy(variables[var]['path'])
                                value = get_value(structure_data, path)
                                data[variable_family][space_group][var].append(value)

    # Apply symmetry operations to atomic coordinates
    # Get the space group properties
    with open('../source_data/space_group_properties.json') as f:
        space_group_properties = json.load(f)
        
    for variable_family in data:
        for space_group in data[variable_family]:
            # Sort data based on variable name for the correct application of symmetry operations 
            data[variable_family][space_group] = OrderedDict(sorted(data[variable_family][space_group].items()))

            # Load symmetry operations for the current space group
            symmetry_operations = space_group_properties[space_group]['symmetry_operations']

            # Get the groups of variables (unique 'position_symmetry' entries with a group id > 0)
            groups = []
            for var in data[variable_family][space_group].keys():
                group = variables[var]['position_symmetry']
                if group[3] > 0 and group not in groups:
                    groups.append(group)

            # Get the symmetry groups: [position_symmetry[0], position_symmetry[1], variables sharing the group id]
            # NOTE(review): group_variables follows the key order of `variables`; the code below assumes
            # exactly three components per group in x,y,z (or u,v,w) order - confirm against variables.json.
            symmetry_groups = []
            for group in groups:
                group_variables = [var for var in variables if variables[var]['position_symmetry'][3] == group[3]]
                symmetry_groups.append([group[0],group[1],group_variables])

            # Get the coordinates for the symmetric atoms
            symmetric_variables = []
            # `rotation` is unpacked but not used in this loop; only `translation` is passed on
            for rotation, translation, group_variables in symmetry_groups:
                symmetric_variables.extend([var for var in group_variables])
                if group_variables[0][-2:] == '_u':
                    # Fractional coordinates: one (u, v, w) row per stored value
                    fractional_positions = np.transpose([data[variable_family][space_group][group_variables[0]],
                                                          data[variable_family][space_group][group_variables[1]],
                                                          data[variable_family][space_group][group_variables[2]]])
                    
                elif group_variables[0][-2:] == '_x':
                    # Cartesian coordinates: converted to fractional with the cell of the structure each value came from
                    cartesian_positions = np.transpose([data[variable_family][space_group][group_variables[0]],
                                                        data[variable_family][space_group][group_variables[1]],
                                                        data[variable_family][space_group][group_variables[2]]])
                    cell_parameters = np.transpose([data[variable_family][space_group]['cell_length_a'],
                                                    data[variable_family][space_group]['cell_length_b'],
                                                    data[variable_family][space_group]['cell_length_c'],
                                                    data[variable_family][space_group]['cell_angle_alpha'],
                                                    data[variable_family][space_group]['cell_angle_beta'],
                                                    data[variable_family][space_group]['cell_angle_gamma'],
                                                    data[variable_family][space_group]['cell_volume']])
                    
                    fractional_positions = []
                    lattice_vectors = []
                    for (x, y, z), (a, b, c, alpha, beta, gamma, omega) in zip(cartesian_positions,cell_parameters):
                        # Lattice vectors are kept to convert the symmetric positions back to cartesian
                        lattice_vectors.append(get_lattice_vectors(np.array([a,b,c]),np.array([alpha,beta,gamma]),omega,inverse=False))
                        inverse_lattice_vectors = get_lattice_vectors(np.array([a,b,c]),np.array([alpha,beta,gamma]),omega,inverse=True)
                        fractional_positions.append(np.dot([x,y,z],inverse_lattice_vectors).tolist())
                
                # The first symmetry operation is skipped (presumably the identity, whose positions are already stored - confirm)
                # NOTE(review): symmetric_positions is only assigned for '_u' and '_x' groups; any other suffix would reuse a stale value or fail.
                for op in symmetry_operations[1:]:
                    if group_variables[0][-2:] == '_u':
                        symmetric_positions = apply_symmetry_operations(fractional_positions, op, translation)  
                    if group_variables[0][-2:] == '_x':
                        symmetric_positions = []
                        for pos, vec in zip(fractional_positions,lattice_vectors):
                            # Apply the operation in fractional space and convert back with the lattice vectors
                            symmetric_positions.append(np.dot(apply_symmetry_operations([pos], op, translation), vec)[0])

                    # Append the symmetric positions after the original values
                    for x, y, z in symmetric_positions:
                        data[variable_family][space_group][group_variables[0]].append(x)
                        data[variable_family][space_group][group_variables[1]].append(y)
                        data[variable_family][space_group][group_variables[2]].append(z)
            
            # Extend data for symmetric positions: the other variables are repeated once per
            # additional symmetry operation so that all lists grow by the same factor
            if symmetric_variables != []:
                for var in data[variable_family][space_group]:
                    if var not in symmetric_variables:
                        extend_data = copy.deepcopy(data[variable_family][space_group][var])
                        for i in range(len(symmetry_operations) - 1):
                            data[variable_family][space_group][var].extend(extend_data)
                            
            # Move fractional coordinates of atoms in unit cell if necessary
            # (variables whose 3rd 'position_symmetry' entry is true are wrapped with modulo 1)
            for var in data[variable_family][space_group]:
                if variables[var]['position_symmetry'][2]:
                    data[variable_family][space_group][var] = [x % 1 for x in data[variable_family][space_group][var]]

    return data


--------------------------------------------------------------------------------
/source_code/get_structure_data.py:
--------------------------------------------------------------------------------
  1 | import ast 
  2 | import itertools
  3 | import json
  4 | import numpy as np
  5 | import os 
  6 | import re
  7 | from ccdc import io
  8 | 
  9 | import io_operations
 10 | from csd_operations import check_for_target_fragments
 11 | from csd_operations import get_csd_atom_and_molecule_properties
 12 | from csd_operations import get_csd_crystal_properties
 13 | from csd_operations import get_csd_structure_fragments
 14 | from get_structures_list import get_structures_list 
 15 | from maths import align_structures
 16 | from maths import cartesian_to_spherical
 17 | from maths import distance_to_plane
 18 | from maths import distance_to_zzp_planes_family
 19 | from maths import get_reference_cell_points
 20 | from maths import kabsch_rotation_matrix
 21 | from maths import set_zzp_planes
 22 | from maths import vectors_closest_to_perpendicular
 23 | 
 24 | class NumpyArrayEncoder(json.JSONEncoder):
 25 |     """ Custom encoder for numpy data types """
 26 |     def default(self, obj):
 27 |         if isinstance(obj, np.ndarray):
 28 |             return obj.tolist()
 29 |         return json.JSONEncoder.default(self, obj)
 30 | 
 31 | def format_lists(json_str):
 32 |     """ Formats lists in the JSON string to remove unnecessary whitespace and newlines. """
 33 |     pattern = re.compile(r'\[\s*((?:[^[\]]|\n)+?)\s*\]', re.DOTALL)
 34 |     return re.sub(pattern, lambda x: '[' + x.group(1).replace('\n', '').replace(' ', '') + ']', json_str)
 35 | 
 36 | def convert_to_json(data):
 37 |     """ Converts Python dictionary to formatted JSON string. """
 38 |     json_str = json.dumps(data, cls=NumpyArrayEncoder, indent=4)
 39 |     formatted_json = format_lists(json_str)
 40 |     return formatted_json
 41 | 
 42 | def get_structure_data(input_parameters):
 43 |     ''' 
 44 |     Extracts data from the unique CSD structures.
 45 |     
 46 |     Parameters
 47 |     ----------
 48 |     input_parameters : dict
 49 |         A dictionary with the input parameters for the search.
 50 | 
 51 |     Returns
 52 |     -------
 53 |     '''
 54 |     # Set the files to write data
 55 |     db_folder = "../csd_db_analysis/db_data/"
 56 |     prefix = input_parameters["data_prefix"] 
 57 |     db_structures_folder = db_folder + "_".join([prefix,"structures"]) + "/"
 58 |     
 59 |     # Create the structures folder
 60 |     os.makedirs(db_structures_folder, exist_ok=True)
 61 |     
 62 |     # Get the reference structures dictionary.
 63 |     if input_parameters["structure_list"][0] == "csd-all":
 64 |         reference_structures_f  = '../csd_db_analysis/db_data/' + input_parameters["data_prefix"] + '_csd_refcode_families_clustered.json'
 65 |     elif input_parameters["structure_list"][0] == "csd-unique":
 66 |         reference_structures_f  = '../csd_db_analysis/db_data/' + input_parameters["data_prefix"] + '_csd_refcode_families_unique_structures.json'
 67 |     elif input_parameters["structure_list"][0] == "cif":
 68 |         cif_files_f = '../source_data/cif_files/'
 69 |         reference_structures_f  = cif_files_f + 'cif_structures_list.json'
 70 |     
 71 |     # Check if the dictionary exists.
 72 |     if not os.path.exists(reference_structures_f):
 73 |         # If the file does not exist, raise an exception
 74 |         raise FileNotFoundError(f"The file {reference_structures_f} does not exist.")
 75 |     else:
 76 |         # Get the families and member structures.
 77 |         with open(reference_structures_f) as f:
 78 |             data = f.read()
 79 |         reference_structures = ast.literal_eval(data)
 80 |            
 81 |     # Get the structures list for the analysis
 82 |     structures_list = get_structures_list(input_parameters,reference_structures)
 83 |     
 84 |     # Get the csd entries if necessary
 85 |     if input_parameters["structure_list"][0] in ["csd-all","csd-unique"]:
 86 |         csd_entries = io.EntryReader("CSD")
 87 |     
 88 |     # Initialize cell reference points 
 89 |     cell_reference_points = get_reference_cell_points(-1, 2.5, 0.5)
 90 |     
 91 |     # Set the ZZP planes
 92 |     zzp_planes = set_zzp_planes()
 93 | 
 94 |     # Loop over the structures in the list
 95 |     for structure_name in structures_list:
 96 |         # Set the csd_crystal and csd_molecule objects
 97 |         if input_parameters["structure_list"][0] in ["csd-all","csd-unique"]:
 98 |             entry = csd_entries.entry(structure_name)
 99 |             crystal = entry.crystal 
100 |             if input_parameters["center_molecule"]:
101 |                 crystal.centre_molecule() # Move molecule inside unit cell 
102 |             molecule = entry.molecule
103 |         elif input_parameters["structure_list"][0] == "cif":
104 |             crystal = io.CrystalReader(cif_files_f + structure_name)
105 |             crystal = crystal[0]
106 |             if input_parameters["center_molecule"]:
107 |                 crystal.centre_molecule() # Move molecule inside unit cell 
108 |             molecule = io.MoleculeReader(cif_files_f + structure_name)
109 |             molecule = molecule[0]  
110 |             
111 |         # Exclude structures
112 |         if crystal.identifier in input_parameters["structures_to_exclude"]:
113 |             continue
114 |         
115 |         # Add missing hydrogen atoms
116 |         try:
117 |             molecule.assign_bond_types()
118 |             molecule.add_hydrogens(mode='missing')
119 |             molecule.assign_partial_charges()
120 |         except Exception:
121 |             continue 
122 |         
123 |         # Set the atoms for the reference molecule 
124 |         try:
125 |             atoms = molecule.atoms
126 |         except Exception:
127 |             continue 
128 |         
129 |         # Check for unnatural atoms with no coordinates
130 |         discard = False
131 |         for at in atoms:
132 |             if at.coordinates == None:
133 |                 discard = True 
134 |                 break 
135 |             
136 |         if discard:
137 |             continue
138 |         
139 |         # Check for target fragments
140 |         if check_for_target_fragments(input_parameters,molecule) == None:
141 |             continue
142 |         
143 |         # Initialize structure
144 |         structure = {} 
145 |         
146 |         # Get crystal, molecule and atom properties
147 |         structure["crystal"] = get_csd_crystal_properties(crystal)
148 |         structure["molecule"] = get_csd_atom_and_molecule_properties(crystal,molecule,atoms)
149 |             
150 |         # Get the fragments for the structure
151 |         structure["fragments"] = get_csd_structure_fragments(input_parameters,structure,molecule)
152 |     
153 |         # Discard structures with none of the desired substructures 
154 |         if not bool(structure["fragments"]):
155 |             continue
156 |            
157 |         # Calculate the structure specific cell reference points in cartesian
158 |         # coordinates
159 |         cell_points = np.dot(cell_reference_points, structure["crystal"]["lattice_vectors"])
160 |         
161 |         # Loop over all fragments to calculate the fragment orientation
162 |         # print('Analyzing structure ' + structure["crystal"]["ID"])
163 |         for fragment in structure["fragments"]:
164 |             current_fragment = structure["fragments"][fragment]
165 |             # Get the list of atoms that are used for the aligmnent
166 |             if current_fragment["atoms_to_align"] == "all":
167 |                 atoms_to_align = list(range(current_fragment["n_atoms"]))
168 |             else:
169 |                 atoms_to_align = current_fragment["atoms_to_align"]
170 |             
171 |             # Get the rotation matrix
172 |             current_fragment["rotation_matrix"] = np.round(kabsch_rotation_matrix(current_fragment["atoms_coordinates_sf"][atoms_to_align],
173 |                                                                                   current_fragment["atoms_bond_vectors_c"][atoms_to_align]),4)
174 |             current_fragment["inverse_rotation_matrix"] = np.round(current_fragment["rotation_matrix"].T,4)
175 |             
176 |             # Filter unwanted fragments in case of identical smarts representation
177 |             if fragment[4:] in input_parameters["fragments_to_check_alignment"]:
178 |                 rmsd = align_structures(current_fragment["rotation_matrix"], 
179 |                                         current_fragment["atoms_coordinates_sf"][atoms_to_align], 
180 |                                         current_fragment["atoms_bond_vectors_c"][atoms_to_align])
181 | 
182 |                 if rmsd > input_parameters["alignment_tolerance"]:
183 |                     continue 
184 |             
185 |             # Calculate the normalized vectors perpendicular to the 
186 |             # principal inertia planes in the crystallographic coordinates 
187 |             # system
188 |             current_fragment["principal_inertia_planes_f"] = np.dot(current_fragment["rotation_matrix"], structure["crystal"]["lattice_vectors"].T)
189 |             current_fragment["principal_inertia_planes_f"] = np.round(current_fragment["principal_inertia_planes_f"] / np.linalg.norm(current_fragment["principal_inertia_planes_f"], axis=1, keepdims=True),4)
190 |            
191 |             # Identify for each eigenvector the proposed vectors that are
192 |             # closest to be perpendicular and the respective angle
193 |             current_fragment["n_max_vectors"] = vectors_closest_to_perpendicular(current_fragment["principal_inertia_planes_f"], 
194 |                                                                                  input_parameters["proposed_vectors_n_max"])
195 |         
196 |             # Calculate minimum distances of pripcipal inertia planes to the 
197 |             # corners of all the points of a 3x3x3 supercell in the form
198 |             # (0.5k1, 0.5k2, 0.5k3), k1, k2, k3 = -2, -1, ..., 4
199 |             minimum_distances_to_planes = []
200 |             for plane in current_fragment["rotation_matrix"]:
201 |                 d_min = np.inf
202 |                 for point in cell_points:
203 |                     d = distance_to_plane(point,plane,current_fragment["coordinates_c"],normal=False)
204 |                     if d < d_min:
205 |                         d_min = d
206 |                 minimum_distances_to_planes.append(d_min)
207 |             current_fragment["principal_inertia_planes_distances_to_cell_points"] = np.round(minimum_distances_to_planes,4)
208 |             
209 |             # Calculate minimum distance of non-hydeogen atoms to ZZP planes
210 |             minimum_distances_to_zzp_planes = []
211 |             for point in current_fragment["atoms_coordinates_f"]:
212 |                 d_min = np.inf 
213 |                 for plane_normal, plane_norm in zzp_planes:
214 |                     d = distance_to_zzp_planes_family(point, plane_normal, plane_norm)
215 |                     if d < d_min:
216 |                         d_min = d
217 |                 minimum_distances_to_zzp_planes.append(d_min)
218 |             current_fragment["minimum_atom_distances_to_zzp_planes"] = np.round(minimum_distances_to_zzp_planes,4)
219 |             
220 |             # Add hydrogen atoms to fragmetnts
221 |             if fragment[:3] != "FMC":
222 |                 for atom, atom_label in zip(structure["molecule"]["atoms_species"],structure["molecule"]["atoms_labels"]):
223 |                     if atom == "H":
224 |                         for at1, at2 in structure["molecule"]["bonds"]:
225 |                             if at1 == atom_label:
226 |                                 bonded_atom = at2
227 |                             if at2 == atom_label:
228 |                                 bonded_atom = at1
229 |                         if bonded_atom in current_fragment["atoms_labels"]:
230 |                             current_fragment["atoms_species"].append(atom)
231 |                             current_fragment["atoms_labels"].append(atom_label)
232 |         
233 |         # Create the contacts dictionary
234 |         structure_contacts = {}
235 |         for contact in structure["crystal"]["close_contacts"]:
236 |             # Check if the contact is part of an h-bond
237 |             is_hbond = False
238 |             for hbond in structure["crystal"]["hbonds"]:
239 |                 hbond_atom_labels = [atom.label for atom in hbond.atoms]
240 |                 if (contact.atoms[0].label, contact.atoms[1].label) in list(itertools.permutations(hbond_atom_labels,2)):    
241 |                     if [contact.atoms[0].label, contact.atoms[1].label] not in structure["molecule"]["bonds"] and [contact.atoms[1].label, contact.atoms[0].label] not in structure["molecule"]["bonds"]:                    
242 |                         is_hbond = True 
243 |                         break
244 | 
245 |             # Get the central and contact groups (fragments)
246 |             central_group = [fragment for fragment in structure["fragments"] if contact.atoms[0].label in structure["fragments"][fragment]["atoms_labels"] if fragment[:3] != "FMC"]
247 |             contact_group = [fragment for fragment in structure["fragments"] if contact.atoms[1].label in structure["fragments"][fragment]["atoms_labels"] if fragment[:3] != "FMC"]
248 |             for i in [0, 1]: 
249 |                 for fragment1 in central_group:
250 |                     for fragment2 in contact_group:
251 |                         at1, at2 = 0, 1
252 |                         central_fragment, contact_fragment = fragment1, fragment2 
253 |                             
254 |                         # Get the bond vectors of the contact atoms to the central 
255 |                         # fragment
256 |                         central_bond_vector = contact.atoms[at1].coordinates - structure["fragments"][central_fragment]["coordinates_c"]
257 |                         contact_bond_vector = contact.atoms[at2].coordinates - structure["fragments"][central_fragment]["coordinates_c"]
258 |                         
259 |                         # Rotate them to the central fragment's reference system
260 |                         central_bond_vector_r = np.dot(central_bond_vector,structure["fragments"][central_fragment]["inverse_rotation_matrix"])
261 |                         contact_bond_vector_r = np.dot(contact_bond_vector,structure["fragments"][central_fragment]["inverse_rotation_matrix"])
262 |                         
263 |                         # Convert contact bond vector to spherical coodinates
264 |                         contact_bond_vector_spherical = cartesian_to_spherical(contact_bond_vector_r)
265 |                         
266 |                         # Get the contact type
267 |                         contact_type = "hbond" if is_hbond else "vdW"
268 |                        
269 |                         # Add contact data to list
270 |                         structure_contacts['_'.join([contact.atoms[at1].label,fragment1,contact.atoms[at2].label,fragment2])] = {
271 |                             "cc_length": np.round(contact.length,4),
272 |                             "cc_type": contact_type,
273 |                             "cc_is_in_los": contact.is_in_line_of_sight,
274 |                             "cc_central_atom": {
275 |                                 "atom": contact.atoms[at1].atomic_symbol,
276 |                                 "fragment": central_fragment[4:],
277 |                                 "coordinates": {
278 |                                     "cartesian": np.round(contact.atoms[at1].coordinates,4),
279 |                                     "fractional": np.round(contact.atoms[at1].fractional_coordinates,4)
280 |                                     },
281 |                                 "bond_vectors": np.round(central_bond_vector,4),
282 |                                 "reference_bond_vectors": np.round(central_bond_vector_r,4)
283 |                                 },
284 |                             "cc_contact_atom": {
285 |                                 "atom": contact.atoms[at2].atomic_symbol,
286 |                                 "fragment": contact_fragment[4:],
287 |                                 "coordinates": {
288 |                                     "cartesian": np.round(contact.atoms[at2].coordinates,4),
289 |                                     "fractional": np.round(contact.atoms[at2].fractional_coordinates,4)
290 |                                     },
291 |                                 "bond_vectors": np.round(contact_bond_vector,4),
292 |                                 "reference_bond_vectors": np.round(contact_bond_vector_r,4),
293 |                                 "reference_bond_vectors_spherical": np.round(contact_bond_vector_spherical,4)
294 |                                 },
295 |                             }
296 |         structure["crystal"]["close_contacts"] = structure_contacts
297 | 
298 |         # Create the hydrogen bonds dictionary
299 |         structure_hbonds = {}
300 |         for hbond in structure["crystal"]["hbonds"]:
301 |             # Get the donor atom
302 |             hbond_atom_labels = [atom.label for atom in hbond.atoms]
303 |             for bond in structure["molecule"]["bonds"]:
304 |                 if hbond_atom_labels[1] in bond:
305 |                     if hbond_atom_labels[0] in bond:
306 |                         hbond_donor = 0
307 |                         hbond_acceptor = 2
308 |                     if hbond_atom_labels[2] in bond:
309 |                         hbond_donor = 2
310 |                         hbond_acceptor = 0
311 |             
312 |             structure_hbonds['_'.join([hbond.atoms[hbond_donor].label,hbond.atoms[1].label,hbond.atoms[hbond_acceptor].label])] = {
313 |                 "hb_atoms": (hbond.atoms[hbond_donor].atomic_symbol,hbond.atoms[1].atomic_symbol,hbond.atoms[hbond_acceptor].atomic_symbol),
314 |                 "hb_length": np.round(hbond.length,4),
315 |                 "hb_da_distance": np.round(hbond.da_distance,4),
316 |                 "hb_angle": np.round(hbond.angle,4),
317 |                 "hb_is_in_los": hbond.is_in_line_of_sight,
318 |                 "hb_donor_coordinates": np.round(hbond.atoms[hbond_donor].coordinates,4),
319 |                 "hb_h_coordinates": np.round(hbond.atoms[1].coordinates,4),
320 |                 "hb_acceptor_coordinates": np.round(hbond.atoms[hbond_acceptor].coordinates,4),
321 |                 }
322 |         structure["crystal"]["hbonds"] = structure_hbonds
323 |         
324 |         # Create the crystal dictionary
325 |         structure_crystal = {
326 |             "str_id": structure["crystal"]["ID"],
327 |             "space_group": structure["crystal"]["space_group"],
328 |             "z_crystal": structure["crystal"]["z_crystal"],
329 |             "z_prime": structure["crystal"]["z_prime"],
330 |             "formula": structure["crystal"]["formula"],
331 |             "species": structure["crystal"]["species"],
332 |             "cell_lengths": structure["crystal"]["cell_lengths"],
333 |             "scaled_cell_lengths": structure["crystal"]["scaled_cell_lengths"],
334 |             "cell_angles": structure["crystal"]["cell_angles"],
335 |             "cell_volume": structure["crystal"]["cell_volume"],
336 |             "cell_density": structure["crystal"]["cell_density"],
337 |             "vdWFV": structure["crystal"]["vdWFV"],
338 |             "SAS": structure["crystal"]["SAS"],
339 |             "lattice_vectors": structure["crystal"]["lattice_vectors"],
340 |             "lattice_energy": structure["crystal"]["lattice_energy"],
341 |             "close_contacts": structure["crystal"]["close_contacts"],
342 |             "hbonds": structure["crystal"]["hbonds"],
343 |             }
344 |             
345 |         # Create the fragments dictionary 
346 |         structure_fragments = {}
347 |         for fragment in structure["fragments"]:
348 |             # Get the data for the atoms
349 |             at_labels = structure["fragments"][fragment]["atoms_labels"]
350 |             at_species = structure["fragments"][fragment]["atoms_species"]
351 |             at_coordinates_c = structure["fragments"][fragment]["atoms_coordinates_c"]
352 |             at_coordinates_f = structure["fragments"][fragment]["atoms_coordinates_f"]
353 |             at_bond_vectors_c = structure["fragments"][fragment]["atoms_bond_vectors_c"]
354 |             at_bond_vectors_f = structure["fragments"][fragment]["atoms_bond_vectors_f"]
355 |             min_distance_to_zzp = structure["fragments"][fragment]["minimum_atom_distances_to_zzp_planes"]
356 |             fragment_atoms = {}
357 |             for label, species, coor_c, coor_f, bv_c, bv_f, d_min in zip(at_labels,at_species,at_coordinates_c,at_coordinates_f,at_bond_vectors_c,at_bond_vectors_f,min_distance_to_zzp):
358 |                 fragment_atoms[label] = {
359 |                     "species": species,
360 |                     "coordinates": {
361 |                         "cartesian": coor_c,
362 |                         "fractional": coor_f    
363 |                         },
364 |                     "bond_vectors": {
365 |                         "cartesian": bv_c,
366 |                         "fractional": bv_f    
367 |                         },
368 |                     "dzzp_min": d_min
369 |                     }
370 |                 
371 |             # Get the data for the inertia planes
372 |             fragment_inertia_planes = {}
373 |             eigvecs_c = structure["fragments"][fragment]["rotation_matrix"]
374 |             eigvecs_f = structure["fragments"][fragment]["principal_inertia_planes_f"]
375 |             n_max_vectors = structure["fragments"][fragment]["n_max_vectors"]
376 |             eigvecs_dmin = structure["fragments"][fragment]["principal_inertia_planes_distances_to_cell_points"]
377 |             for i_vector, (e, w, (_, ((n1, ang1), (n2, ang2))), d_min) in enumerate(zip(eigvecs_c,eigvecs_f,n_max_vectors,eigvecs_dmin)):
378 |                 fragment_inertia_planes["e_" + str(i_vector + 1)] = {
379 |                     "cartesian": e,
380 |                     "crystallographic": w,
381 |                     "perpendicular_vectors": {
382 |                         "vector_1": n1,
383 |                         "vector_2": n2,
384 |                         "angle_1": ang1,
385 |                         "angle_2": ang2},
386 |                     "min_distance_to reference_points": d_min
387 |                     }
388 |              
389 |             # Set the fragment name
390 |             if fragment[4:-2] == "component":
391 |                 fragment_name = "component"
392 |             else:
393 |                 fragment_name = fragment[4:]
394 |                 
395 |             # Set the fragment data
396 |             structure_fragments[fragment] = {
397 |                 "fragment": fragment_name,
398 |                 "coordinates": {
399 |                     "cartesian": structure["fragments"][fragment]["coordinates_c"],
400 |                     "fractional": structure["fragments"][fragment]["coordinates_f"]
401 |                     },
402 |                 "inertia_planes": fragment_inertia_planes,
403 |                 "atoms": fragment_atoms
404 |                 }
405 |             
406 |         # Set the complete structure data
407 |         structure_data = {
408 |             "crystal": structure_crystal,
409 |             "fragments": structure_fragments
410 |             }
411 |         
412 |         # Convert data to json format
413 |         structure_data = convert_to_json(structure_data)
414 |         
415 |         # Write data to file
416 |         io_operations.write_structure_data_file(db_structures_folder,structure_crystal,structure_data)
417 |         
418 |     return
419 |     
def get_structure_filter_data(input_parameters):
    """
    Creates a dictionary with structure information that can be used to rapidly
    filter structures for analysis.

    Every file found in the structures folder of the current data prefix is
    loaded as JSON and summarized (space group, Z, Z', species, fragment
    names and the unique contact descriptors). The summary is written through
    ``io_operations.write_structures_filter_data``.

    Parameters
    ----------
    input_parameters : dict
        A dictionary with the input parameters for the search. The key
        "data_prefix" is read here.

    Returns
    -------
    None.

    """
    def add_unique(collection, item):
        # Keep the first occurrence only, preserving the order of appearance
        if item not in collection:
            collection.append(item)

    # Set the files to read and write data
    db_folder = "../csd_db_analysis/db_data/"
    prefix = input_parameters["data_prefix"]
    db_structures_folder = db_folder + "_".join([prefix,"structures"]) + "/"

    # Read the structures list. os.listdir returns the entries in arbitrary
    # order, so they are sorted to make the output file reproducible.
    structures_list = sorted(os.listdir(db_structures_folder))

    # Get the structure filter data
    structures_filter_data = {}
    for structure in structures_list:
        # The file is only needed while parsing
        with open(os.path.join(db_structures_folder, structure),"r") as f:
            structure_data = json.load(f)

        structure_crystal = structure_data["crystal"]
        structure_fragments = structure_data["fragments"]
        structure_contacts = structure_crystal["close_contacts"]

        # Unique fragment names
        fragments = []
        for fragment in structure_fragments:
            add_unique(fragments, structure_fragments[fragment]["fragment"])

        # Unique contact descriptors
        contact_pairs = []
        contact_central_fragments = []
        contact_fragment_pairs = []
        for contact in structure_contacts:
            contact_data = structure_contacts[contact]
            central = contact_data["cc_central_atom"]
            other = contact_data["cc_contact_atom"]
            cc_type = contact_data["cc_type"]
            cc_is_in_los = contact_data["cc_is_in_los"]

            add_unique(contact_pairs, [central["atom"],other["atom"],cc_type,cc_is_in_los])
            add_unique(contact_central_fragments, [central["fragment"],cc_type,cc_is_in_los])
            add_unique(contact_fragment_pairs, [central["fragment"],other["fragment"],cc_type,cc_is_in_los])

        structures_filter_data[structure_crystal["str_id"]] = {
            "space_group": structure_crystal["space_group"],
            "z_crystal": structure_crystal["z_crystal"],
            "z_prime": structure_crystal["z_prime"],
            "species": structure_crystal["species"],
            "fragments": fragments,
            "contact_pairs": contact_pairs,
            "contact_central_fragments": contact_central_fragments,
            "contact_fragment_pairs": contact_fragment_pairs
            }

    # Convert data to json format
    structures_filter_data = convert_to_json(structures_filter_data)

    # Write data to file
    io_operations.write_structures_filter_data(input_parameters,structures_filter_data)

    return
492 |         
493 |                         
494 |                         
495 | 
496 |     


--------------------------------------------------------------------------------
/source_code/get_structures_list.py:
--------------------------------------------------------------------------------
 1 | def get_structures_list(input_parameters,reference_structures):
 2 |     '''
 3 |     Returns the structure list for the analysis
 4 |     
 5 |     Parameters
 6 |     ----------
 7 |     inpu_parameters : dict
 8 |         A dictionary with the user defined input parameters.
 9 |     reference_structures : dict
10 |         The unique reference structures calculated based on the user defined
11 |         criteria
12 |         
13 |     Returns
14 |     -------
15 |     structures_list : dict
16 |         A dictionary with the structures to analyze
17 |     '''
18 |     # Create the structures list        
19 |     structures_list = {}
20 |     if input_parameters["structure_list"][0] in ["csd-all","csd-unique"]:
21 |         if input_parameters["structure_list"][0] == "csd-all": 
22 |             if input_parameters["structure_list"][1] == "all":
23 |                 for family in reference_structures:
24 |                     for group in reference_structures[family]:
25 |                         for structure in group:
26 |                                 structures_list[structure] = {}
27 |             else:
28 |                 target_families = [families[0] for families in input_parameters["structure_list"][1]]
29 |                 target_families_structures = [families[1] for families in input_parameters["structure_list"][1]]
30 |                 for target_family, target_structures in zip(target_families, target_families_structures):
31 |                     if target_structures == "all":
32 |                         for group in reference_structures[target_family]:
33 |                             for structure in group: 
34 |                                 structures_list[structure] = {}
35 |                                 
36 |                     else:
37 |                         structure_indices = [str(target_structure).zfill(2) if target_structure != 0 else '' for target_structure in target_structures ]
38 |                         for index in structure_indices:
39 |                             if target_family + index not in [structure for target_family in reference_structures for group in reference_structures[target_family] for structure in group]:
40 |                                 print(f'Structure {target_family + index} is not found in reference structures and will be excluded from the data extraction process.')
41 |                                 continue
42 |                             structures_list[target_family + index] = {}
43 |                             
44 |         if input_parameters["structure_list"][0] == "csd-unique": 
45 |             if input_parameters["structure_list"][1] == "all":
46 |                 for family in reference_structures:
47 |                     for structure in reference_structures[family]:
48 |                             structures_list[structure] = {}
49 |             else:
50 |                 target_families = [families[0] for families in input_parameters["structure_list"][1]]
51 |                 target_families_structures = [families[1] for families in input_parameters["structure_list"][1]]
52 |                 for target_family, target_structures in zip(target_families, target_families_structures):
53 |                     if target_structures == "all":
54 |                         for structure in reference_structures[target_family]:
55 |                             structures_list[structure] = {}
56 |                             
57 |                     else:
58 |                         structure_indices = [str(target_structure).zfill(2) if target_structure != 0 else '' for target_structure in target_structures ]
59 |                         for index in structure_indices:
60 |                             if target_family + index not in reference_structures[target_family]:
61 |                                 print(f'Structure {target_family + index} is not found in reference structures and will be excluded from the data extraction process.')
62 |                                 continue
63 |                             structures_list[target_family + index] = {}
64 |                             
65 |     elif input_parameters["structure_list"][0] == "cif":
66 |         for family in reference_structures:
67 |             for structure in reference_structures[family]:
68 |                 structures_list[structure] = {}
69 |                 
70 |     return structures_list


--------------------------------------------------------------------------------
/source_code/input_checks.py:
--------------------------------------------------------------------------------
 1 | import sys 
 2 | 
 3 | # Define a function to check if the given variables are boolean
 4 | def check_boolean_variables(variables):
 5 |     for var_name, value in variables.items():
 6 |         if not isinstance(value, bool):
 7 |             print(f"Error: The variable '{var_name}' must be a boolean (True or False).")
 8 |             sys.exit(1)
 9 | 
10 | # Define a function to check if the given variables are integers
def check_integer_variables(variables):
    """
    Abort the program when a supplied variable is not an integer.

    Parameters
    ----------
    variables : dict
        Mapping of variable names to the values that must be integers.

    Returns
    -------
    None
        For the first invalid value an error message is printed and the
        program exits with status 1.
    """
    for var_name, value in variables.items():
        # bool is a subclass of int in Python, so it has to be excluded
        # explicitly; otherwise True/False would be accepted as integers.
        if isinstance(value, bool) or not isinstance(value, int):
            print(f"Error: The variable '{var_name}' must be an integer.")
            sys.exit(1)
16 | 
17 | # Define functions to check if the single value variables get values from their respective lists
def check_single_value_variables(variables):
    """
    Abort the program when a single-valued variable holds a value that is
    not among its allowed options.

    Parameters
    ----------
    variables : dict
        Mapping of variable names to a two-item sequence: the current value
        at index 0 and the collection of allowed values at index 1.

    Returns
    -------
    None
        For the first invalid value an error message is printed and the
        program exits with status 1.
    """
    for var_name in variables:
        entry = variables[var_name]
        current_value = entry[0]
        allowed_values = entry[1]

        if current_value in allowed_values:
            continue

        print(f"Error: The variable '{var_name}' has an invalid value '{current_value}'. Allowed values are {allowed_values}.")
        sys.exit(1)
23 | 
24 | # Define functions to check if the list variables get values from their respective lists
def check_list_variables(variables):
    """
    Abort the program when a list-valued variable contains an entry that is
    not among its allowed options.

    Parameters
    ----------
    variables : dict
        Mapping of variable names to a two-item sequence: the list of
        selected values at index 0 and the collection of allowed values at
        index 1.

    Returns
    -------
    None
        For the first variable with an invalid entry an error message is
        printed (showing the whole selected list) and the program exits with
        status 1.
    """
    for var_name in variables:
        entry = variables[var_name]
        selected_values = entry[0]
        allowed_values = entry[1]

        every_value_allowed = all(item in allowed_values for item in selected_values)
        if not every_value_allowed:
            print(f"Error: The variable '{var_name}' has an invalid value '{selected_values}'. Allowed values are {allowed_values}.")
            sys.exit(1)
30 | 
def check_input_parameters(data_analysis,data_extraction,extraction_actions,extraction_filters,analysis_actions,topological_properties):
    """
    Validate the user supplied input parameters.

    Parameters
    ----------
    data_analysis : bool
        Checked to be a boolean.
    data_extraction : bool
        Checked to be a boolean.
    extraction_actions : dict
        Every value in this mapping is checked to be a boolean.
    extraction_filters : dict
        The keys 'center_molecule', 'add_full_component',
        'unique_structures_clustering_method', 'structure_list' and
        'crystal_type' are read from this mapping.
    analysis_actions : dict
        Every value in this mapping is checked to be a boolean.
    topological_properties : dict
        The key 'proposed_vectors_n_max' is read and checked to be an integer.

    Returns
    -------
    None
        The individual checkers terminate the program on the first invalid
        value.
    """
    # Boolean switches; insertion order is kept so that later mappings
    # override earlier entries with the same name
    boolean_variables = {
        'data_analysis': data_analysis,
        'data_extraction': data_extraction,
    }
    boolean_variables.update(extraction_actions)
    boolean_variables['center_molecule'] = extraction_filters['center_molecule']
    boolean_variables['add_full_component'] = extraction_filters['add_full_component']
    boolean_variables.update(analysis_actions)

    # Integer valued variables
    integer_variables = {}
    integer_variables['proposed_vectors_n_max'] = topological_properties['proposed_vectors_n_max']

    # Variables that take one value out of a fixed set: [value, allowed values]
    clustering_method = extraction_filters['unique_structures_clustering_method']
    structure_source = extraction_filters['structure_list'][0]
    single_value_variables = {
        'unique_structures_clustering_method': [clustering_method, ['energy', 'vdWFV']],
        'structure_list': [structure_source, ['csd-all', 'csd-unique', 'cif']],
    }

    # Variables that take a list of values out of a fixed set: [values, allowed values]
    list_variables = {}
    list_variables['crystal_type'] = [extraction_filters['crystal_type'], ['homomolecular', 'co-crystal', 'hydrate']]

    # All dictionaries are built first; the checks then run in a fixed order
    scheduled_checks = (
        (check_boolean_variables, boolean_variables),
        (check_integer_variables, integer_variables),
        (check_single_value_variables, single_value_variables),
        (check_list_variables, list_variables),
    )
    for checker, checked_variables in scheduled_checks:
        checker(checked_variables)


--------------------------------------------------------------------------------
/source_code/io_operations.py:
--------------------------------------------------------------------------------
 1 | import os 
 2 | 
 3 | def check_for_file(save_dir,filename):
 4 |     """
 5 |     Check if a file exists and if it does, ask the user whether to overwrite it or not.
 6 | 
 7 |     Parameters
 8 |     ----------
 9 |     filename : str
10 |         The name of the file.
11 |     
12 |     Returns
13 |     -------
14 |     file object
15 |     """
16 |     if os.path.exists(save_dir + filename):
17 |         # If the file exists, ask for confirmation to overwrite
18 |         user_input = input(f'File {filename} already exists. Do you want to overwrite it? (yes/no): ').lower()
19 |         if user_input not in ['yes','y','YES','Y']:
20 |             print(f'\tWARNING! Exiting without overwriting the file.\n\tNo data will be writen in file: {filename}')
21 |             return None  # or manage the scenario where the user doesn't want to overwrite
22 | 
23 |     # If the file doesn't exist or if overwrite is confirmed, open and return the file object
24 |     try:
25 |         file_object = open(save_dir + filename, 'w')  # Open the file with writing mode, which will also create it if it doesn't exist
26 |         return file_object
27 |     except Exception as e:
28 |         print(f'An error occurred: {e}')
29 |         return None 
30 | 
def write_structure_data_file(db_structures_folder,structure_crystal,structure_data):
    """
    Write the data of a single structure to ``<str_id>.json``.

    Parameters
    ----------
    db_structures_folder : str
        The folder where the data for each structure is stored. It is
        concatenated directly with the file name, so it must already end
        with a path separator.
    structure_crystal : dict
        A dictionary with the structure crystal data; its "str_id" entry is
        used as the file name.
    structure_data : str
        The text written to the file (passed unchanged to ``write``).

    Returns
    -------
    None
    """
    output_path = db_structures_folder + structure_crystal["str_id"] + ".json"

    # An existing file with the same name is overwritten without confirmation
    with open(output_path,"w") as output_file:
        output_file.write(structure_data)

    return
53 | 
def write_structures_filter_data(input_parameters,structures_filter_data):
    """
    Writes compact structure data for the filtering step.

    The data are written to
    ``../csd_db_analysis/db_data/<data_prefix>_structures_filter_data.json``.
    If the file already exists the user is asked (by ``check_for_file``)
    whether it may be overwritten.

    Parameters
    ----------
    input_parameters : dict
        A dictionary with the user defined input data. Only the
        "data_prefix" entry is read here.
    structures_filter_data : str
        The text written to the file (passed unchanged to ``write``).

    Returns
    -------
    None
        Nothing is written when ``check_for_file`` returns no file object.
    """
    # Set the file name
    db_folder = "../csd_db_analysis/db_data/"
    prefix = input_parameters["data_prefix"]
    structures_filter_data_file = check_for_file(db_folder, prefix + "_structures_filter_data.json")

    # check_for_file returns None when the user declines to overwrite or the
    # file cannot be opened; it has already printed the reason, so just stop
    # instead of failing with an AttributeError on None.
    if structures_filter_data_file is None:
        return

    # The context manager closes the file even if the write fails
    with structures_filter_data_file:
        structures_filter_data_file.write(structures_filter_data)

    return
80 |     
81 |     


--------------------------------------------------------------------------------
/source_code/maths.py:
--------------------------------------------------------------------------------
  1 | import itertools 
  2 | import numpy as np 
  3 | from scipy.spatial.distance import cdist
  4 | 
  5 | ### COORDINATE OPERATIONS #####################################################
  6 | def kabsch_rotation_matrix(A, B):
  7 |     """
  8 |     Calculate the optimal rotation matrix to align structure A to structure B 
  9 |         using Kabsch algorithm
 10 |     
 11 |     Parameters
 12 |     ----------
 13 |     A: Nx3 matrix of coordinates (N atoms, 3 dimensions) for molecule A
 14 |     B: Nx3 matrix of coordinates (N atoms, 3 dimensions) for molecule B
 15 |     
 16 |     Returns
 17 |     -------
 18 |     R: 3x3 optimal rotation matrix
 19 |     """
 20 | 
 21 |     # Step 1: Centroid and Covariance
 22 |     # Centroid already at origin, so we skip to covariance matrix
 23 |     H = A.T @ B
 24 | 
 25 |     # Step 2: Singular value decomposition (SVD)
 26 |     V, S, Wt = np.linalg.svd(H)
 27 | 
 28 |     # Step 3: Check for reflection and calculate rotation matrix
 29 |     d = (np.linalg.det(V) * np.linalg.det(Wt)) < 0.0
 30 | 
 31 |     if d:
 32 |         S[-1] = -S[-1]
 33 |         V[:, -1] = -V[:, -1]
 34 | 
 35 |     # Create optimal rotation matrix
 36 |     R = V @ Wt
 37 | 
 38 |     return R
 39 | 
 40 | def align_structures(R, A, B):
 41 |     """
 42 |     Aligns structure A to structure B and calculates RMSD
 43 |     
 44 |     Pamameters:
 45 |         R: 3x3 optimal rotation matrix
 46 |         A: Nx3 matrix of coordinates for molecule A
 47 |         B: Nx3 matrix of coordinates for molecule B
 48 |     Returns: 
 49 |         R, aligned A, RMSD
 50 |     """
 51 |     A_aligned = A @ R  # Apply the rotation matrix to A to align it with B
 52 |     rmsd = np.sqrt(np.mean(np.sum((A_aligned - B)**2, axis=1)))  # Calculate RMSD
 53 |     
 54 |     return rmsd
 55 | 
 56 | def cartesian_to_spherical(vector):
 57 |     """
 58 |     Convert a vector from Cartesian coordinates (x, y, z) to spherical 
 59 |     coordinates (r, theta, phi)
 60 |     
 61 |     Parameters
 62 |     ----------
 63 |     vector : ndarray 
 64 |         A NumPy array containing the x, y, and z coordinates of the vector.
 65 |     
 66 |     Returns
 67 |     -------
 68 |     ndarray 
 69 |         A NumPy array containing the spherical coordinates (r, theta, phi),
 70 |         where r is the radius, theta is the polar angle (in radians), and
 71 |         phi is the azimuthal angle (in radians).
 72 |     """
 73 |     # Convert the input list or tuple to a NumPy array if it's not already
 74 |     vector = np.asarray(vector)
 75 |     
 76 |     # Compute the radial distance
 77 |     r = np.linalg.norm(vector)
 78 |     
 79 |     # Compute the polar angle (theta) - angle from the z-axis
 80 |     # Guard against the r being zero to avoid division by zero
 81 |     theta = np.arccos(vector[2] / r) if r != 0 else 0
 82 |     
 83 |     # Compute the azimuthal angle (phi) - angle from the x-axis in the xy-plane
 84 |     phi = np.arctan2(vector[1], vector[0])
 85 |     
 86 |     return np.array([r, np.degrees(theta), np.degrees(phi)])
 87 | 
 88 | ### DISTANCE OPERATIONS #######################################################
 89 | def distance_to_plane(point,plane_normal,plane_point,normal=False):
 90 |     """
 91 |     Calculate the distance from a point to a plane defined by
 92 |     a normal vector and a point on the plane.
 93 |     """
 94 |     if normal:
 95 |         d =  np.abs(np.dot(plane_normal, point - plane_point)) 
 96 |     else:
 97 |         d =  np.abs(np.dot(plane_normal, point - plane_point)) / np.linalg.norm(plane_normal)
 98 |     return d
 99 | 
def distance_to_zzp_planes_family(point,plane_normal,plane_norm):
    """
    Calculate the distance from a point to the nearest member of a family of
    zzp planes defined by a normal vector.

    Parameters
    ----------
    point : numpy.ndarray
        The point whose distance to the family of planes is calculated.
    plane_normal : numpy.ndarray
        The vector normal to the planes of the family.
    plane_norm : float
        The value used to scale the plane spacing (0.25 / plane_norm).

    Returns
    -------
    float
        The distance to the closest plane, between 0 and 0.125 / plane_norm.
    """
    # Consecutive planes of the family are 0.25 / plane_norm apart
    spacing = 0.25 / plane_norm
    origin = np.array([0.0,0.0,0.0])

    # Fold the distance from the plane through the origin into one spacing
    distance = distance_to_plane(point,plane_normal,origin) % spacing

    # Past the midpoint the next plane of the family is the closer one
    if distance > 0.125 / plane_norm:
        distance = spacing - distance

    return distance
109 | 
110 | ### PHYSICAL PROPERTIES OPERATIONS ############################################
def calculate_inertia(mass,pos):
    """
    Calculates the inertia tensor of a configuration of atoms and returns
    its eigen-decomposition.

    Parameters
    ----------
    mass : numpy.ndarray
        The mass of the atoms
    pos : numpy.ndarray
        The positions of the atoms (one row per atom)

    Returns
    -------
    The result of ``numpy.linalg.eig`` for the inertia tensor: the
    eigenvalues and the eigenvectors (as columns). No sorting is applied.
    """
    # Off-diagonal part: -sum_k m_k * r_ki * r_kj
    weighted_products = np.einsum('k,ki,kj->ij', mass, pos, pos, optimize=True)
    inertia_tensor = -weighted_products

    # The diagonal additionally receives sum_k m_k * |r_k|^2
    mass_weighted_r2 = np.einsum('k,k->', mass, np.sum(pos**2, axis=1), optimize=True)
    inertia_tensor[np.diag_indices_from(inertia_tensor)] = mass_weighted_r2 + np.diag(inertia_tensor)

    return np.linalg.eig(inertia_tensor)
132 | 
def ensure_right_handed_coordinate_system(vectors):
    """
    Make sure a set of three column vectors forms a right-handed coordinate
    system.

    Parameters
    ----------
    vectors : numpy.ndarray
        2D array where each column is an eigenvector. The array is modified
        in place when the system is left-handed.

    Returns
    -------
    numpy.ndarray
        The same array object, with the third column negated if the scalar
        triple product of the columns was negative.
    """
    first = vectors[:, 0]
    second = vectors[:, 1]
    third = vectors[:, 2]

    # v1 . (v2 x v3) is negative for a left-handed set of vectors
    handedness = np.dot(first, np.cross(second, third))

    if handedness < 0:
        # Reversing the third vector switches the handedness
        vectors[:, 2] = -third

    return vectors
156 | 
def sort_eigenvectors(eigenvalues,eigenvectors):
    """
    Sort eigenvalues in ascending order and reorder the eigenvectors to match.

    Parameters
    ----------
    eigenvalues : numpy.ndarray
        1D array containing the eigenvalues.
    eigenvectors : numpy.ndarray
        2D array where each column is an eigenvector.

    Returns
    -------
    sorted_eigenvalues : numpy.ndarray
        Eigenvalues sorted in ascending order.
    sorted_eigenvectors : numpy.ndarray
        Eigenvectors with their columns reordered to correspond to
        sorted_eigenvalues.
    """
    # Permutation that puts the eigenvalues in ascending order
    ascending_order = np.argsort(eigenvalues)

    # The same permutation is applied to the eigenvector columns
    return eigenvalues[ascending_order], eigenvectors[:, ascending_order]
180 | 
def center_of_mass(mass,pos):
    '''
    Calculates and returns the center of mass for a configuration of atoms

    Parameters
    ----------
    mass : numpy.ndarray
        An array with the mass of the atoms
    pos : numpy.ndarray
        An array with the positions of the atoms (one row per atom)

    Returns
    -------
    numpy.ndarray
        The mass-weighted mean position of the configuration
    '''
    # Each position row is scaled by the mass of its atom
    weighted_positions = mass[:,np.newaxis] * pos
    total_mass = np.sum(mass)

    return np.sum(weighted_positions, axis = 0) / total_mass
198 | 
199 | ### TOPOLOGICAL PROPERTIES OPERATIONS #########################################
def set_zzp_planes():
    '''
    Sets and returns the ZZP planes

    Returns
    -------
    zzp_planes : tuple
        A tuple with one (normal, norm) pair per plane family, where normal
        is a numpy array with the integer components of the plane normal and
        norm is the value stored with it (1 for the axial normals and
        sqrt(2) for the diagonal ones).
    '''
    axial_normals = ([1, 0, 0], [0, 1, 0], [0, 0, 1])
    diagonal_normals = ([1, 1, 0], [1,-1, 0],
                        [1, 0, 1], [1, 0,-1],
                        [0, 1, 1], [0, 1,-1])

    planes = [(np.array(components),1.) for components in axial_normals]
    planes.extend((np.array(components),np.sqrt(2.)) for components in diagonal_normals)

    return tuple(planes)
220 | 
def generate_proposed_eigenvectors(n_max):
    """
    Generate the list of proposed 3D integer vectors.

    A vector (x, y, z) is kept when
      * x lies in [0, n_max] and y, z lie in [-n_max, n_max],
      * one or two of its components are zero, and
      * it is not parallel to a vector that was accepted earlier.

    Parameters
    ----------
    n_max : int
        The maximum absolute value for the vector components.

    Returns
    -------
    A list of valid 3D vectors as tuples, in the order they were generated.
    """
    # Values for the second and third component: 0, 1, -1, 2, -2, ...
    signed_values = [0]
    for magnitude in range(1, n_max + 1):
        signed_values.extend((magnitude, -magnitude))

    accepted = []
    for candidate in itertools.product(range(0,n_max+1), signed_values, signed_values):
        # Keep only vectors with one or two vanishing components
        zero_count = candidate.count(0)
        if zero_count < 1 or zero_count > 2:
            continue

        cx, cy, cz = candidate

        # Two vectors are parallel when all components of their cross
        # product vanish
        parallel_to_accepted = any(
            ay*cz - az*cy == 0 and az*cx - ax*cz == 0 and ax*cy - ay*cx == 0
            for ax, ay, az in accepted
        )

        if not parallel_to_accepted:
            accepted.append(candidate)

    return accepted
266 | 
def vectors_closest_to_perpendicular(I, n_max):
    """
    For each vector v in I, find the two proposed vectors that are closest
    to being perpendicular to v.

    Parameters
    ----------
    I : list
        List of inquiry vectors.
    n_max : int
        The maximum absolute component value of the proposed vectors, passed
        to generate_proposed_eigenvectors.

    Returns
    -------
    A list of tuples (v_i, [(w_i, a_i), (w_j, a_j)]), where v_i is a vector
    from I, w_i, w_j are the two proposed vectors with the smallest absolute
    cosine to v_i, and a_i, a_j are angles in degrees rounded to two
    decimals.

    NOTE(review): the value fed to arccos is (cosine distance - 1), i.e.
    minus the cosine of the angle between the vectors, so the reported angle
    is the supplement (180 - angle) of the geometric angle between v_i and
    w_i. Confirm that this is the intended convention.
    """
    # Generate the set of the proposed vectors
    candidates = generate_proposed_eigenvectors(n_max)

    # Work with floating point arrays
    inquiry_array = np.array(I, dtype=np.float64)
    candidate_array = np.array(candidates, dtype=np.float64)

    # Normalise every vector; rows with zero length are left as zeros
    inquiry_lengths = np.linalg.norm(inquiry_array, axis=1, keepdims=True)
    candidate_lengths = np.linalg.norm(candidate_array, axis=1, keepdims=True)
    inquiry_array = np.divide(inquiry_array, inquiry_lengths, out=np.zeros_like(inquiry_array), where=inquiry_lengths!=0)
    candidate_array = np.divide(candidate_array, candidate_lengths, out=np.zeros_like(candidate_array), where=candidate_lengths!=0)

    # The cosine distance is 1 - cos, so this matrix holds -cos for each pair
    negated_cosines = cdist(inquiry_array, candidate_array, metric='cosine') - 1.0

    # The two candidates with the smallest |cos| for each inquiry vector
    best_two_columns = np.argsort(np.abs(negated_cosines), axis=1)[:,:2]

    def angle_in_degrees(row, column):
        # arccos of the stored value, converted from radians to degrees
        return np.round(np.arccos(negated_cosines[row, column]) * 180.0 / np.pi,2)

    return [
        (I[row], [(candidates[column], angle_in_degrees(row, column)) for column in columns])
        for row, columns in enumerate(best_two_columns)
    ]
328 | 
def get_reference_cell_points(min_value,max_value,step):
    '''
    Returns a list of the reference cell points for the calculation of the
    distances of the principal inertia planes to the points

    Parameters
    ----------
    min_value : float
        The minimum coordinate in fractional coordinates
    max_value : float
        The maximum coordinate in fractional coordinates (excluded, as in
        numpy.arange)
    step : float
        The step in fractional coordinates

    Returns
    -------
    list
        All [u, v, w] combinations of the grid values, with w varying
        fastest and u slowest

    '''
    grid_values = np.arange(min_value, max_value, step)

    # The Cartesian product enumerates the points with the last index fastest
    return [list(point) for point in itertools.product(grid_values, repeat=3)]
352 | 
353 | 
354 | 
355 | 
356 | 


--------------------------------------------------------------------------------
/source_code/space_group_operations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def parse_symmetry_operation(op):
 4 |     '''
 5 |     Parse a symmetry operation string into a transformation function.
 6 |     '''
 7 |     # Replace fractional coordinate symbols with array references
 8 |     op = op.replace('x', 'coord[0]').replace('y', 'coord[1]').replace('z', 'coord[2]')
 9 |     
10 |     # Create a lambda function for the operation
11 |     return lambda coord: np.array(eval(op))
12 | 
def apply_symmetry_operations(fractional_positions,symmetry_operation,translation=True):
    '''
    Apply a symmetry operation to the fractional coordinates of a molecule.

    Parameters
    ----------
    fractional_positions : numpy.ndarray
        The fractional coordinates of the atoms in the reference molecule.
    symmetry_operation : str
        The symmetry operation for the symmetric molecule
    translation : bool, optional
        When False, the translational constants of the operation are
        replaced by 0.0 so that only the rotational part is applied.

    Returns
    -------
    symmetric_positions : numpy.ndarray
        The fractional coordinates of the atoms in the symmetric molecule
    '''
    # Remove the translational part if requested. Both the decimal and the
    # fractional spellings of the crystallographic translations are covered;
    # the original list missed 1/6 (and 1/2, 1/4, 3/4), so those translations
    # were silently kept.
    if not translation:
        translation_coefficients = ("0.25","0.5","0.75","1.0",
                                    "1/2","1/4","3/4",
                                    "1/3","2/3","1/6","5/6")
        for translation_coefficient in translation_coefficients:
            symmetry_operation = symmetry_operation.replace(translation_coefficient,"0.0")

    # Parse the symmetry operation
    transformation = parse_symmetry_operation(symmetry_operation)

    # Apply the transformation to each fractional position
    symmetric_positions =  np.array([transformation(pos) for pos in fractional_positions])

    return symmetric_positions
40 | 


--------------------------------------------------------------------------------
/source_code/structure_operations.py:
--------------------------------------------------------------------------------
  1 | import networkx as nx 
  2 | import numpy as np
  3 | import re 
  4 | from collections import defaultdict
  5 | 
  6 | def get_unique_species(molecule):
  7 |     """
  8 |     Extracts and returns unique species (elements) from a molecular formula string
  9 |     in alphabetical order.
 10 | 
 11 |     Parameters
 12 |     ----------
 13 |     molecule : str
 14 |         The molecular formula as a string.
 15 | 
 16 |     Return
 17 |     ------
 18 |     list of str
 19 |         Unique species in alphabetical order.
 20 |     """
 21 |     # Regex pattern to extract element symbols
 22 |     pattern = re.compile(r'([A-Z][a-z]?)(\d+)?')
 23 |     
 24 |     # Extracting elements
 25 |     elements = pattern.findall(molecule)
 26 |     
 27 |     # Extracting unique element symbols and sorting them
 28 |     unique_species = sorted(set([element[0] for element in elements]))
 29 |     
 30 |     return unique_species
 31 | 
 32 | def get_atoms_from_formula(formula):
 33 |     '''
 34 |     Reads the atomic formula and returns the number of atoms for each species 
 35 | 
 36 |     Parameters
 37 |     ----------
 38 |     formula : str
 39 |         The molecular formula.
 40 | 
 41 |     Returns
 42 |     -------
 43 |     species_counts : dict
 44 |         A dictionary with the count for each species.
 45 |     n_atoms : int
 46 |         The total number of atoms in the molecule.
 47 |     '''
 48 |     # This regular expression will match speciess and their counts
 49 |     species_regex = r'([A-Z][a-z]?)(\d*)'
 50 | 
 51 |     # Use defaultdict to handle speciess with no specified count (count of 1)
 52 |     species_counts = defaultdict(int)
 53 |     n_atoms = 0
 54 |     n_heavy_atoms = 0
 55 | 
 56 |     # Find all matches of the species regex in the formula
 57 |     for species, count in re.findall(species_regex, formula):
 58 |         # If count is empty, it means the species count is 1
 59 |         species_count = int(count) if count else 1
 60 |         # Add the count to the species in the dictionary
 61 |         species_counts[species] += species_count
 62 |         # Add the count to the total number of atoms
 63 |         n_atoms += species_count
 64 |         # Add the count to the total number of heavy atoms
 65 |         if species != 'H':
 66 |             n_heavy_atoms += species_count
 67 | 
 68 |     return dict(species_counts), n_atoms, n_heavy_atoms
 69 |     
 70 | def similarity_check(structures,similarity_engine):
 71 |     '''
 72 |     Performs a similarity check between a group of structures
 73 | 
 74 |     Parameters
 75 |     ----------
 76 |     structures : dict
 77 |         A dictionary with the structures to check.
 78 |     similarity_engine : obj
 79 |         The csd python API similarity check engine.
 80 | 
 81 |     Returns
 82 |     -------
 83 |     similar_structure_groups : list
 84 |         A list with groups of similar structures.
 85 | 
 86 |     '''
 87 |     # Create a new graph for the structures to be checked
 88 |     G = nx.Graph()
 89 |     
 90 |     # Add nodes for each structure
 91 |     for structure, _ in structures:
 92 |         G.add_node(structure)
 93 |     
 94 |     for i1, (structure1,crystal1) in enumerate(structures):
 95 |         for i2, (structure2,crystal2) in enumerate(structures):
 96 |             if i1 >= i2:
 97 |                 continue
 98 | 
 99 |             try:
100 |                 h = similarity_engine.compare(crystal1, crystal2)
101 |             except RuntimeError:
102 |                 h = None
103 |             
104 |             if h == None:
105 |                 continue 
106 |             
107 |             # If structures meet similarity criteria, add an edge
108 |             if h.nmatched_molecules == 15 and h.rmsd < 1.0:
109 |                 G.add_edge(structure1, structure2)
110 |                 
111 |     # Find groups of similar structures
112 |     # Each set in 'similar_groups' contains structures that are considered similar
113 |     similar_structure_groups = list(nx.connected_components(G))
114 |     
115 |     return similar_structure_groups
116 | 
def get_lattice_vectors(cell_lengths,cell_angles,cell_volume,inverse=False):
    '''
    Calculates and returns a coordinate transformation matrix of the unit
    cell.

    Parameters
    ----------
    cell_lengths : numpy.ndarray
        The cell lengths (a, b, c) of the unit cell.
    cell_angles : numpy.ndarray
        The cell angles (alpha, beta, gamma) of the unit cell in degrees.
    cell_volume : float
        The volume of the unit cell.
    inverse : bool, optional
        Selects which of the two matrices is returned.

    Returns
    -------
    numpy.ndarray
        A (3,3) matrix. With inverse=False its rows are the lattice vectors
        a, b, c in Cartesian coordinates; with inverse=True the matrix
        inverse of that matrix is returned.
    '''
    # Individual cell lengths and the angles converted to radians
    a, b, c = cell_lengths
    alpha, beta, gamma = cell_angles * np.pi / 180.0

    # Trigonometric values of the angles
    cos_alpha, cos_beta, cos_gamma = np.cos(alpha), np.cos(beta), np.cos(gamma)
    sin_gamma = np.sin(gamma)

    # Both matrices are written in upper triangular form and transposed on return
    if inverse:
        upper_triangular = [
            [1.0 / a, -cos_gamma / a / sin_gamma, b * c * (cos_alpha * cos_gamma - cos_beta) / cell_volume / sin_gamma],
            [0.0, 1.0 / b / sin_gamma, a * c * (cos_beta * cos_gamma - cos_alpha) / cell_volume / sin_gamma],
            [0.0, 0.0, a * b * sin_gamma / cell_volume],
        ]
    else:
        upper_triangular = [
            [a, b * cos_gamma, c * cos_beta],
            [0.0, b * sin_gamma, c * (cos_alpha - cos_beta * cos_gamma) / sin_gamma],
            [0.0, 0.0, cell_volume / a / b / sin_gamma],
        ]

    return np.array(upper_triangular).T
154 | 
155 | 


--------------------------------------------------------------------------------
/source_code/utilities.py:
--------------------------------------------------------------------------------
import json
import re

import numpy as np
 3 | 
 4 | def convert_seconds_to_hms(seconds):
 5 |     '''
 6 |     Converts CPU time in HMS format
 7 |     
 8 |     Parameters
 9 |     ----------
10 |     seconds : float
11 |         The input time in seconds.
12 |         
13 |     Returns
14 |     -------
15 |     The time in HMS format.
16 |     '''
17 |     hours, remainder = divmod(seconds, 3600)
18 |     minutes, seconds = divmod(remainder, 60)
19 |     
20 |     return int(hours), int(minutes), round(seconds, 2)
21 | 
class NumpyArrayEncoder(json.JSONEncoder):
    '''
    Custom JSON encoder for numpy data types.

    numpy arrays are converted to (nested) lists and numpy scalars to the
    corresponding Python scalars; every other object is handed to the base
    encoder, which raises TypeError for unsupported types.
    '''
    def default(self, obj):
        # Arrays of any dimension become nested Python lists
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # numpy scalars (e.g. np.int64) are not accepted by the json module
        if isinstance(obj, np.generic):
            return obj.item()
        return super().default(obj)
30 | 
def format_lists(json_str):
    '''
    Formats lists in the JSON string to remove unnecessary whitespace and newlines.

    Only lists that contain no other list are rewritten onto a single line;
    whitespace inside string literals is preserved.

    Parameters
    ----------
    json_str : str
        The input JSON string

    Returns
    -------
    The JSON string with unnecessary whitespace and newlines removed from
    the innermost lists.
    '''
    # An innermost list: brackets enclosing anything except other brackets.
    # Limitation: a string value that itself contains '[' or ']' can confuse
    # this pattern.
    list_pattern = re.compile(r'\[\s*((?:[^[\]]|\n)+?)\s*\]', re.DOTALL)

    # Tokens inside a list: either a complete JSON string literal (kept
    # verbatim) or a run of whitespace (dropped)
    token_pattern = re.compile(r'"(?:\\.|[^"\\])*"|\s+')

    def keep_strings(token):
        text = token.group(0)
        return text if text.startswith('"') else ''

    def compact(match):
        return '[' + token_pattern.sub(keep_strings, match.group(1)) + ']'

    return list_pattern.sub(compact, json_str)
45 | 
def convert_to_json(data):
    '''
    Converts Python dictionary to formatted JSON string.

    The data are serialised with an indentation of 4 using NumpyArrayEncoder
    and the result is post-processed with format_lists.

    Parameters
    ----------
    data : dict
        The dictionary that needs to be converted to formatted JSON string.

    Returns
    -------
    formatted_json : str
        The formatted JSON string corresponding to the data dictionary.
    '''
    return format_lists(json.dumps(data, cls=NumpyArrayEncoder, indent=4))


--------------------------------------------------------------------------------
/source_data/atomic_properties.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "H": {
 3 |         "atomic_number": 1,
 4 |         "atomic_mass": 1.008,
 5 |         "covalent_radius": 0.31,
 6 |         "van_der_waals_radius": 1.20,
 7 |         "electronegativity": 2.20,
 8 |         "other_properties": {}
 9 |     },
10 |     "C": {
11 |         "atomic_number": 6,
12 |         "atomic_mass": 12.011,
13 |         "covalent_radius": 0.76,
14 |         "van_der_waals_radius": 1.70,
15 |         "electronegativity": 2.55,
16 |         "other_properties": {}
17 |     },
18 |     "N": {
19 |         "atomic_number": 7,
20 |         "atomic_mass": 14.007,
21 |         "covalent_radius": 0.71,
22 |         "van_der_waals_radius": 1.55,
23 |         "electronegativity": 3.04,
24 |         "other_properties": {}
25 |     },
26 |     "O": {
27 |         "atomic_number": 8,
28 |         "atomic_mass": 15.999,
29 |         "covalent_radius": 0.66,
30 |         "van_der_waals_radius": 1.52,
31 |         "electronegativity": 3.44,
32 |         "other_properties": {}
33 |     },
34 |     "F": {
35 |         "atomic_number": 9,
36 |         "atomic_mass": 18.998,
37 |         "covalent_radius": 0.57,
38 |         "van_der_waals_radius": 1.47,
39 |         "electronegativity": 3.98,
40 |         "other_properties": {}
41 |     },
42 |     "Cl": {
43 |         "atomic_number": 17,
44 |         "atomic_mass": 35.453,
45 |         "covalent_radius": 0.99,
46 |         "van_der_waals_radius": 1.75,
47 |         "electronegativity": 3.16,
48 |         "other_properties": {}
49 |     },
50 |     "Br": {
51 |         "atomic_number": 35,
52 |         "atomic_mass": 79.904,
53 |         "covalent_radius": 1.14,
54 |         "van_der_waals_radius": 1.85,
55 |         "electronegativity": 2.96,
56 |         "other_properties": {}
57 |     },
58 |     "S": {
59 |         "atomic_number": 16,
60 |         "atomic_mass": 32.06,
61 |         "covalent_radius": 1.02,
62 |         "van_der_waals_radius": 1.80,
63 |         "electronegativity": 2.58,
64 |         "other_properties": {}
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/source_data/close_contacts_properties.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "BrBr": {"vdW": 3.97, "HBond": 3.97, "Tolerance": 0.0},
 3 |     "BrC":  {"vdW": 3.88, "HBond": 3.88, "Tolerance": 0.0},
 4 |     "BrCl": {"vdW": 3.87, "HBond": 3.87, "Tolerance": 0.0},
 5 |     "BrF":  {"vdW": 3.59, "HBond": 3.59, "Tolerance": 0.0},
 6 |     "BrH":  {"vdW": 3.29, "HBond": 2.43, "Tolerance": 0.0},
 7 |     "BrN":  {"vdW": 3.72, "HBond": 3.72, "Tolerance": 0.0},
 8 |     "BrO":  {"vdW": 3.65, "HBond": 3.65, "Tolerance": 0.0},
 9 |     "BrS":  {"vdW": 3.93, "HBond": 3.93, "Tolerance": 0.0},
10 |     "CC":   {"vdW": 3.75, "HBond": 3.75, "Tolerance": 0.50},
11 |     "CCl":  {"vdW": 3.78, "HBond": 3.78, "Tolerance": 0.50},
12 |     "CF":   {"vdW": 3.48, "HBond": 3.48, "Tolerance": 0.0},
13 |     "CH":   {"vdW": 3.21, "HBond": 3.21, "Tolerance": 0.50},
14 |     "CN":   {"vdW": 3.61, "HBond": 3.61, "Tolerance": 0.50},
15 |     "CO":   {"vdW": 3.54, "HBond": 3.54, "Tolerance": 0.50},
16 |     "CS":   {"vdW": 3.84, "HBond": 3.84, "Tolerance": 0.50},
17 |     "ClCl": {"vdW": 3.78, "HBond": 3.78, "Tolerance": 0.50},
18 |     "ClF":  {"vdW": 3.50, "HBond": 3.50, "Tolerance": 0.0},
19 |     "ClH":  {"vdW": 3.19, "HBond": 2.24, "Tolerance": 0.50},
20 |     "ClN":  {"vdW": 3.63, "HBond": 3.14, "Tolerance": 0.50},
21 |     "ClO":  {"vdW": 3.58, "HBond": 3.14, "Tolerance": 0.50},
22 |     "ClS":  {"vdW": 3.84, "HBond": 3.74, "Tolerance": 0.0},
23 |     "FF":   {"vdW": 3.17, "HBond": 3.17, "Tolerance": 0.0},
24 |     "FH":   {"vdW": 2.89, "HBond": 2.89, "Tolerance": 0.0},
25 |     "FN":   {"vdW": 3.34, "HBond": 2.70, "Tolerance": 0.0},
26 |     "FO":   {"vdW": 3.29, "HBond": 2.61, "Tolerance": 0.0},
27 |     "FS":   {"vdW": 3.57, "HBond": 3.57, "Tolerance": 0.0},
28 |     "HH":   {"vdW": 2.73, "HBond": 2.73, "Tolerance": 0.50},
29 |     "HN":   {"vdW": 3.01, "HBond": 2.08, "Tolerance": 0.50},
30 |     "HO":   {"vdW": 2.87, "HBond": 1.96, "Tolerance": 0.50},
31 |     "HS":   {"vdW": 3.27, "HBond": 2.56, "Tolerance": 0.50},
32 |     "NN":   {"vdW": 3.50, "HBond": 3.02, "Tolerance": 0.50},
33 |     "NO":   {"vdW": 3.37, "HBond": 2.87, "Tolerance": 0.50},
34 |     "NS":   {"vdW": 3.70, "HBond": 3.41, "Tolerance": 0.50},
35 |     "OO":   {"vdW": 3.37, "HBond": 2.75, "Tolerance": 0.50},
36 |     "OS":   {"vdW": 3.65, "HBond": 3.30, "Tolerance": 0.50},
37 |     "SS":   {"vdW": 3.87, "HBond": 3.87, "Tolerance": 0.50}
38 | }


--------------------------------------------------------------------------------
/source_data/fragment_list.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "1-2-dichlorobenzene": {
  3 |     	"smarts": "c1(Cl)c(Cl)cccc1", 
  4 | 		"species": ["C","Cl","C","Cl","C","C","C","C"],
  5 |     	"coordinates": [
  6 |     		[ 1.3750, 0.0000, 0.0000],
  7 |     		[ 3.1150, 0.0000, 0.0000],
  8 |     		[ 0.6875, 1.1908, 0.0000],
  9 |     		[ 1.5575, 2.6977, 0.0000],
 10 |     		[-0.6875, 1.1908, 0.0000],
 11 |     		[-1.3750, 0.0000, 0.0000],
 12 |     		[-0.6875,-1.1908, 0.0000],
 13 |     		[ 0.6875,-1.1908, 0.0000]],
 14 |     	"mass": [12.0107, 35.4530, 12.0107, 35.4530, 12.0107, 12.0107, 12.0107, 12.0107],
 15 |     	"atoms_to_align": "all"},
 16 | 	"1-3-4-thiadozole": {
 17 |     	"smarts": "[SX2r5]1[CX3r5]=[NX2r5][NX2r5]=[CX3r5]1", 
 18 | 		"species": ["S","C","N","N","C"],
 19 |     	"coordinates": [
 20 |     	    [ 1.2445, 0.0000, 0.0000],
 21 |     		[ 0.0000, 1.2018, 0.0000],
 22 |     		[-1.2059, 0.6649, 0.0000],
 23 |     		[-1.2059,-0.6649, 0.0000],
 24 |     		[ 0.0000,-1.2018, 0.0000]],
 25 |     	"mass": [32.0650, 12.0107, 14.0067, 14.0067, 12.0107],
 26 |     	"atoms_to_align": "all"},
 27 | 	"1-methylimidazole_aliphatic": {
 28 |     	"smarts": "C1=CN=CN([#6])1", 
 29 | 		"species": ["C","C","N","C","N","C"],
 30 |     	"coordinates": [
 31 |     		[-0.0064,-0.7598, 0.0000],
 32 |     		[ 0.0000, 0.6406, 0.0000],
 33 |     		[ 1.2851, 1.1007, 0.0000],
 34 |     		[ 2.0137, 0.0000, 0.0000],
 35 |     		[ 1.2553,-1.1458, 0.0000],
 36 |     		[ 1.7228,-2.5268, 0.0000]],
 37 |     	"mass": [12.0107, 12.0107, 14.0067, 12.0107, 14.0067, 12.0107],
 38 |     	"atoms_to_align": "all"},
 39 |     "1-methylimidazole_aromatic": {
 40 |     	"smarts": "c1cN=CN([#6])1", 
 41 | 		"species": ["C","C","N","C","N","C"],
 42 |     	"coordinates": [
 43 |     		[-0.0064,-0.7598, 0.0000],
 44 |     		[ 0.0000, 0.6406, 0.0000],
 45 |     		[ 1.2851, 1.1007, 0.0000],
 46 |     		[ 2.0137, 0.0000, 0.0000],
 47 |     		[ 1.2553,-1.1458, 0.0000],
 48 |     		[ 1.7228,-2.5268, 0.0000]],
 49 |     	"mass": [12.0107, 12.0107, 14.0067, 12.0107, 14.0067, 12.0107],
 50 |     	"atoms_to_align": "all"},
 51 | 	"2-amino-5-methyl-3-thiophenecarbonitrile": {
 52 | 		"smarts": "CC1=CC(=C(S1)N)C#N",
 53 | 		"species": ["C","C","C","C","C","S","N","C","N"],
 54 | 		"coordinates": [
 55 | 			[ 0.3605, 2.6830, 0.0000],
 56 | 			[ 0.0000, 1.2373, 0.0000],
 57 | 			[-1.2610, 0.7100, 0.0000],
 58 | 			[-1.2610,-0.7100, 0.0000],
 59 | 			[ 0.0000,-1.2373, 0.0000],
 60 | 			[ 1.1948, 0.0000, 0.0000],
 61 | 			[ 0.3387,-2.5957, 0.0000],
 62 | 			[-2.4179,-1.5505, 0.0000],
 63 | 			[-3.3402,-2.2206, 0.0000]],
 64 | 		"mass": [12.0107,12.0107,12.0107,12.0107,12.0107,32.065,14.0067,12.0107,14.0067],
 65 | 		"atoms_to_align": "all"},
 66 | 	"2-amino-5-methylpyridine": {
 67 |     	"smarts": "Nc1[nr6]cc([#6])cc1", 
 68 | 		"species": ["N","C","N","C","C","C","C","C"],
 69 |     	"coordinates": [
 70 |     		[ 2.7650, 0.0000, 0.0000],
 71 |     		[ 1.3750, 0.0000, 0.0000],
 72 |     		[ 0.6875, 1.1908, 0.0000],
 73 |     		[-0.6875, 1.1908, 0.0000],
 74 |     		[-1.3750, 0.0000, 0.0000],
 75 |     		[-2.8650, 0.0000, 0.0000],
 76 |     		[-0.6875,-1.1908, 0.0000],
 77 |     		[ 0.6875,-1.1908, 0.0000]],
 78 |     	"mass": [14.0067, 12.0107, 14.0067, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107],
 79 |     	"atoms_to_align": "all"},
 80 | 	"3,4-dihydro-6-methyl-2H-1-benzothiopyran-1,1-dioxide": {
 81 |     	"smarts": "c1(cc(C)cc2)c2S(=O)(=O)CCC1", 
 82 | 		"species": ["C","C","C","C","C","C","C","S","O","O","C","C","C"],
 83 |     	"coordinates": [
 84 |     		[-0.4945, 1.0391, 0.3642],
 85 |     		[-1.4765, 1.2151, 1.3522],
 86 |     		[-2.0765, 0.1971, 2.0502],
 87 |     		[-3.0915, 0.5341, 3.1202],
 88 |     		[-1.6695,-1.1089, 1.7352],
 89 |     		[-0.7375,-1.3329, 0.7352],
 90 |     		[-0.1485,-0.2669, 0.0562],
 91 |     		[ 1.1135,-0.6809,-1.0898],
 92 |     		[ 2.3305,-0.8849,-0.3648],
 93 |     		[ 0.6555,-1.7579,-1.9438],
 94 |     		[ 1.2625, 0.8221,-2.0138],
 95 |     		[ 1.3835, 2.0051,-1.0808],
 96 |     		[ 0.0975, 2.2351,-0.3338]],
 97 |     	"mass": [12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 32.0650, 15.9994, 15.9994, 12.0107, 12.0107, 12.0107],
 98 |     	"atoms_to_align": [0,1,2,3,4,5,6,7,10,11,12]},
 99 | 	"3-fluoro-2,6-dichlorotoluene": {
100 |     	"smarts": "[#6]c1c(Cl)c(F)ccc(Cl)1", 
101 | 		"species": ["C","C","C","Cl","C","F","C","C","C","Cl"],
102 |     	"coordinates": [
103 |     		[ 2.8650, 0.0000, 0.0000],
104 |     		[ 1.3750, 0.0000, 0.0000],
105 |     		[ 0.6875, 1.1908, 0.0000],
106 |     		[ 1.5575, 2.6977, 0.0000],
107 |     		[-0.6875, 1.1908, 0.0000],
108 |     		[-1.3269, 2.3599, 0.0000],
109 |     		[-1.3750, 0.0000, 0.0000],
110 |     		[-0.6875,-1.1908, 0.0000],
111 |     		[ 0.6875,-1.1908, 0.0000],
112 |     		[ 1.5575,-2.6977, 0.0000]],
113 |     	"mass": [12.0107, 12.0107, 12.0107, 35.4530, 12.0107, 18.9980, 12.0107, 12.0107, 12.0107, 35.4530],
114 |     	"atoms_to_align": "all"},
115 |     "3-methylpyridine_aliphatic": {
116 |     	"smarts": "[CH3]c1c[nr6]c(C)cc1", 
117 | 		"species": ["C","C","C","N","C","C","C","C"],
118 |     	"coordinates": [
119 |     	    [ 2.8650, 0.0000, 0.0000],
120 |     		[ 1.3750, 0.0000, 0.0000],
121 |     		[ 0.6875, 1.1908, 0.0000],
122 |     		[-0.6875, 1.1908, 0.0000],
123 |     		[-1.3750, 0.0000, 0.0000],
124 |     	    [-2.8650, 0.0000, 0.0000],
125 |     		[-0.6875,-1.1908, 0.0000],
126 |     		[ 0.6875,-1.1908, 0.0000]],
127 |     	"mass": [12.0107, 12.0107, 12.0107, 14.0067, 12.0107, 12.0107, 12.0107, 12.0107],
128 |     	"atoms_to_align": "all"},
129 |     "3-methylpyridine_aromatic": {
130 |     	"smarts": "[CH3]c1c[nr6]c(c)cc1", 
131 | 		"species": ["C","C","C","N","C","C","C","C"],
132 |     	"coordinates": [
133 |     	    [ 2.8650, 0.0000, 0.0000],
134 |     		[ 1.3750, 0.0000, 0.0000],
135 |     		[ 0.6875, 1.1908, 0.0000],
136 |     		[-0.6875, 1.1908, 0.0000],
137 |     		[-1.3750, 0.0000, 0.0000],
138 |     	    [-2.7500, 0.0000, 0.0000],
139 |     		[-0.6875,-1.1908, 0.0000],
140 |     		[ 0.6875,-1.1908, 0.0000]],
141 |     	"mass": [12.0107, 12.0107, 12.0107, 14.0067, 12.0107, 12.0107, 12.0107, 12.0107],
142 |     	"atoms_to_align": "all"},
143 | 	"4-aminopiperidine_chair": {
144 |     	"smarts": "N[CH]1[CH2][CH2][NH][CH2][CH2]1", 
145 | 		"species": ["N","C","C","C","N","C","C"],
146 |     	"coordinates": [
147 |     		[-1.4300, 0.0000, 0.0000],
148 |     		[ 0.0000, 0.0000, 0.0000],
149 |     		[ 0.6256, 1.2862,-0.5655],
150 |     		[ 2.1636, 1.2862,-0.5655],
151 |     		[ 2.7892, 0.0000,-1.1310],
152 |     		[ 2.1636,-1.2862,-0.5655],
153 |     		[ 0.6256,-1.2862,-0.5655]],
154 |     	"mass": [14.0067, 12.0107, 12.0107, 12.0107, 14.0067, 12.0107, 12.0107],
155 |     	"atoms_to_align": [0,1,4]},
156 |     "4-aminopiperidine_boat": {
157 |     	"smarts": "N[CH]1[CH2][CH2][NH][CH2][CH2]1", 
158 | 		"species": ["N","C","C","C","N","C","C"],
159 |     	"coordinates": [
160 |     		[-1.4300, 0.0000, 0.0000],
161 |     		[ 0.0000, 0.0000, 0.0000],
162 |     		[ 0.6256, 1.2862,-0.5655],
163 |     		[ 2.1636, 1.2862,-0.5655],
164 |     		[ 2.7892, 0.0000, 0.0000],
165 |     		[ 2.1636,-1.2862,-0.5655],
166 |     		[ 0.6256,-1.2862,-0.5655]],
167 |     	"mass": [14.0067, 12.0107, 12.0107, 12.0107, 14.0067, 12.0107, 12.0107],
168 |     	"atoms_to_align": [0,1,4]},
169 |     "4-aminopyridamine": {
170 |     	"smarts": "[NH2]c1[nr6]c[nr6]cc1", 
171 | 		"species": ["N","C","N","C","N","C","C"],
172 |     	"coordinates": [
173 |     	    [-1.0691, 2.7106, 0.0000],
174 |     		[-1.1604, 1.3461, 0.0000],
175 |     		[-2.2179, 0.5199, 0.0000],
176 |     		[-2.1823,-0.8127, 0.0000],
177 |     		[-1.0594,-1.5475, 0.0000],
178 |     		[-0.0064,-0.7598, 0.0000],
179 |     		[ 0.0000, 0.6406, 0.0000]],
180 |     	"mass": [14.0067, 12.0107, 14.0067, 12.0107, 14.0067, 12.0107, 12.0107],
181 |     	"atoms_to_align": "all"},
182 | 	"4-hydroxyquinoline": {
183 |     	"smarts": "c1(cccc2)c2C(=O)C=CN1", 
184 | 		"species": ["C","C","C","C","C","C","C","O","C","C","N"],
185 |     	"coordinates": [
186 |     		[ 0.6875,-1.1908, 0.0000],
187 |     		[ 1.3750,-2.3816, 0.0000],
188 |     		[ 0.6875,-3.5724, 0.0000],
189 |     		[-0.6875,-3.5724, 0.0000],
190 |     		[-1.3750,-2.3816, 0.0000],
191 |     		[-0.6875,-1.1908, 0.0000],
192 |     		[-1.3750, 0.0000, 0.0000],
193 |     		[-2.7550, 0.0000, 0.0000],
194 |     		[-0.6875, 1.1908, 0.0000],
195 |     		[ 0.6875, 1.1908, 0.0000],
196 |     		[ 1.3750, 0.0000, 0.0000]],
197 |     	"mass": [12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 15.9994, 12.0107, 12.0107, 14.0067],
198 |     	"atoms_to_align": "all"},
199 | 	"5-amino-1-3-4-thiadozole": {
200 |     	"smarts": "[SX2r5]1[CX3r5]=[NX2r5][NX2r5]=[CX3r5]1(N)", 
201 | 		"species": ["S","C","N","N","C","N"],
202 |     	"coordinates": [
203 |     	    [ 1.2445, 0.0000, 0.0000],
204 |     		[ 0.0000, 1.2018, 0.0000],
205 |     		[-1.2059, 0.6649, 0.0000],
206 |     		[-1.2059,-0.6649, 0.0000],
207 |     		[ 0.0000,-1.2018, 0.0000],
208 |     		[ 0.2869,-2.5516, 0.0000]],
209 |     	"mass": [32.0650, 12.0107, 14.0067, 14.0067, 12.0107, 14.0067],
210 |     	"atoms_to_align": "all"},	
211 | 	"9-methyladenine": {
212 |     	"smarts": "[#7]c1[nr6]c[nr6]c(N([#6])[CH]=N2)c21", 
213 | 		"species": ["N","C","N","C","N","C","N","C","C","N","C"],
214 |     	"coordinates": [
215 |     	    [-1.0691, 2.7106, 0.0000],
216 |     		[-1.1604, 1.3461, 0.0000],
217 |     		[-2.2179, 0.5199, 0.0000],
218 |     		[-2.1823,-0.8127, 0.0000],
219 |     		[-1.0594,-1.5475, 0.0000],
220 |     		[-0.0064,-0.7598, 0.0000],
221 |     		[ 1.2553,-1.1458, 0.0000],
222 |     		[ 1.7228,-2.5268, 0.0000],
223 |     		[ 2.0137, 0.0000, 0.0000],
224 |     		[ 1.2851, 1.1007, 0.0000],
225 |     		[ 0.0000, 0.6406, 0.0000]],
226 |     	"mass": [14.0067, 12.0107, 14.0067, 12.0107, 14.0067, 12.0107, 14.0067, 12.0107, 12.0107, 14.0067, 12.0107],
227 |     	"atoms_to_align": "all"},
228 | 	"acetamide": {
229 |     	"smarts": "[#6]C(=O)N",
230 | 		"species": ["C","C","O","N"],
231 |     	"coordinates": [
232 |     	    [-1.4900, 0.0000, 0.0000],
233 |     		[ 0.0000, 0.0000, 0.0000],
234 |     		[ 0.6335,-1.2018, 0.0000],
235 |     		[ 0.6069, 1.0543, 0.0000]],
236 |     	"mass": [12.0107, 12.0107, 15.9994, 14.0067],
237 |     	"atoms_to_align": "all"},
238 | 	"acetate_aliphatic": {
239 |     	"smarts": "C[CX3](=O)[OX2!H]", 
240 | 		"species": ["C","C","O","O"],
241 |     	"coordinates": [
242 |     	    [-1.5220, 0.0000, 0.0000],
243 |     		[ 0.0000, 0.0000, 0.0000],
244 |     		[ 0.6662,-1.0220, 0.0000],
245 |     		[ 0.4781, 1.2454, 0.0000]],
246 |     	"mass": [12.0107, 12.0107, 15.9994, 15.9994],
247 |     	"atoms_to_align": "all"},
248 |     "acetate_aromatic": {
249 |     	"smarts": "c[CX3](=O)[OX2!H]", 
250 | 		"species": ["C","C","O","O"],
251 |     	"coordinates": [
252 |     	    [-1.4800, 0.0000, 0.0000],
253 |     		[ 0.0000, 0.0000, 0.0000],
254 |     		[ 0.6662,-1.0220, 0.0000],
255 |     		[ 0.4781, 1.2454, 0.0000]],
256 |     	"mass": [12.0107, 12.0107, 15.9994, 15.9994],
257 |     	"atoms_to_align": "all"},
258 | 	"acridin":{
259 |         "smarts": "c1ccc2cc3ccccc3nc2c1",
260 |         "species": ["C","C","C","C","C","C","C","C","C","C","C","N","C","C"],
261 |         "coordinates": [
262 |             [ 0.6875, 3.5724, 0.0000], 
263 |             [-0.6875, 3.5724, 0.0000], 
264 |             [-1.3750, 2.3816, 0.0000], 
265 |             [-0.6875, 1.1908, 0.0000], 
266 |             [-1.3750, 0.0000, 0.0000], 
267 |             [-0.6875,-1.1908, 0.0000], 
268 |             [-1.3750,-2.3816, 0.0000], 
269 |             [-0.6875,-3.5724, 0.0000], 
270 |             [ 0.6875,-3.5724, 0.0000], 
271 |             [ 1.3750,-2.3816, 0.0000], 
272 |             [ 0.6875,-1.1908, 0.0000], 
273 |             [ 1.3750, 0.0000, 0.0000], 
274 |             [ 0.6875, 1.1908, 0.0000], 
275 |             [ 1.3750, 2.3816, 0.0000]],
276 |         "mass": [12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,14.0067,12.0107,12.0107],
277 |         "atoms_to_align": "all"},
278 | 	"aniline_NX2": {
279 |     	"smarts": "[NX2]c1ccccc1",  
280 | 		"species": ["N","C","C","C","C","C","C"],
281 |     	"coordinates": [
282 |     		[ 2.7850, 0.0000, 0.0000],
283 |     		[ 1.3750, 0.0000, 0.0000],
284 |     		[ 0.6875, 1.1908, 0.0000],
285 |     		[-0.6875, 1.1908, 0.0000],
286 |     		[-1.3750, 0.0000, 0.0000],
287 |     		[-0.6875,-1.1908, 0.0000],
288 |     		[ 0.6875,-1.1908, 0.0000]],
289 |     	"mass": [14.0067, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107],
290 |     	"atoms_to_align": "all"},
291 |     "aniline_NX3": {
292 |     	"smarts": "[NX3]c1ccccc1",  
293 | 		"species": ["N","C","C","C","C","C","C"],
294 |     	"coordinates": [
295 |     		[ 2.7850, 0.0000, 0.0000],
296 |     		[ 1.3750, 0.0000, 0.0000],
297 |     		[ 0.6875, 1.1908, 0.0000],
298 |     		[-0.6875, 1.1908, 0.0000],
299 |     		[-1.3750, 0.0000, 0.0000],
300 |     		[-0.6875,-1.1908, 0.0000],
301 |     		[ 0.6875,-1.1908, 0.0000]],
302 |     	"mass": [14.0067, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107],
303 |     	"atoms_to_align": "all"},
304 | 	"anthracene":{
305 |         "smarts": "c1ccc2cc3ccccc3cc2c1",
306 |         "species": ["C","C","C","C","C","C","C","C","C","C","C","C","C","C"],
307 |         "coordinates": [
308 |             [ 0.6875, 3.5724, 0.0000], 
309 |             [-0.6875, 3.5724, 0.0000], 
310 |             [-1.3750, 2.3816, 0.0000], 
311 |             [-0.6875, 1.1908, 0.0000], 
312 |             [-1.3750, 0.0000, 0.0000], 
313 |             [-0.6875,-1.1908, 0.0000], 
314 |             [-1.3750,-2.3816, 0.0000], 
315 |             [-0.6875,-3.5724, 0.0000], 
316 |             [ 0.6875,-3.5724, 0.0000], 
317 |             [ 1.3750,-2.3816, 0.0000], 
318 |             [ 0.6875,-1.1908, 0.0000], 
319 |             [ 1.3750, 0.0000, 0.0000], 
320 |             [ 0.6875, 1.1908, 0.0000], 
321 |             [ 1.3750, 2.3816, 0.0000]],
322 |         "mass": [12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107,12.0107],
323 |         "atoms_to_align": "all"},
324 | 	"benzene": {
325 |     	"smarts": "c1ccccc1", 
326 | 		"species": ["C","C","C","C","C","C"],
327 |     	"coordinates": [
328 |     		[ 1.3750, 0.0000, 0.0000],
329 |     		[ 0.6875, 1.1908, 0.0000],
330 |     		[-0.6875, 1.1908, 0.0000],
331 |     		[-1.3750, 0.0000, 0.0000],
332 |     		[-0.6875,-1.1908, 0.0000],
333 |     		[ 0.6875,-1.1908, 0.0000]],
334 |     	"mass": [12.0107, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107],
335 |     	"atoms_to_align": "all"},
336 | 	"butane": {
337 | 		"smarts": "[#6]CC[#6]",
338 | 		"species": ["C","C","C","C"],
339 | 		"coordinates": [
340 | 			[ 1.8875, 0.6192, 0.0000],
341 | 			[ 0.3575, 0.6192, 0.0000],
342 | 			[-0.3575,-0.6192, 0.0000],
343 | 			[-1.8875,-0.6192, 0.0000]],
344 | 		"mass": [12.0107,12.0107,12.0107,12.0107],
345 | 		"atoms_to_align": "all"},
346 |     "carboxylic_acid": {
347 |     	"smarts": "[#6][CX3](=O)[OH]", 
348 | 		"species": ["C","C","O","O"],
349 |     	"coordinates": [
350 |     		[-1.5220, 0.0000, 0.0000],
351 |     		[ 0.0000, 0.0000, 0.0000],
352 |     		[ 0.6998,-0.9994, 0.0000],
353 |     		[ 0.4903, 1.3113, 0.0000]],
354 |     	"mass": [12.0107, 12.0107, 15.9994, 15.9994],
355 |     	"atoms_to_align": "all"},
356 | 	"cis-tetrahydrofuran-3,4-diol": {
357 |     	"smarts": "O1CC([OH])C([OH])C1", 
358 | 		"species": ["O","C","C","O","C","O","C"],
359 |     	"coordinates": [
360 |     		[-0.7840,-0.0022,-0.9770],
361 |     		[-0.1190,-1.1740,-0.5270],
362 |     		[ 0.5940,-0.8120, 0.7840],
363 |     		[-0.2960,-0.9570, 1.8640],
364 |     		[ 0.8420, 0.6910, 0.5760],
365 |     		[ 1.0930, 1.4200, 1.7580],
366 |     		[-0.4090, 1.1330,-0.1860]],
367 |     	"mass": [15.9994, 12.0107, 12.0107, 15.9994, 12.0107, 15.9994, 12.0107],
368 |     	"atoms_to_align": "all"},
369 | 	"dimethyl-sulfone": {
370 |     	"smarts": "[#6]S(=O)(=O)[#6]", 
371 | 		"species": ["C","S","O","O","C"],
372 |     	"coordinates": [
373 |     		[-1.3445, 0.0000,-1.0887],
374 |     		[ 0.0000, 0.0000, 0.0000],
375 |     		[ 0.0000, 1.2533, 0.7091],
376 |     		[ 0.0000,-1.2533, 0.7091],
377 |     		[ 1.3445, 0.0000,-1.0887]],
378 |     	"mass": [12.0107, 32.0650, 15.9994, 15.9994, 12.0107],
379 |     	"atoms_to_align": [0,1,4]},
380 | 	"ester_aliphatic-aliphatic": {
381 |     	"smarts": "C[!R;O]C(=O)C", 
382 | 		"species": ["C","O","C","O","C"],
383 |     	"coordinates": [
384 |     		[ 1.8642, 1.2513, 0.0000],
385 |     		[ 0.4343, 1.2613, 0.0000],
386 |     		[ 0.0000, 0.0000, 0.0000],
387 |     		[ 0.6998,-0.9994, 0.0000],
388 |     		[-1.5220, 0.0000, 0.0000]],
389 |     	"mass": [12.0107, 15.9994, 12.0107, 15.9994, 12.0107],
390 |     	"atoms_to_align": "all"},
391 | 	"ester_aliphatic-aromatic": {
392 |     	"smarts": "C[!R;O]C(=O)c", 
393 | 		"species": ["C","O","C","O","C"],
394 |     	"coordinates": [
395 |     		[ 1.8642, 1.2513, 0.0000],
396 |     		[ 0.4343, 1.2613, 0.0000],
397 |     		[ 0.0000, 0.0000, 0.0000],
398 |     		[ 0.6998,-0.9994, 0.0000],
399 |     		[-1.5220, 0.0000, 0.0000]],
400 |     	"mass": [12.0107, 15.9994, 12.0107, 15.9994, 12.0107],
401 |     	"atoms_to_align": "all"},
402 | 	"ester_aromatic-aliphatic": {   	
403 | 		"smarts": "c[!R;O]C(=O)C", 
404 | 		"species": ["C","O","C","O","C"],
405 |     	"coordinates": [
406 |     		[ 1.8642, 1.2513, 0.0000],
407 |     		[ 0.4343, 1.2613, 0.0000],
408 |     		[ 0.0000, 0.0000, 0.0000],
409 |     		[ 0.6998,-0.9994, 0.0000],
410 |     		[-1.5220, 0.0000, 0.0000]],
411 |     	"mass": [12.0107, 15.9994, 12.0107, 15.9994, 12.0107],
412 |     	"atoms_to_align": "all"},
413 | 	"ester_aromatic-aromatic": {
414 |     	"smarts": "c[!R;O]C(=O)c", 
415 | 		"species": ["C","O","C","O","C"],
416 |     	"coordinates": [
417 |     		[ 1.8642, 1.2513, 0.0000],
418 |     		[ 0.4343, 1.2613, 0.0000],
419 |     		[ 0.0000, 0.0000, 0.0000],
420 |     		[ 0.6998,-0.9994, 0.0000],
421 |     		[-1.5220, 0.0000, 0.0000]],
422 |     	"mass": [12.0107, 15.9994, 12.0107, 15.9994, 12.0107],
423 |     	"atoms_to_align": "all"},
424 |     "ethanol": {
425 |     	"smarts": "[#6][CH2][OH]", 
426 | 		"species": ["C","C","O"],
427 |     	"coordinates": [
428 |     		[-1.5280, 0.0000, 0.0000],
429 |     		[ 0.0000, 0.0000, 0.0000],
430 |     		[ 0.4880, 1.3335, 0.0000]],
431 |     	"mass": [12.0107, 12.0107, 15.9994],
432 |     	"atoms_to_align": "all"},
433 | 	"ethoxide": {
434 |     	"smarts": "[OX2][CH2][CH3]", 
435 | 		"species": ["O","C","C"],
436 |     	"coordinates": [
437 |     	    [-1.3300, 0.0000, 0.0000],
438 |     		[ 0.0000, 0.0000, 0.0000],
439 |     		[ 0.8097, 1.2958, 0.0000]],
440 |     	"mass": [15.9994, 12.0107, 12.0107],
441 |     	"atoms_to_align": "all"},
442 |     "ethyl": {
443 |     	"smarts": "[#6][CH2][CH3]",  
444 | 		"species": ["C","C","C"],
445 |     	"coordinates": [
446 |     		[-1.5280, 0.0000, 0.0000],
447 |     		[ 0.0000, 0.0000, 0.0000],
448 |     		[ 0.8544, 1.2668, 0.0000]],
449 |     	"mass": [12.0107, 12.0107, 12.0107],
450 |     	"atoms_to_align": "all"},
451 | 	"ethylamine": {
452 |     	"smarts": "[NX3][CH2][CH3]", 
453 | 		"species": ["N","C","C"],
454 |     	"coordinates": [
455 |     		[-1.4380, 0.0000, 0.0000],
456 |     		[ 0.0000, 0.0000, 0.0000],
457 |     		[ 0.5879, 1.4104, 0.0000]],
458 |     	"mass": [14.0067, 12.0107, 12.0107],
459 |     	"atoms_to_align": "all"},
460 | 	"methanesulfonamide": {
461 |     	"smarts": "[#6]S(=O)(=O)[NH2]", 
462 | 		"species": ["C","S","O","O","N"],
463 |     	"coordinates": [
464 |     		[-1.7300, 0.0000, 0.0000],
465 |     		[ 0.0000, 0.0000, 0.0000],
466 |     		[ 0.3778,-1.2731,-0.5567],
467 |     		[ 0.3778, 1.2731,-0.5567],
468 |     		[ 0.4408, 0.0000, 1.6211]],
469 |     	"mass": [12.0107, 32.0650, 15.9994, 15.9994, 14.0067],
470 |     	"atoms_to_align": [0,1,4]},
471 | 	"methyl_ethyl_ether_L_aliphatic": {
472 |     	"smarts": "[#6]OC[CH3]", 
473 | 		"species": ["C","O","C","C"],
474 |     	"coordinates": [
475 |     		[-1.5200, 0.0000, 0.0000],
476 |     		[ 0.0000, 0.0000, 0.0000],
477 |     		[ 0.5186, 1.3165, 0.0000],
478 |     		[ 1.9329, 1.2721, 0.0000]],
479 |     	"mass": [12.0107, 15.9994, 12.0107, 12.0107],
480 |     	"atoms_to_align": "all"},
481 |     "methyl_ethyl_ether_L_aromatic": {
482 |     	"smarts": "cOC[CH3]", 
483 | 		"species": ["C","O","C","C"],
484 |     	"coordinates": [
485 |     		[-1.5200, 0.0000, 0.0000],
486 |     		[ 0.0000, 0.0000, 0.0000],
487 |     		[ 0.5186, 1.3165, 0.0000],
488 |     		[ 1.9329, 1.2721, 0.0000]],
489 |     	"mass": [12.0107, 15.9994, 12.0107, 12.0107],
490 |     	"atoms_to_align": "all"},
491 | 	"methyl_ethyl_ether_R_aliphatic": {
492 |     	"smarts": "[CH3]OC[#6]", 
493 | 		"species": ["C","O","C","C"],
494 |     	"coordinates": [
495 |     		[-1.5200, 0.0000, 0.0000],
496 |     		[ 0.0000, 0.0000, 0.0000],
497 |     		[ 0.5186, 1.3165, 0.0000],
498 |     		[ 1.9329, 1.2721, 0.0000]],
499 |     	"mass": [12.0107, 15.9994, 12.0107, 12.0107],
500 |     	"atoms_to_align": "all"},
501 | 	"methyl_ethyl_ether_R_aromatic": {
502 |     	"smarts": "[CH3]OCc", 
503 | 		"species": ["C","O","C","C"],
504 |     	"coordinates": [
505 |     		[-1.5200, 0.0000, 0.0000],
506 |     		[ 0.0000, 0.0000, 0.0000],
507 |     		[ 0.5186, 1.3165, 0.0000],
508 |     		[ 1.9329, 1.2721, 0.0000]],
509 |     	"mass": [12.0107, 15.9994, 12.0107, 12.0107],
510 |     	"atoms_to_align": "all"},
511 | 	"nitromethane": {
512 | 		"smarts": "[#6]N(=O)=O",
513 | 		"species": ["C","N","O","O"],
514 | 		"coordinates": [
515 | 			[-1.4460, 0.0000, 0.0000],
516 | 			[ 0.0000, 0.0000, 0.0000],
517 | 			[ 0.6150, 1.0650, 0.0000],
518 | 			[ 0.6150,-1.0650, 0.0000]],
519 | 		"mass": [12.0107,14.0067,15.9994,15.9994],
520 | 		"atoms_to_align": "all"},
521 | 	"pyrazole": {
522 |     	"smarts": "[#6r5]1=[#6r5][Nr5][Nr5]=[#6r5]1", 
523 | 		"species": ["C","C","N","N","C"],
524 |     	"coordinates": [
525 |     		[ 0.8185, 0.0000, 0.0000],
526 |     		[ 0.0000, 1.0861, 0.0000],
527 |     		[-1.2883, 0.5916, 0.0000],
528 |     		[-1.2848,-0.7446, 0.0000],
529 |     		[-0.0240,-1.1181, 0.0000]],
530 |     	"mass": [12.0107, 12.0107, 14.0067, 14.0067, 12.0107],
531 |     	"atoms_to_align": "all"},
532 |     "pyridine": {
533 |     	"smarts": "[nr6]1ccccc1",  
534 | 		"species": ["N","C","C","C","C","C"],
535 |     	"coordinates": [
536 |     		[ 1.3750, 0.0000, 0.0000],
537 |     		[ 0.6875, 1.1908, 0.0000],
538 |     		[-0.6875, 1.1908, 0.0000],
539 |     		[-1.3750, 0.0000, 0.0000],
540 |     		[-0.6875,-1.1908, 0.0000],
541 |     		[ 0.6875,-1.1908, 0.0000]],
542 |     	"mass": [14.0067, 12.0107, 12.0107, 12.0107, 12.0107, 12.0107],
543 |     	"atoms_to_align": "all"},
544 |     "thioacetamide": {
545 |     	"smarts": "[#6]C(=S)[NH2]", 
546 | 		"species": ["C","C","S","N"], 
547 |     	"coordinates": [
548 |     		[-1.4750, 0.0000, 0.0000],
549 |     		[ 0.0000, 0.0000, 0.0000],
550 |     		[ 0.7707, 1.2577, 0.0000],
551 |     		[ 0.5522,-1.2264, 0.0000]],
552 |     	"mass": [12.0107, 12.0107, 32.0650, 14.0067],
553 |     	"atoms_to_align": "all"}
554 | }


--------------------------------------------------------------------------------
/source_data/space_group_properties.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "P1": {
  3 |         "name_HM": "P 1",
  4 |         "name_Hall": "P 1",
  5 |         "number": 1,
  6 |         "crystal_system": "triclinic",
  7 |         "z_crystal": 1,
  8 |         "symmetry_operations": ["x, y, z"]
  9 |     },
 10 |     "P-1": {
 11 |         "name_HM": "P-1",
 12 |         "name_Hall": "-P 1",
 13 |         "number": 2,
 14 |         "crystal_system": "triclinic",
 15 |         "z_crystal": 2,
 16 |         "symmetry_operations": ["x, y, z",
 17 |                                 "1.0-x, 1.0-y, 1.0-z"]
 18 |     },
 19 |     "P21": {
 20 |         "name_HM": "P 1 21 1",
 21 |         "name_Hall": "P 2yb",
 22 |         "number": 4,
 23 |         "crystal_system": "monoclinic",
 24 |         "z_crystal": 2,
 25 |         "symmetry_operations": ["x, y, z",
 26 |                                 "1.0-x, 0.5+y, 1.0-z"]
 27 |     },
 28 |     "C2": {
 29 |         "name_HM": "C 1 2 1",
 30 |         "name_Hall": "C 2y",
 31 |         "number": 5,
 32 |         "crystal_system": "monoclinic",
 33 |         "z_crystal": 4,
 34 |         "symmetry_operations": ["x, y, z",
 35 |                                 "1.0-x, y, 1.0-z",
 36 |                                 "0.5+x, 0.5+y, z",
 37 |                                 "0.5-x, 0.5+y, 1.0-z"]
 38 |     },
 39 |     "Pc": {
 40 |         "name_HM": "P 1 c 1",
 41 |         "name_Hall": "P -2yc",
 42 |         "number": 7,
 43 |         "crystal_system": "monoclinic",
 44 |         "z_crystal": 2,
 45 |         "symmetry_operations": ["x, y, z",
 46 |                                 "x, 1.0-y, 0.5+z"]
 47 |     },
 48 |     "Cc": {
 49 |         "name_HM": "C 1 c 1",
 50 |         "name_Hall": "C -2yc",
 51 |         "number": 9,
 52 |         "crystal_system": "monoclinic",
 53 |         "z_crystal": 4,
 54 |         "symmetry_operations": ["x, y, z",
 55 |                                 "x, 1.0-y, 0.5+z",
 56 |                                 "0.5+x, 0.5+y, z",
 57 |                                 "0.5+x, 0.5-y, 0.5+z"]
 58 |     },
 59 |     "P21/m": {
 60 |         "name_HM": "P 1 21/m 1",
 61 |         "name_Hall": "-P 2yb",
 62 |         "number": 11,
 63 |         "crystal_system": "monoclinic",
 64 |         "z_crystal": 4,
 65 |         "symmetry_operations": ["x, y, z",
 66 |                                 "1.0-x, 0.5+y, 1.0-z",
 67 |                                 "1.0-x, 1.0-y, 1.0-z",
 68 |                                 "x, 0.5-y, z"]
 69 |     },
 70 |     "C2/m": {
 71 |         "name_HM": "C 1 2/m 1",
 72 |         "name_Hall": "-C 2y",
 73 |         "number": 12,
 74 |         "crystal_system": "monoclinic",
 75 |         "z_crystal": 8,
 76 |         "symmetry_operations": ["x, y, z",
 77 |                                 "1.0-x, y, 1.0-z",
 78 |                                 "1.0-x, 1.0-y, 1.0-z",
 79 |                                 "x, 1.0-y, z",
 80 |                                 "0.5+x, 0.5+y, z",
 81 |                                 "0.5-x, 0.5+y, 1.0-z",
 82 |                                 "0.5-x, 0.5-y, 1.0-z",
 83 |                                 "0.5+x, 0.5-y, z"]
 84 |     },
 85 |     "P2/c": {
 86 |         "name_HM": "P 1 2/c 1",
 87 |         "name_Hall": "-P 2yc",
 88 |         "number": 13,
 89 |         "crystal_system": "monoclinic",
 90 |         "z_crystal": 4,
 91 |         "symmetry_operations": ["x, y, z",
 92 |                                 "1.0-x, y, 0.5-z",
 93 |                                 "1.0-x, 1.0-y, 1.0-z",
 94 |                                 "x, 1.0-y, 0.5+z"]
 95 |     },
 96 |     "P21/c": {
 97 |         "name_HM": "P 1 21/c 1",
 98 |         "name_Hall": "-P 2ybc",
 99 |         "number": 14,
100 |         "crystal_system": "monoclinic",
101 |         "z_crystal": 4,
102 |         "symmetry_operations": ["x, y, z",
103 |                                 "1.0-x, 0.5+y, 0.5-z",
104 |                                 "1.0-x, 1.0-y, 1.0-z",
105 |                                 "x, 0.5-y, 0.5+z"]
106 |     },
107 |     "P21/n": {
108 |         "name_HM": "P 1 21/n 1",
109 |         "name_Hall": "-P 2yn",
110 |         "number": 14,
111 |         "crystal_system": "monoclinic",
112 |         "z_crystal": 4,
113 |         "symmetry_operations": ["x, y, z",
114 |                                 "0.5-x, 0.5+y, 0.5-z",
115 |                                 "1.0-x, 1.0-y, 1.0-z",
116 |                                 "0.5+x, 0.5-y, 0.5+z"]
117 |     },
118 |     "C2/c": {
119 |         "name_HM": "C 1 2/c 1",
120 |         "name_Hall": "-C 2yc",
121 |         "number": 15,
122 |         "crystal_system": "monoclinic",
123 |         "z_crystal": 8,
124 |         "symmetry_operations": ["x, y, z",
125 |                                 "1.0-x, y, 0.5-z",
126 |                                 "1.0-x, 1.0-y, 1.0-z",
127 |                                 "x, 1.0-y, 0.5+z",
128 |                                 "0.5+x, 0.5+y, z",
129 |                                 "0.5-x, 0.5+y, 0.5-z",
130 |                                 "0.5-x, 0.5-y, 1.0-z",
131 |                                 "0.5+x, 0.5-y, 0.5+z"]
132 |     },
133 |     "P21212": {
134 |         "name_HM": "P 21 21 2",
135 |         "name_Hall": "P 2 2ab",
136 |         "number": 18,
137 |         "crystal_system": "orthorhombic",
138 |         "z_crystal": 4,
139 |         "symmetry_operations": ["x, y, z",
140 |                                 "0.5+x, 0.5-y, 1.0-z",
141 |                                 "0.5-x, 0.5+y, 1.0-z",
142 |                                 "1.0-x, 1.0-y, z"]
143 |     },
144 |     "P212121": {
145 |         "name_HM": "P 21 21 21",
146 |         "name_Hall": "P 2ac 2ab",
147 |         "number": 19,
148 |         "crystal_system": "orthorhombic",
149 |         "z_crystal": 4,
150 |         "symmetry_operations": ["x, y, z",
151 |                                 "0.5+x, 0.5-y, 1.0-z",
152 |                                 "1.0-x, 0.5+y, 0.5-z",
153 |                                 "0.5-x, 1.0-y, 0.5+z"]
154 |     },
155 |     "Pca21": {
156 |         "name_HM": "P c a 21",
157 |         "name_Hall": "P 2c -2ac",
158 |         "number": 29,
159 |         "crystal_system": "orthorhombic",
160 |         "z_crystal": 4,
161 |         "symmetry_operations": ["x, y, z",
162 |                                 "0.5-x, y, 0.5+z",
163 |                                 "0.5+x, 1.0-y, z",
164 |                                 "1.0-x, 1.0-y, 0.5+z"]
165 |     },
166 |     "Pna21": {
167 |         "name_HM": "P n a 21",
168 |         "name_Hall": "P 2c -2n",
169 |         "number": 33,
170 |         "crystal_system": "orthorhombic",
171 |         "z_crystal": 4,
172 |         "symmetry_operations": ["x, y, z",
173 |                                 "0.5-x, 0.5+y, 0.5+z",
174 |                                 "0.5+x, 0.5-y, z",
175 |                                 "1.0-x, 1.0-y, 0.5+z"]
176 |     },
177 |     "Pbcn": {
178 |         "name_HM": "P b c n",
179 |         "name_Hall": "-P 2n 2ab",
180 |         "number": 60,
181 |         "crystal_system": "orthorhombic",
182 |         "z_crystal": 8,
183 |         "symmetry_operations": ["x, y, z",
184 |                                 "0.5-x, 0.5+y, z",
185 |                                 "x, 1.0-y, 0.5+z",
186 |                                 "0.5+x, 0.5+y, 0.5-z",
187 |                                 "1.0-x, 1.0-y, 1.0-z",
188 |                                 "0.5+x, 0.5-y, 1.0-z",
189 |                                 "1.0-x, y, 0.5-z",
190 |                                 "0.5-x, 0.5-y, 0.5+z"]
191 |     },
192 |     "Pbca": {
193 |         "name_HM": "P b c a",
194 |         "name_Hall": "-P 2ac 2ab",
195 |         "number": 61,
196 |         "crystal_system": "orthorhombic",
197 |         "z_crystal": 8,
198 |         "symmetry_operations": ["x, y, z",
199 |                                 "0.5-x, 0.5+y, z",
200 |                                 "x, 0.5-y, 0.5+z",
201 |                                 "0.5+x, y, 0.5-z",
202 |                                 "1.0-x, 1.0-y, 1.0-z",
203 |                                 "0.5+x, 0.5-y, 1.0-z",
204 |                                 "1.0-x, 0.5+y, 0.5-z",
205 |                                 "0.5-x, 1.0-y, 0.5+z"]
206 |     },
207 |     "Pnma": {
208 |         "name_HM": "P n m a",
209 |         "name_Hall": "-P 2ac 2n",
210 |         "number": 62,
211 |         "crystal_system": "orthorhombic",
212 |         "z_crystal": 8,
213 |         "symmetry_operations": ["x, y, z",
214 |                                 "0.5-x, 0.5+y, 0.5+z",
215 |                                 "x, 0.5-y, z",
216 |                                 "0.5+x, y, 0.5-z",
217 |                                 "1.0-x, 1.0-y, 1.0-z",
218 |                                 "0.5+x, 0.5-y, 0.5-z",
219 |                                 "1.0-x, 0.5+y, 1.0-z",
220 |                                 "0.5-x, 1.0-y, 0.5+z"]
221 |     },
222 |     "I41/a": {
223 |         "name_HM": "I 41/a",
224 |         "name_Hall": "I 4bw -1bw",
225 |         "number": 88,
226 |         "crystal_system": "tetragonal",
227 |         "z_crystal": 16,
228 |         "symmetry_operations": ["x, y, z",
229 |                                 "1.0-x, 1.0-y, z",
230 |                                 "1.0-y, 0.5+x, 0.25+z",
231 |                                 "y, 0.5-x, 0.25+z",
232 |                                 "1.0-x, 0.5-y, 0.25-z",
233 |                                 "x, 0.5+y, 0.25-z",
234 |                                 "y, 1.0-x, 1.0-z",
235 |                                 "1.0-y, x, 1.0-z",
236 |                                 "0.5+x, 0.5+y, 0.5+z",
237 |                                 "0.5-x, 0.5-y, 0.5+z",
238 |                                 "0.5-y, x, 0.75+z",
239 |                                 "0.5+y, 1.0-x, 0.75+z",
240 |                                 "0.5-x, 1.0-y, 0.75-z",
241 |                                 "0.5+x, y, 0.75-z",
242 |                                 "0.5+y, 0.5-x, 0.5-z",
243 |                                 "0.5-y, 0.5+x, 0.5-z"]
244 |     },
245 |     "R-3": {
246 |         "name_HM": "R -3",
247 |         "name_Hall": "-R 3",
248 |         "number": 148,
249 |         "crystal_system": "trigonal",
250 |         "z_crystal": 18,
251 |         "symmetry_operations": ["x, y, z",
252 |                                 "1.0-y, x-y, z",
253 |                                 "-x+y, 1.0-x, z",
254 |                                 "1.0-x, 1.0-y, 1.0-z",
255 |                                 "y, -x+y, 1.0-z",
256 |                                 "x-y, x, 1.0-z",
257 |                                 "2/3+x, 1/3+y, 1/3+z",
258 |                                 "2/3-y, 1/3+x-y, 1/3+z",
259 |                                 "2/3-x+y, 1/3-x, 1/3+z",
260 |                                 "2/3-x, 1/3-y, 1/3-z",
261 |                                 "2/3+y, 1/3-x+y, 1/3-z",
262 |                                 "2/3+x-y, 1/3+x, 1/3-z",
263 |                                 "1/3+x, 2/3+y, 2/3+z",
264 |                                 "1/3-y, 2/3+x-y, 2/3+z",
265 |                                 "1/3-x+y, 2/3-x, 2/3+z",
266 |                                 "1/3-x, 2/3-y, 2/3-z",
267 |                                 "1/3+y, 2/3-x+y, 2/3-z",
268 |                                 "1/3+x-y, 2/3+x, 2/3-z"]
269 |     }
270 | }
271 | 


--------------------------------------------------------------------------------