├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── bin ├── edview.py └── nxs2tif.py ├── diffractem ├── __init__.py ├── adxv.py ├── compute.py ├── dataset.py ├── io.py ├── map_image.py ├── nexus.py ├── pre_proc_opts.py ├── proc2d.py ├── proc_peaks.py ├── quick_proc.py ├── stream2sol.py ├── stream_parser.py └── tools.py ├── docs ├── Makefile ├── conf.py ├── crystfel.rst ├── dataset.rst ├── diffractem.adxv.rst ├── diffractem.compute.rst ├── diffractem.dataset.rst ├── diffractem.io.rst ├── diffractem.map_image.rst ├── diffractem.nexus.rst ├── diffractem.pre_proc_opts.rst ├── diffractem.proc2d.rst ├── diffractem.proc_peaks.rst ├── diffractem.rst ├── diffractem.stream_parser.rst ├── diffractem.tools.rst ├── edview.rst ├── file_format.rst ├── index.rst ├── installation.rst ├── make.bat ├── map_image.rst ├── modules.rst ├── pre_processing.rst └── requirements.txt ├── ideas.md ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── peakfinder8_extension │ ├── peakfinder8.cpp │ ├── peakfinder8.hh │ ├── peakfinder8_extension.cpp │ └── peakfinder8_extension.pyx └── version.txt /.gitignore: -------------------------------------------------------------------------------- 1 | debug.py 2 | *.egg-info 3 | .idea/ 4 | *-checkpoint.py 5 | .ipynb_checkpoints 6 | __pycache__ 7 | bin/modify_stream.py 8 | .vscode 9 | .DS_Store 10 | playbook.ipynb 11 | dist/ 12 | build/ 13 | playground.py 14 | pre_proc_play.py 15 | notebooks/dask-worker-space 16 | notebooks/proc_data 17 | *.h5 18 | *.nxs 19 | *.so 20 | *.dll 21 | *.dylib 22 | notebooks/ 23 | conda-recipe/ 24 | docs/_* 25 | publish.sh 26 | *.pyd 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 
34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. 
In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. 
You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. 
To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. 
As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. 
You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 
390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 446 | 447 | 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | 458 | END OF TERMS AND CONDITIONS 459 | 460 | How to Apply These Terms to Your New Libraries 461 | 462 | If you develop a new library, and you want it to be of the greatest 463 | possible use to the public, we recommend making it free software that 464 | everyone can redistribute and change. You can do so by permitting 465 | redistribution under these terms (or, alternatively, under the terms of the 466 | ordinary General Public License). 467 | 468 | To apply these terms, attach the following notices to the library. It is 469 | safest to attach them to the start of each source file to most effectively 470 | convey the exclusion of warranty; and each file should have at least the 471 | "copyright" line and a pointer to where the full notice is found. 472 | 473 | <one line to give the library's name and a brief idea of what it does.> 474 | Copyright (C) <year> <name of author> 475 | 476 | This library is free software; you can redistribute it and/or 477 | modify it under the terms of the GNU Lesser General Public 478 | License as published by the Free Software Foundation; either 479 | version 2.1 of the License, or (at your option) any later version. 480 | 481 | This library is distributed in the hope that it will be useful, 482 | but WITHOUT ANY WARRANTY; without even the implied warranty of 483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 484 | Lesser General Public License for more details. 485 | 486 | You should have received a copy of the GNU Lesser General Public 487 | License along with this library; if not, write to the Free Software 488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 489 | 490 | Also add information on how to contact you by electronic and paper mail. 491 | 492 | You should also get your employer (if you work as a programmer) or your 493 | school, if any, to sign a "copyright disclaimer" for the library, if 494 | necessary. Here is a sample; alter the names: 495 | 496 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 497 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. 498 | 499 | <signature of Ty Coon>, 1 April 1990 500 | Ty Coon, President of Vice 501 | 502 | That's all there is to it! -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/peakfinder8_extension/peakfinder8.hh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # diffractem 2 | 3 | Pre-processing software for serial electron diffraction (SerialED) data. 4 | See https://doi.org/10.1101/682575 for example results. 5 | 6 | Diffractem is intended for use within Jupyter notebooks - get a set of examples here: https://github.com/robertbuecker/serialed-examples.
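If you just want a taste of what a diffractem session looks like, here is a minimal, hypothetical sketch (the list-file name is a placeholder; `Dataset.from_list` and the `shots` table are used the same way by diffractem's own tools, e.g. `edview`):

```
from diffractem.dataset import Dataset

# Load a dataset from a list of HDF5/NeXus files (placeholder name),
# reading the shot/feature tables but not opening the image stacks yet:
ds = Dataset.from_list('experiment.lst', load_tables=True,
                       init_stacks=False, open_stacks=False)
print(ds.shots.head())  # shot list: one row per diffraction pattern (pandas DataFrame)
```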
7 | 8 | ## Installation 9 | _diffractem_ is tailored to pre-processing SerialED data, primarily for crystallographic analysis using _CrystFEL_, version 0.10.0 or higher: `https://www.desy.de/~twhite/crystfel/index.html`. 10 | To make the most of _diffractem_'s functionality, please download and install _CrystFEL_ following the installation instructions given on its homepage, if you do not have it already. 11 | During the build process of _CrystFEL_ using _meson_, the _pinkIndexer_ component will automatically be downloaded and installed. 12 | 13 | ### Create conda environment 14 | We _strongly_ suggest using the Anaconda3 Python distribution/package manager and creating a dedicated environment within it for diffractem. 15 | If you do not have Anaconda installed, it is sufficient to obtain _Miniconda_, the minimal version of the `conda` package manager, at https://docs.conda.io/en/latest/miniconda. 16 | 17 | Once installed, create a new conda environment for diffractem and activate it: 18 | ``` 19 | conda create -n diffractem -c conda-forge python=3.10 numpy scipy pandas dask distributed jupyterlab ipywidgets ipympl tifffile h5py 20 | conda activate diffractem 21 | ``` 22 | 23 | ### Install diffractem 24 | Finally, install diffractem itself, either from PyPI: 25 | ``` 26 | pip install diffractem 27 | ``` 28 | or, if you want to play/develop a bit more and stay up to date, you can clone this git repository and install diffractem in developer mode: 29 | ``` 30 | git clone https://github.com/robertbuecker/diffractem 31 | cd diffractem 32 | pip install -e . 33 | ``` 34 | 35 | Now you should be ready to go! To get started, why don't you download the example notebooks: 36 | ``` 37 | git clone https://github.com/robertbuecker/serialed-examples 38 | ``` 39 | And get example raw data at MPDL Edmond: https://edmond.mpdl.mpg.de/imeji/collection/32lI6YJ7DZaF5L_K. 40 | 41 | When you're ready: just make your own branches of the notebooks for your own projects, and have fun!
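By the way: a quick (unofficial) sanity check of your installation is to print the package version; the `version()` helper is defined in diffractem's top-level module and reads the `version.txt` file shipped with the package:

```
import diffractem
print(diffractem.version())
```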
42 | 43 | --- 44 | diffractem, (C) 2019-2022 Robert Bücker, robert.buecker@rigaku.com 45 | 46 | peakfinder8, (C) 2014-2019 Deutsches Elektronen-Synchrotron DESY 47 | -------------------------------------------------------------------------------- /bin/edview.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import hdf5plugin 3 | from diffractem.stream_parser import StreamParser 4 | from diffractem.dataset import Dataset 5 | from diffractem.proc_peaks import get_pk_data 6 | import argparse 7 | import pandas as pd 8 | import numpy as np 9 | import h5py 10 | import pyqtgraph as pg 11 | from PyQt5 import QtGui, QtWidgets, QtCore 12 | from PyQt5.QtWidgets import (QPushButton, QSpinBox, QCheckBox, 13 | QTextEdit, QWidget, QApplication, QGridLayout, QTableWidget, QTableWidgetItem) 14 | from diffractem.adxv import Adxv 15 | from warnings import warn 16 | from typing import Optional, Union 17 | from time import sleep 18 | 19 | # non-trivial detector geometries are currently not supported (licensing trouble) 20 | # from cfelpyutils.crystfel_utils import load_crystfel_geometry 21 | # from cfelpyutils.geometry_utils import apply_geometry_to_data, compute_visualization_pix_maps 22 | 23 | pg.setConfigOptions(imageAxisOrder='row-major') 24 | 25 | app = pg.mkQApp() 26 | 27 | class EDViewer(QWidget): 28 | 29 | def __init__(self, args): 30 | 31 | super().__init__() 32 | self.dataset = Dataset() 33 | self.args = args 34 | self.data_path = None 35 | self.current_shot = pd.Series() 36 | self.diff_image = np.empty((0,0)) 37 | self.map_image = np.empty((0,0)) 38 | self.init_widgets() 39 | self.adxv = None 40 | self.geom = None 41 | 42 | self.read_files() 43 | self.switch_shot(0) 44 | 45 | if self.args.internal: 46 | self.hist_img.setLevels(np.quantile(self.diff_image, 0.02), np.quantile(self.diff_image, 0.98)) 47 | 48 | self.update() 49 | 50 | self.show() 51 | 52 | def closeEvent(self, a0: QtGui.QCloseEvent) -> None: 53 | if not self.args.internal: 54 | self.adxv.exit() 55 | a0.accept() 56 | 57 | def read_files(self): 58 | 59 | file_type = args.filename.rsplit('.', 1)[-1] 60 | 61 | if file_type == 'stream': 62 | print(f'Parsing stream file {args.filename}...') 63 | stream = StreamParser(args.filename) 64 | # with open('tmp.geom', 'w') as fh: 65 | # fh.write('\n'.join(stream._geometry_string)) 66 | # self.geom = load_crystfel_geometry('tmp.geom') 67 | # os.remove('tmp.geom') 68 | # if len(self.geom['panels']) == 1: 69 | # print('Single-panel geometry, so ignoring transforms for now.') 70 | # #TODO make this more elegant, e.g. by overwriting image transform func with identity 71 | # self.geom = None 72 | self.geom = None 73 | 74 | try: 75 | self.data_path = stream.geometry['data'] 76 | except KeyError: 77 | if args.geometry is None: 78 | raise ValueError('No data location specified in geometry file. Please use -d parameter.') 79 | 80 | files = sorted(list(stream.shots['file'].unique())) 81 | # print('Loading data files found in stream... \n', '\n'.join(files)) 82 | try: 83 | self.dataset = Dataset.from_files(files, load_tables=False, init_stacks=False, open_stacks=False) 84 | self.dataset.load_tables(features=True) 85 | # print(self.dataset.shots.columns) 86 | self.dataset.merge_stream(stream) 87 | # get_selection would not be the right method to call (changes IDs), instead do... 
88 | self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected,:].reset_index(drop=True) 89 | # TODO get subset for incomplete coverage 90 | print('Merged stream and hdf5 shot lists') 91 | except Exception as err: 92 | self.dataset = Dataset() 93 | self.dataset._shots = stream.shots 94 | self.dataset._peaks = stream.peaks 95 | self.dataset._predict = stream.indexed 96 | self.dataset._shots['selected'] = True 97 | print('Could not load shot lists from H5 files, but have that from the stream file.') 98 | print(f'Reason: {err}') 99 | 100 | if args.geometry is not None: 101 | raise ValueError('Geometry files are currently not supported.') 102 | # self.geom = load_crystfel_geometry(args.geometry) 103 | 104 | if file_type in ['lst', 'h5', 'hdf', 'nxs']: 105 | self.dataset = Dataset.from_list(args.filename, load_tables=True, init_stacks=False, open_stacks=False) 106 | if not self.dataset.shots.selected.all(): 107 | # dirty removal of unwanted shots is sufficient in this case: 108 | self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected,:].reset_index(drop=True) 109 | 110 | if args.data_path is not None: 111 | self.data_path = args.data_path 112 | 113 | if self.data_path is None: 114 | # data path neither set via stream file, nor explicitly. We have to guess. 115 | try: 116 | with h5py.File(self.dataset.shots.file.iloc[0], 'r') as fh: 117 | base = '/%/data'.replace('%', self.dataset.shots.subset.iloc[0]) 118 | self.data_path = '/%/data/' + fh[base].attrs['signal'] 119 | print('Found data path', self.data_path) 120 | except Exception as err: 121 | warn(str(err), RuntimeWarning) 122 | print('Could not find out data path. Assuming /%/data/raw_counts') 123 | self.data_path = '/%/data/raw_counts' 124 | 125 | if self.args.query: 126 | print('Only showing shots with', self.args.query) 127 | #self.dataset.select(self.args.query) 128 | #self.dataset = self.dataset.get_selection(self.args.query, file_suffix=None, reset_id=False) 129 | #print('cutting shot list only') 130 | self.dataset._shots = self.dataset._shots.query(args.query) 131 | 132 | if self.args.sort_crystals: 133 | print('Re-sorting shots by region/crystal/run.') 134 | self.dataset._shots = self.dataset._shots.sort_values(by=['sample', 'region', 'crystal_id', 'run']) 135 | 136 | if not self.args.internal: 137 | #adxv_args = {'wavelength': 0.0251, 'distance': 2280, 'pixelsize': 0.055} 138 | adxv_args = {} 139 | self.adxv = Adxv(hdf5_path=self.data_path.replace('%', 'entry'), 140 | adxv_bin=self.args.adxv_bin, **adxv_args) 141 | 142 | self.b_goto.setMaximum(self.dataset.shots.shape[0]-1) 143 | self.b_goto.setMinimum(0) 144 | 145 | def update_image(self): 146 | print(self.current_shot) 147 | with h5py.File(self.current_shot['file'], mode='r') as f: 148 | 149 | if self.args.internal: 150 | path = self.data_path.replace('%', self.current_shot.subset) 151 | print('Loading {}:{} from {}'.format(path, 152 | self.current_shot['shot_in_subset'], self.current_shot['file'])) 153 | if len(f[path].shape) == 3: 154 | self.diff_image = f[path][int(self.current_shot['shot_in_subset']), ...] 
155 | elif len(f[path].shape) == 2: 156 | self.diff_image = f[path][:] 157 | 158 | self.diff_image[np.isnan(self.diff_image)] = 0 159 | self.hist_img.setHistogramRange(np.partition(self.diff_image.flatten(), 100)[100], np.partition(self.diff_image.flatten(), -100)[-100]) 160 | 161 | levels = self.hist_img.getLevels() 162 | # levels = (max(levels[0], -1), levels[1]) 163 | levels = (levels[0], levels[1]) 164 | if self.geom is not None: 165 | raise RuntimeError('This should not happen') 166 | # self.diff_image = apply_geometry_to_data(self.diff_image, self.geom) 167 | self.img.setImage(self.diff_image, autoRange=False) 168 | 169 | self.img.setLevels(levels) 170 | self.hist_img.setLevels(levels[0], levels[1]) 171 | 172 | if not self.args.no_map: 173 | try: 174 | path = args.map_path.replace('%', self.current_shot['subset']) 175 | self.map_image = f[path][...] 176 | self.mapimg.setImage(self.map_image) 177 | except KeyError: 178 | warn('No map found at {}!'.format(path), Warning) 179 | 180 | if not self.args.internal: 181 | self.adxv.load_image(self.current_shot.file) 182 | self.adxv.slab(self.current_shot.shot_in_subset + 1) 183 | 184 | def update_plot(self): 185 | 186 | allpk = [] 187 | 188 | if self.b_peaks.isChecked(): 189 | 190 | if (len(self.dataset.peaks) == 0) or args.cxi_peaks: 191 | path = args.cxi_peaks_path.replace('%', self.current_shot.subset) 192 | print('Loading CXI peaks of {}:{} from {}'.format(path, 193 | self.current_shot['shot_in_subset'], self.current_shot['file'])) 194 | with h5py.File(self.current_shot.file) as fh: 195 | ii = int(self.current_shot['shot_in_subset']) 196 | Npk = fh[path + '/nPeaks'][ii] 197 | x = fh[path + '/peakXPosRaw'][ii, :Npk] 198 | y = fh[path + '/peakYPosRaw'][ii, :Npk] 199 | 200 | peaks = pd.DataFrame((x, y), index=['fs/px', 'ss/px']).T 201 | 202 | else: 203 | peaks = self.dataset.peaks.loc[(self.dataset.peaks.file == self.current_shot.file) 204 | & (self.dataset.peaks.Event == self.current_shot.Event), 205 | ['fs/px', 'ss/px']] - 0.5 206 | x = peaks.loc[:,'fs/px'] 207 | y = peaks.loc[:,'ss/px'] 208 | 209 | if self.geom is not None: 210 | raise RuntimeError('Someone set geom to something. This should not happen.') 211 | # print('Projecting peaks...') 212 | # maps = compute_visualization_pix_maps(self.geom) 213 | # x = maps.x[y.astype(int), x.astype(int)] 214 | # y = maps.y[y.astype(int), x.astype(int)] 215 | 216 | if self.args.internal: 217 | ring_pen = pg.mkPen('g', width=0.8) 218 | self.found_peak_canvas.setData(x, y, 219 | symbol='o', size=13, pen=ring_pen, brush=(0, 0, 0, 0), antialias=True) 220 | else: 221 | allpk.append(peaks.assign(group=0)) 222 | 223 | else: 224 | self.found_peak_canvas.clear() 225 | 226 | if self.b_pred.isChecked() and (self.dataset.predict.shape[0] > 0): 227 | 228 | pred = self.dataset.predict.loc[(self.dataset.predict.file == self.current_shot.file) 229 | & (self.dataset.predict.Event == self.current_shot.Event), 230 | ['fs/px', 'ss/px']] - 0.5 231 | 232 | if self.geom is not None: 233 | raise RuntimeError('Someone set geom to not None. 
This should not happen.') 234 | # print('Projecting predictions...') 235 | # maps = compute_visualization_pix_maps(self.geom) 236 | # x = maps.x[pred.loc[:,'ss/px'].astype(int), 237 | # pred.loc[:,'fs/px'].astype(int)] 238 | # y = maps.y[pred.loc[:,'ss/px'].astype(int), 239 | # pred.loc[:,'fs/px'].astype(int)] 240 | else: 241 | x = pred.loc[:,'fs/px'] 242 | y = pred.loc[:,'ss/px'] 243 | 244 | if self.args.internal: 245 | square_pen = pg.mkPen('r', width=0.8) 246 | self.predicted_peak_canvas.setData(x, y, 247 | symbol='s', size=13, pen=square_pen, brush=(0, 0, 0, 0), antialias=True) 248 | else: 249 | allpk.append(pred.assign(group=1)) 250 | 251 | else: 252 | self.predicted_peak_canvas.clear() 253 | 254 | if not self.args.internal and len(allpk) > 0: 255 | self.adxv.define_spot('green', 5, 0, 0) 256 | self.adxv.define_spot('red', 0, 10, 1) 257 | self.adxv.load_spots(pd.concat(allpk, axis=0, ignore_index=True).values) 258 | elif not self.args.internal: 259 | self.adxv.load_spots(np.empty((0,3))) 260 | 261 | if self.dataset.features.shape[0] > 0: 262 | ring_pen = pg.mkPen('g', width=2) 263 | dot_pen = pg.mkPen('y', width=0.5) 264 | 265 | region_feat = self.dataset.features.loc[(self.dataset.features['region'] == self.current_shot['region']) 266 | & (self.dataset.features['sample'] == self.current_shot['sample'])] 267 | 268 | print('Number of region features:', region_feat.shape[0]) 269 | 270 | if self.current_shot['crystal_id'] != -1: 271 | single_feat = region_feat.loc[region_feat['crystal_id'] == self.current_shot['crystal_id'], :] 272 | x0 = single_feat['crystal_x'].squeeze() 273 | y0 = single_feat['crystal_y'].squeeze() 274 | if self.b_locations.isChecked(): 275 | self.found_features_canvas.setData(region_feat['crystal_x'], region_feat['crystal_y'], 276 | symbol='+', size=7, pen=dot_pen, brush=(0, 0, 0, 0), pxMode=True) 277 | else: 278 | self.found_features_canvas.clear() 279 | 280 | if self.b_zoom.isChecked(): 281 | self.map_box.setRange(xRange=(x0 - 5 * args.beam_diam, x0 + 5 * args.beam_diam), 282 | yRange=(y0 - 5 * args.beam_diam, y0 + 5 * args.beam_diam)) 283 | self.single_feature_canvas.setData([x0], [y0], 284 | symbol='o', size=args.beam_diam, pen=ring_pen, 285 | brush=(0, 0, 0, 0), pxMode=False) 286 | try: 287 | c_real = np.cross([self.current_shot.astar_x, self.current_shot.astar_y, self.current_shot.astar_z], 288 | [self.current_shot.bstar_x, self.current_shot.bstar_y, self.current_shot.bstar_z]) 289 | b_real = np.cross([self.current_shot.cstar_x, self.current_shot.cstar_y, self.current_shot.cstar_z], 290 | [self.current_shot.astar_x, self.current_shot.astar_y, self.current_shot.astar_z]) 291 | a_real = np.cross([self.current_shot.bstar_x, self.current_shot.bstar_y, self.current_shot.bstar_z], 292 | [self.current_shot.cstar_x, self.current_shot.cstar_y, self.current_shot.cstar_z]) 293 | a_real = 20 * a_real / np.sum(a_real ** 2) ** .5 294 | b_real = 20 * b_real / np.sum(b_real ** 2) ** .5 295 | c_real = 20 * c_real / np.sum(c_real ** 2) ** .5 296 | self.a_dir.setData(x=x0 + np.array([0, a_real[0]]), y=y0 + np.array([0, a_real[1]])) 297 | self.b_dir.setData(x=x0 + np.array([0, b_real[0]]), y=y0 + np.array([0, b_real[1]])) 298 | self.c_dir.setData(x=x0 + np.array([0, c_real[0]]), y=y0 + np.array([0, c_real[1]])) 299 | except: 300 | print('Could not read lattice vectors.') 301 | else: 302 | self.single_feature_canvas.setData([x0], [y0], 303 | symbol='o', size=13, pen=ring_pen, brush=(0, 0, 0, 0), pxMode=True) 304 | self.map_box.setRange(xRange=(0, self.map_image.shape[1]), 
yRange=(0, self.map_image.shape[0])) 305 | 306 | 307 | 308 | else: 309 | self.single_feature_canvas.setData([], []) 310 | 311 | def update(self): 312 | 313 | self.found_peak_canvas.clear() 314 | self.predicted_peak_canvas.clear() 315 | app.processEvents() 316 | 317 | self.update_image() 318 | if args.cxi_peaks and not args.internal: 319 | # give adxv some time to display the image before accessing the CXI data 320 | sleep(0.2) 321 | self.update_plot() 322 | 323 | print(self.current_shot) 324 | 325 | # CALLBACK FUNCTIONS 326 | 327 | def switch_shot(self, shot_id=None): 328 | if shot_id is None: 329 | shot_id = self.b_goto.value() 330 | 331 | self.shot_id = max(0, shot_id % self.dataset.shots.shape[0]) 332 | self.current_shot = self.dataset.shots.iloc[self.shot_id, :] 333 | self.meta_table.setRowCount(self.current_shot.shape[0]) 334 | self.meta_table.setColumnCount(2) 335 | 336 | for row, (k, v) in enumerate(self.current_shot.items()): 337 | self.meta_table.setItem(row, 0, QTableWidgetItem(k)) 338 | self.meta_table.setItem(row, 1, QTableWidgetItem(str(v))) 339 | 340 | self.meta_table.resizeRowsToContents() 341 | 342 | shot = self.current_shot 343 | title = {'sample': '', 'region': 'Reg', 'feature': 'Feat', 'frame': 'Frame', 'event': 'Ev', 'file': ''} 344 | titlestr = '' 345 | for k, v in title.items(): 346 | titlestr += f'{v} {shot[k]}' if k in shot.keys() else '' 347 | titlestr += f' ({shot.name} of {self.dataset.shots.shape[0]})' 348 | print(titlestr) 349 | 350 | self.setWindowTitle(titlestr) 351 | 352 | self.b_goto.blockSignals(True) 353 | self.b_goto.setValue(self.shot_id) 354 | self.b_goto.blockSignals(False) 355 | 356 | self.update() 357 | 358 | def switch_shot_rel(self, shift): 359 | self.switch_shot(self.shot_id + shift) 360 | 361 | def mouse_moved(self, evt): 362 | mousePoint = self.img.mapFromDevice(evt[0]) 363 | x, y = round(mousePoint.x()), round(mousePoint.y()) 364 | x = min(max(0, x), self.diff_image.shape[1] - 1) 365 | y = min(max(0, y), self.diff_image.shape[0] - 1) 366 | I = self.diff_image[y, x] 367 | #print(x, y, I) 368 | self.info_text.setPos(x, y) 369 | self.info_text.setText(f'{x:0.1f}, {y:0.1f}: {I:0.1f}') 370 | 371 | def init_widgets(self): 372 | 373 | self.imageWidget = pg.GraphicsLayoutWidget() 374 | 375 | # IMAGE DISPLAY 376 | 377 | # A plot area (ViewBox + axes) for displaying the image 378 | self.image_box = self.imageWidget.addViewBox() 379 | self.image_box.setAspectLocked() 380 | 381 | self.img = pg.ImageItem() 382 | self.img.setZValue(0) 383 | self.image_box.addItem(self.img) 384 | self.proxy = pg.SignalProxy(self.img.scene().sigMouseMoved, rateLimit=60, slot=self.mouse_moved) 385 | 386 | self.found_peak_canvas = pg.ScatterPlotItem() 387 | self.image_box.addItem(self.found_peak_canvas) 388 | self.found_peak_canvas.setZValue(2) 389 | self.found_peak_canvas.sigClicked.connect(self.onPeakClick) 390 | 391 | self.predicted_peak_canvas = pg.ScatterPlotItem() 392 | self.image_box.addItem(self.predicted_peak_canvas) 393 | self.predicted_peak_canvas.setZValue(2) 394 | self.predicted_peak_canvas.sigClicked.connect(self.onPredictionClick) 395 | 396 | self.info_text = pg.TextItem(text='') 397 | self.image_box.addItem(self.info_text) 398 | self.info_text.setPos(0, 0) 399 | 400 | # Contrast/color control 401 | self.hist_img = pg.HistogramLUTItem(self.img, fillHistogram=False) 402 | self.imageWidget.addItem(self.hist_img) 403 | 404 | # MAP DISPLAY 405 | 406 | self.map_widget = pg.GraphicsLayoutWidget() 407 | self.map_widget.setWindowTitle('region map') 408 | 409 | # Map 
image control 410 | self.map_box = self.map_widget.addViewBox() 411 | self.map_box.setAspectLocked() 412 | 413 | self.mapimg = pg.ImageItem() 414 | self.mapimg.setZValue(0) 415 | self.map_box.addItem(self.mapimg) 416 | 417 | self.found_features_canvas = pg.ScatterPlotItem() 418 | self.map_box.addItem(self.found_features_canvas) 419 | self.found_features_canvas.setZValue(2) 420 | 421 | self.single_feature_canvas = pg.ScatterPlotItem() 422 | self.map_box.addItem(self.single_feature_canvas) 423 | self.single_feature_canvas.setZValue(2) 424 | 425 | # lattice vectors 426 | self.a_dir = pg.PlotDataItem(pen=pg.mkPen('r', width=1)) 427 | self.b_dir = pg.PlotDataItem(pen=pg.mkPen('g', width=1)) 428 | self.c_dir = pg.PlotDataItem(pen=pg.mkPen('b', width=1)) 429 | self.map_box.addItem(self.a_dir) 430 | self.map_box.addItem(self.b_dir) 431 | self.map_box.addItem(self.c_dir) 432 | 433 | # Contrast/color control 434 | self.hist_map = pg.HistogramLUTItem(self.mapimg) 435 | self.map_widget.addItem(self.hist_map) 436 | 437 | ### CONTROL BUTTONS 438 | 439 | b_rand = QPushButton('rnd') 440 | b_plus10 = QPushButton('+10') 441 | b_minus10 = QPushButton('-10') 442 | b_last = QPushButton('last') 443 | self.b_peaks = QCheckBox('peaks') 444 | self.b_pred = QCheckBox('crystal') 445 | self.b_zoom = QCheckBox('zoom') 446 | self.b_locations = QCheckBox('locations') 447 | self.b_locations.setChecked(True) 448 | b_reload = QPushButton('reload') 449 | self.b_goto = QSpinBox() 450 | 451 | b_rand.clicked.connect(lambda: self.switch_shot(np.random.randint(0, self.dataset.shots.shape[0] - 1))) 452 | b_plus10.clicked.connect(lambda: self.switch_shot_rel(+10)) 453 | b_minus10.clicked.connect(lambda: self.switch_shot_rel(-10)) 454 | b_last.clicked.connect(lambda: self.switch_shot(self.dataset.shots.index.max())) 455 | self.b_peaks.stateChanged.connect(self.update) 456 | self.b_pred.stateChanged.connect(self.update) 457 | self.b_zoom.stateChanged.connect(self.update) 458 | self.b_locations.stateChanged.connect(self.update) 459 | b_reload.clicked.connect(lambda: self.read_files()) 460 | self.b_goto.valueChanged.connect(lambda: self.switch_shot(None)) 461 | 462 | self.button_layout = QGridLayout()  # QGridLayout lives in QtWidgets (imported above), not QtGui, in PyQt5 463 | self.button_layout.addWidget(b_plus10, 0, 2) 464 | self.button_layout.addWidget(b_minus10, 0, 1) 465 | self.button_layout.addWidget(b_rand, 0, 4) 466 | self.button_layout.addWidget(b_last, 0, 3) 467 | self.button_layout.addWidget(self.b_goto, 0, 0) 468 | self.button_layout.addWidget(b_reload, 0, 10) 469 | self.button_layout.addWidget(self.b_peaks, 0, 21) 470 | self.button_layout.addWidget(self.b_pred, 0, 22) 471 | self.button_layout.addWidget(self.b_zoom, 0, 23) 472 | self.button_layout.addWidget(self.b_locations, 0, 24) 473 | 474 | self.meta_table = QTableWidget() 475 | self.meta_table.verticalHeader().setVisible(False) 476 | self.meta_table.horizontalHeader().setVisible(False) 477 | self.meta_table.setFont(QtGui.QFont('Helvetica', 10)) 478 | 479 | # --- TOP-LEVEL ARRANGEMENT 480 | self.top_layout = QGridLayout() 481 | self.setLayout(self.top_layout) 482 | 483 | if self.args.internal: 484 | self.top_layout.addWidget(self.imageWidget, 0, 0) 485 | self.top_layout.setColumnStretch(0, 4) 486 | 487 | if not self.args.no_map: 488 | self.top_layout.addWidget(self.map_widget, 0, 1) 489 | self.top_layout.setColumnStretch(1, 3)  # stretch factors must be ints; 4:3 keeps the intended 2:1.5 ratio 490 | 491 | self.top_layout.addWidget(self.meta_table, 0, 2) 492 | self.top_layout.addLayout(self.button_layout, 1, 0, 1, 3) 493 | 494 | self.top_layout.setColumnStretch(2, 0) 495 | 496 | def
onPeakClick(self, points, ev): 497 | x, y = np.array([pt.pos().x() for pt in ev]).reshape(1,-1), \ 498 | np.array([pt.pos().y() for pt in ev]).reshape(1,-1) 499 | n = np.array([len(x)]) 500 | ctr_x, ctr_y = np.array(self.current_shot.center_x).reshape(1), \ 501 | np.array(self.current_shot.center_y).reshape(1) 502 | #TODO GET THE PROPER VALUES HERE, DUMMY 503 | cl = 3.06 504 | px = 55e-6 505 | pkd = get_pk_data(n, x, y, ctr_x, ctr_y, pxs=px, clen=cl, wl=0.0251) 506 | print('Clicked peak:\n' 507 | f'Raw position (px): {pkd["peakXPosRaw"][0,0]:.1f}, {pkd["peakYPosRaw"][0,0]:.1f}\n' 508 | f'Corrected position (px): {pkd["peakXPosCor"][0,0]:.1f}, {pkd["peakYPosCor"][0,0]:.1f}\n' 509 | f'Corrected position (mm): {1000*px*pkd["peakXPosCor"][0,0]:.2f}, {1000*px*pkd["peakYPosCor"][0,0]:.2f}\n' 510 | f'd vector (1/A), azimuth (deg): {pkd["peakD"][0,0]:.2f}, {180/np.pi*pkd["peakAzimuth"][0,0]:.1f}\n') 511 | 512 | def onPredictionClick(self, points, ev): 513 | x, y = np.array([pt.pos().x() for pt in ev]).reshape(1,-1), \ 514 | np.array([pt.pos().y() for pt in ev]).reshape(1,-1) 515 | n = np.array([len(x)]) 516 | ctr_x, ctr_y = np.array(self.current_shot.center_x).reshape(1), \ 517 | np.array(self.current_shot.center_y).reshape(1) 518 | #TODO GET THE PROPER VALUES HERE, DUMMY 519 | cl = 3.06 520 | px = 55e-6 521 | pkd = get_pk_data(n, x, y, ctr_x, ctr_y, pxs=px, clen=cl, wl=0.0251) 522 | print('Clicked prediction:\n' 523 | 'TODO: GET HKL\n' 524 | f'Raw position (px): {pkd["peakXPosRaw"][0,0]:.1f}, {pkd["peakYPosRaw"][0,0]:.1f}\n' 525 | f'Corrected position (px): {pkd["peakXPosCor"][0,0]:.1f}, {pkd["peakYPosCor"][0,0]:.1f}\n' 526 | f'Corrected position (mm): {1000*px*pkd["peakXPosCor"][0,0]:.2f}, {1000*px*pkd["peakYPosCor"][0,0]:.2f}\n' 527 | f'd vector (1/A), azimuth (deg): {pkd["peakD"][0,0]:.2f}, {180/np.pi*pkd["peakAzimuth"][0,0]:.1f}\n') 528 | 529 | if __name__ == '__main__': 530 | 531 | parser = argparse.ArgumentParser(description='Viewer for Serial Electron Diffraction data') 532 | parser.add_argument('filename', type=str, help='Stream file, list file, or HDF5') 533 | parser.add_argument('-g', '--geometry', type=str, help='CrystFEL geometry file, might be helpful') 534 | parser.add_argument('-q', '--query', type=str, help='Query string to filter shots by column values') 535 | parser.add_argument('-d', '--data_path', type=str, help='Data field in HDF5 file(s). 
Defaults to stream file or tries a few.') 536 | parser.add_argument('--internal', help='Use internal diffraction viewer instead of adxv', action='store_true') 537 | parser.add_argument('--adxv-bin', help='Location/command string of adxv binary', default='adxv') 538 | parser.add_argument('--map-path', type=str, help='Path to map image', default='/%/map/image') 539 | parser.add_argument('--feature-path', type=str, help='Path to map feature table', default='/%/map/features') 540 | parser.add_argument('--cxi-peaks', help='Prefer CXI-format peaks in HDF5 files over stream/HDF5 table', action='store_true') 541 | parser.add_argument('--cxi-peaks-path', type=str, help='Path to CXI peaks table', default='/%/data') 542 | parser.add_argument('--peaks-path', type=str, help='Path to peaks table in HDF5 files', default='/%/results/peaks') 543 | parser.add_argument('--predict-path', type=str, help='Path to prediction table', default='/%/results/predict') 544 | parser.add_argument('--no-map', help='Hide map, even if we had it', action='store_true') 545 | parser.add_argument('--beam-diam', type=int, help='Beam size displayed in real space, in pixels', default=5) 546 | parser.add_argument('--sort-crystals', help='Sort shots by crystal IDs', action='store_true') 547 | 548 | args = parser.parse_args() 549 | 550 | # operation modes: 551 | # (1) file list (+ geometry) + nxs: estimate geometry from nxs if geometry is absent 552 | # (2) expanded file list (+ geometry) + nxs: first match nxs shot lists vs expanded file list 553 | # (3) (expanded) file list + geometry + hdf5: omit map image automatically 554 | # (4) stream + nxs: as (2), peaks/predict in stream take precedence over nxs 555 | # (5) stream + hdf5: as (3) 556 | 557 | # TODO next: work on read_file 558 | viewer = EDViewer(args) 559 | 560 | import sys 561 | if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'): 562 | app.instance().exec_() 563 | -------------------------------------------------------------------------------- /bin/nxs2tif.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from tifffile import imsave 3 | import h5py 4 | import sys 5 | import numpy as np 6 | 7 | fh = h5py.File(sys.argv[1], 'r')  # open read-only; older h5py versions default to append mode 8 | ds = fh['/entry/instrument/detector/data'] 9 | if len(sys.argv) > 2: 10 | fn = sys.argv[2] 11 | else: 12 | fn = sys.argv[1].rsplit('.', 1)[0] + '.tif' 13 | if ds.dtype == np.int32: 14 | ds = ds[:].astype(np.float32) 15 | imsave(fn, ds[:, :, :]) 16 | print('Wrote ' + fn) -------------------------------------------------------------------------------- /diffractem/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __all__ = ['compute', 'io', 'proc2d', 'tools', 4 | 'map_image', 'pre_proc_opts', 'proc_peaks', 5 | 'nexus', 'stream_parser', 'adxv'] 6 | 7 | def version(): 8 | try: 9 | with open(__file__.rsplit('/',1)[0] + '/../version.txt') as fh: 10 | return fh.readline().strip() 11 | except FileNotFoundError: 12 | return 'Could not determine diffractem version' 13 | 14 | 15 | def gap_pixels(detector='Lambda750k'): 16 | """Returns the gap pixels of the Lambda detector as a binary mask""" 17 | if detector == 'Lambda750k': 18 | gaps = np.zeros((516, 1556), dtype=bool)  # np.bool was removed in NumPy 1.24; use the builtin 19 | for k in range(255, 1296, 260): 20 | gaps[:, k:k+6] = True 21 | gaps[255:261] = True 22 | else: 23 | raise ValueError(f'Unknown detector: {detector}') 24 | return gaps 25 | 26 | 27 | def panel_pix(panel_id=1, pxmask=None,
img=None, 28 | detector='Lambda750k', include_gap=True): 29 | 30 | if detector == 'Lambda750k': 31 | shape = (1556, 516) 32 | panel_size = 256 if include_gap else 255 33 | panel_gap = 4 if include_gap else 6 34 | cutoff = (60, 0) 35 | row, col = divmod(panel_id-1, 6) 36 | if panel_id > 6: 37 | col = 5-col 38 | if panel_id > 12: 39 | raise ValueError('panel_id cannot be larger than 12') 40 | else: 41 | raise ValueError(f'Unknown detector {detector}') 42 | 43 | mask = np.zeros((shape[1], shape[0])) 44 | #print(row,col) 45 | cstart = col*(panel_size + panel_gap) 46 | rstart = row*(panel_size + panel_gap) 47 | mask[rstart:rstart+panel_size, cstart:cstart+panel_size] = 1 48 | mask[:(cutoff[1]+1), :(cutoff[0]+1)] = 0 49 | mask[-(cutoff[1]+1):, -(cutoff[0]+1):] = 0 50 | if pxmask is not None: 51 | mask = mask - pxmask 52 | if img is None: 53 | return mask == 1 54 | else: 55 | cimg = img[rstart:rstart+panel_size, cstart:cstart+panel_size] 56 | if pxmask is not None: 57 | pm = pxmask[rstart:rstart+panel_size, cstart:cstart+panel_size] 58 | else: 59 | pm = np.zeros_like(cimg) 60 | cimg[pm != 0] = -1 61 | return cimg 62 | 63 | 64 | def normalize_names(strin): 65 | strout = strin 66 | for character in [' ', '/', '(', ')', '-']: 67 | strout = strout.replace(character, '_') 68 | return strout 69 | 70 | 71 | def normalize_keys(dictionary): 72 | d = {} 73 | for k, v in dictionary.items(): 74 | if isinstance(v, dict): 75 | d[normalize_names(k)] = normalize_keys(v) 76 | else: 77 | d[normalize_names(k)] = v 78 | return d 79 | -------------------------------------------------------------------------------- /diffractem/adxv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adxv remote control. 3 | 4 | Inspired by: 5 | https://github.com/erikssod/adxv_class by Daniel Eriksson (MIT license) 6 | https://github.com/keitaroyam/yamtbx by Keitaro Yamashita (BSD license) 7 | 8 | """ 9 | import socket 10 | import subprocess 11 | import time 12 | import logging 13 | 14 | class Adxv: 15 | 16 | def __init__(self, adxv_bin=None, hdf5_path='/entry/data/raw_counts', **kwargs): 17 | 18 | self.logger = logging.getLogger() 19 | handler = logging.StreamHandler() 20 | handler.setFormatter( 21 | logging.Formatter(fmt=('[%(levelname)s] %(name)s ''%(funcName)s | %(message)s'))) 22 | self.logger.handlers = [handler] 23 | self.logger.setLevel('INFO') # or INFO, or DEBUG, etc 24 | 25 | self.logger = logging.getLogger(__name__) 26 | 27 | self.adxv_bin = adxv_bin 28 | self.adxv_opts = kwargs 29 | 30 | if self.adxv_bin is None: 31 | self.adxv_bin = "adxv" 32 | 33 | self.hdf5_path = hdf5_path 34 | self.adxv_proc = None # subprocess object 35 | self.adxv_port = 8100 # adxv's default port. overridden later. 36 | self.sock = None 37 | 38 | self.spot_type_counter = -1 39 | 40 | def start(self, cwd=None): 41 | 42 | if not self.is_alive(): 43 | 44 | # find available port number 45 | self.logger.debug('Searching for available port number') 46 | sock_test = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 47 | sock_test.bind(("localhost", 0)) 48 | self.adxv_port = sock_test.getsockname()[1] 49 | sock_test.close() 50 | self.logger.debug(f'Port {self.adxv_port} will be used for adxv. 
Attempting to connect.') 51 | 52 | # build adxv start command 53 | adxv_comm = self.adxv_bin + ' -socket {} -hdf5dataset {}'.format(self.adxv_port, self.hdf5_path) 54 | 55 | for opt, val in self.adxv_opts.items(): 56 | adxv_comm += ' -{} {}'.format(opt, val) 57 | 58 | # start adxv 59 | self.logger.debug(f'adxv command is: \n {adxv_comm}') 60 | self.adxv_proc = subprocess.Popen(adxv_comm, shell=True, cwd=cwd) 61 | 62 | for i in range(10): # try for 5 seconds. 63 | try: 64 | self.sock = socket.socket(socket.AF_INET, 65 | socket.SOCK_STREAM) # On OSX(?), need to re-create object when failed 66 | self.sock.connect(("localhost", self.adxv_port)) 67 | self.logger.info('Connected to Port {}'.format(self.adxv_port)) 68 | break 69 | except socket.error as err: 70 | self.logger.debug('Waiting for socket connection...') 71 | time.sleep(.5) 72 | continue 73 | 74 | def is_alive(self): 75 | return self.adxv_proc is not None and self.adxv_proc.poll() is None # None means still running. 76 | 77 | def send(self, payload): 78 | ''' 79 | Takes command, encodes it, and sends it down the socket. 80 | ''' 81 | 82 | self.start() 83 | 84 | try: 85 | self.logger.debug("payload = {}".format(payload)) 86 | self.sock.sendall(payload.encode()) 87 | 88 | except Exception as e: 89 | self.logger.error(e) 90 | 91 | def load_image(self, image_file: str): 92 | ''' 93 | Load an image file 94 | ''' 95 | payload = 'load_image %s\n' % (image_file) 96 | self.send(payload) 97 | 98 | def raise_window(self, window: str): 99 | ''' 100 | Raises a Window. must be one of 101 | 'Control', 'Image', 'Magnify', 'Line', or 102 | 'Load'. 103 | ''' 104 | payload = 'raise_window %s\n' % (window) 105 | self.send(payload) 106 | 107 | def raise_image(self): 108 | ''' 109 | Raises image window; see raise_window for 110 | additional options but this seems like the 111 | most common one. 
112 | '''
113 | payload = 'raise_window Image\n'
114 | self.send(payload)
115 | 
116 | def save_image(self, path_name_format: str):
117 | '''
118 | Save an image file (jpeg or tiff)
119 | '''
120 | payload = 'save_image %s\n' % (path_name_format)
121 | self.send(payload)
122 | 
123 | def slab(self, N: int):
124 | '''
125 | Display slab N
126 | '''
127 | payload = 'slab %i\n' % (N)
128 | self.send(payload)
129 | 
130 | def set_slab(self, N: int):
131 | '''
132 | Same as slab, but don’t load the image
133 | '''
134 | payload = 'set_slab %i\n' % (N)
135 | self.send(payload)
136 | 
137 | def slabs(self, N: int):
138 | '''
139 | Slab thickness to display
140 | '''
141 | payload = 'slabs %i\n' % (N)
142 | self.send(payload)
143 | 
144 | def set_slabs(self, N: int):
145 | '''
146 | Same as slabs, but don’t load the image
147 | '''
148 | payload = 'set_slabs %i\n' % (N)
149 | self.send(payload)
150 | 
151 | def exit(self):
152 | '''
153 | Exit Adxv
154 | '''
155 | payload = 'exit\n'
156 | self.send(payload)
157 | 
158 | def stride(self, N: int):
159 | """
160 | stride - sets Stride in the Load Window
161 | """
162 | payload = 'stride %i\n' % (N)
163 | self.send(payload)
164 | 
165 | def increment_slabs(self):
166 | """
167 | increment_slabs - checks the +Slabs checkbox in the Load Window
168 | """
169 | payload = 'increment_slabs\n'
170 | self.send(payload)
171 | 
172 | def increment_files(self):
173 | """
174 | increment_files - unchecks the +Slabs checkbox in the Load Window
175 | """
176 | payload = 'increment_files\n'
177 | self.send(payload)
178 | 
179 | def contrast_min(self, N: int):
180 | """
181 | contrast_min - sets the min contrast value
182 | """
183 | payload = 'contrast_min %i\n' % (N)
184 | self.send(payload)
185 | 
186 | def contrast_max(self, N: int):
187 | """
188 | contrast_max - sets the max contrast value
189 | """
190 | payload = 'contrast_max %i\n' % (N)
191 | self.send(payload)
192 | 
193 | def define_spot(self, color, radius=0, box=0, group=None):
194 | 
195 | if group is None:
196 | self.spot_type_counter += 1
197 | else:
198 | self.spot_type_counter = group
199 | 
200 | self.send('box %d %d\n' % (box, box)) # seems ignored?
201 | self.send('define_type %d color %s radius %d\n' % (self.spot_type_counter, color, radius))  # use the resolved counter; group may be None
202 | 
203 | return self.spot_type_counter
204 | 
205 | def load_spots(self, spots):
206 | #if len(spots) == 0:
207 | # return
208 | 
209 | self.send("load_spots %d\n" % len(spots))
210 | 
211 | for x, y, t in spots:
212 | self.send("%.2f %.2f %d\n" % (x, y, t))
213 | 
214 | self.send("end_of_pack\n")
215 | 
-------------------------------------------------------------------------------- /diffractem/compute.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import dask.array as da
3 | 
4 | 
5 | def map_reduction_func(imgs, fun, *args, output_len=1, dtype=float, **kwargs):
6 | """
7 | Use dask array map blocks for functions that return a numpy vector of values (e.g. fit functions or 1D profiles)
8 | :param imgs: image stack as dask array, stacked along dimension 0
9 | :param fun: function to apply, needs to be able to process image stacks
10 | :param args: positional arguments to be supplied to the function.
Note that these have to have three dimensions 11 | :param output_len: length of output numpy vector 12 | :param dtype: data type of output numpy vector 13 | :param kwargs: keyword arguments to be supplied to the function 14 | :return: 15 | """ 16 | 17 | assert isinstance(imgs, da.core.Array) 18 | 19 | args_new = [] 20 | for arg in args: 21 | # broadcasting on arrays works on the last dimension, whereas the stack is in the first. This may cause trouble 22 | # if a parameter array is 1D or 2D 23 | if isinstance(arg, da.core.Array) or isinstance(arg, np.ndarray): 24 | if arg.ndim == 1: 25 | #print('upcasting 1D') 26 | arg = arg[:, np.newaxis, np.newaxis] 27 | elif arg.ndim == 2: 28 | #print('upcasting 2D') 29 | arg = arg[:, :, np.newaxis] 30 | args_new.append(arg) 31 | # print(fun) 32 | # print([type(a) for a in args_new]) 33 | # print({kw: type(v) for kw, v in kwargs.items()}) 34 | out = imgs.map_blocks(fun, *args_new, chunks=(imgs.chunks[0], output_len), 35 | drop_axis=(1, 2), new_axis=1, dtype=dtype, **kwargs) 36 | return out 37 | 38 | 39 | -------------------------------------------------------------------------------- /diffractem/io.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import pandas as pd 4 | from dask import array as da 5 | from collections import defaultdict 6 | import dask.diagnostics 7 | import os.path 8 | from diffractem import normalize_names 9 | import warnings 10 | from typing import Union 11 | from glob import glob 12 | 13 | 14 | def expand_files(file_list: Union[str, list], scan_shots=False, validate=False): 15 | 16 | def remove_bs(fns): 17 | return [fn.replace('\\', '/') for fn in fns] 18 | 19 | if isinstance(file_list, list) or isinstance(file_list, tuple): 20 | fl = remove_bs(file_list) 21 | if scan_shots: 22 | fl = pd.DataFrame(fl, columns=['file']) 23 | 24 | elif isinstance(file_list, str) and file_list.endswith('.lst'): 25 | if scan_shots: 26 | fl = pd.read_csv(file_list, sep=' ', header=None, engine='python', 27 | names=['file', 'Event']) 28 | fl['file'] = remove_bs(fl['file']) 29 | if fl.Event.isna().all(): 30 | fl.drop('Event', axis=1, inplace=True) 31 | else: 32 | fl = [] 33 | for s in open(file_list, 'r').readlines(): 34 | if '//' in s: 35 | raise RuntimeError('Shot identifier found in list file. 
You may want to set scan_shots=True') 36 | fl.append(s.split(' ', 1)[0].strip()) 37 | fl = remove_bs(fl) 38 | 39 | elif isinstance(file_list, str) and (file_list.endswith('.h5') or file_list.endswith('.nxs')): 40 | fl = remove_bs(sorted(glob(file_list))) 41 | if scan_shots: 42 | fl = pd.DataFrame(fl, columns=['file']) 43 | 44 | else: 45 | raise TypeError('file_list must be a list file, single or glob pattern of h5/nxs files, or a list of filenames') 46 | 47 | if (not scan_shots) and (not len(fl) == len(set(fl))): 48 | raise ValueError('File identifiers are not unique, most likely because the file names are not.') 49 | 50 | if validate: 51 | if scan_shots: 52 | raise ValueError('Validation is only allowed if scan_shot=False.') 53 | valid_files = [] 54 | for r in fl: 55 | try: 56 | with h5py.File(r, 'r') as fh: 57 | 58 | for k in fh.keys(): 59 | 60 | if (f'/{k}/shots' in fh) and (f'/{k}/map/features' in fh) and (f'/{k}/data' in fh): 61 | # print(r,': file validated!') 62 | valid_files.append(r) 63 | else: 64 | print(r, k, ': invalid file/subset!') 65 | except (OSError, IOError) as err: 66 | print('Could not open file', r, 'for validation because:') 67 | print(err) 68 | 69 | return valid_files 70 | 71 | else: 72 | return fl 73 | 74 | 75 | def dict_to_h5(grp, data, exclude=()): 76 | """ 77 | Write dictionary into HDF group (or file) object 78 | :param grp: HDF group or file object 79 | :param data: dictionary to be written into HDF5 80 | :param exclude: dataset or group names to be excluded 81 | :return: 82 | """ 83 | for k, v in data.items(): 84 | nk = normalize_names(k) 85 | if k in exclude: 86 | continue 87 | elif isinstance(v, dict): 88 | dict_to_h5(grp.require_group(nk), v, exclude=exclude) 89 | else: 90 | if nk in grp.keys(): 91 | grp[nk][...] = v 92 | else: 93 | grp.create_dataset(nk, data=v) 94 | 95 | 96 | def h5_to_dict(grp, exclude=('data', 'image'), max_len=100): 97 | """ 98 | Get dictionary from HDF group (or file) object 99 | :param grp: HDF group or file 100 | :param exclude: (sub-)group or dataset names to be excluded; by default 'data' and 'image 101 | :param max_len: maximum length of data field to be included (along first direction) 102 | :return: dictionary corresponding to HDF group 103 | """ 104 | d = {} 105 | for k, v in grp.items(): 106 | if k in exclude: 107 | continue 108 | if isinstance(v, h5py.Group): 109 | d[k] = h5_to_dict(v, exclude=exclude, max_len=max_len) 110 | elif isinstance(v, h5py.Dataset): 111 | if (len(v.shape) > 0) and (len(v) > max_len): 112 | print('Skipping', v.shape, len(v), max_len, v) 113 | continue 114 | d[k] = v.value 115 | return d 116 | 117 | def make_master_h5(file_list, file_name=None, abs_path=False, local_group='/', 118 | remote_group='/entry', verbose=False): 119 | fns, ids = expand_files(file_list, True) 120 | 121 | if isinstance(file_list, str) and file_list.endswith('.lst'): 122 | if file_name is None: 123 | file_name = file_list.rsplit('.', 1)[0] + '.h5' 124 | else: 125 | if file_name is None: 126 | raise ValueError('Please provide output file name explicitly, if input is not a file list.') 127 | 128 | f = h5py.File(file_name, 'w') 129 | 130 | try: 131 | 132 | subsets = [] 133 | 134 | for fn, id in zip(fns, ids): 135 | 136 | subset = id 137 | 138 | if subset in subsets: 139 | raise KeyError('File names are not unique!') 140 | else: 141 | subsets.append(subset) 142 | 143 | if abs_path: 144 | fn2 = os.getcwd() + '/' + fn 145 | else: 146 | fn2 = fn 147 | 148 | if not os.path.isfile(fn2): 149 | raise FileNotFoundError(f'File {fn2} present 
in {file_list} not found!') 150 | 151 | if verbose: 152 | print(f'Referencing file {fn2} as {subset}') 153 | if local_group != '/': 154 | f.require_group(local_group) 155 | 156 | f[local_group + '/' + subset] = h5py.ExternalLink(fn2, remote_group) 157 | 158 | except Exception as err: 159 | f.close() 160 | os.remove(file_name) 161 | raise err 162 | 163 | f.close() 164 | 165 | return file_name 166 | 167 | -------------------------------------------------------------------------------- /diffractem/nexus.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from collections import defaultdict 4 | from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, wait, FIRST_EXCEPTION 5 | from itertools import repeat 6 | from typing import Union, List, Tuple, Optional 7 | import os 8 | import h5py 9 | import numpy as np 10 | import pandas as pd 11 | from warnings import warn 12 | from .io import expand_files, dict_to_h5 13 | from distributed.lock import Lock 14 | 15 | 16 | def _get_table_from_single_file(fn: str, path: str) -> pd.DataFrame: 17 | identifiers = path.rsplit('%', 1) 18 | lists = [] 19 | with h5py.File(fn, 'r') as fh: 20 | 21 | try: 22 | if len(identifiers) == 1: 23 | subsets = [''] 24 | else: 25 | subsets = fh[identifiers[0]].keys() 26 | 27 | for subset in subsets: 28 | tbl_path = path.replace('%', subset) 29 | if tbl_path not in fh: 30 | # warn(f'Group {tbl_path} not found in {fn}.') 31 | raise KeyError(f'Group {tbl_path} not found in {fn}.') 32 | # newlist = None 33 | 34 | if 'pandas_type' in fh[tbl_path].attrs: 35 | warn(f'{fn}:{tbl_path} in Pandas/PyTables format. Please consider converting.', DeprecationWarning) 36 | try: 37 | newlist = pd.read_hdf(fn, tbl_path) 38 | except Exception as err: 39 | print('Tried to load a table in old-style diffractem format (pytables-style):\n' 40 | f'{fn}:{tbl_path} in Pandas/PyTables format.\n' 41 | 'For this to work, you need to install the pytables package.\n' 42 | 'Also, please consider converting the files to new form.') 43 | raise err 44 | else: 45 | dt = {} 46 | for key, val in fh[tbl_path].items(): 47 | if val.ndim != 1: 48 | warn('Data fields in list group must be 1-D, {} is {}-D. 
Skipping.'.format(key, val.ndim)) 49 | continue 50 | dt_field = val.dtype 51 | if 'label' in val.attrs: 52 | k = val.attrs['label'] 53 | else: 54 | k = key 55 | if dt_field.type == np.string_: 56 | try: 57 | dt[k] = val[:].astype(np.str) 58 | except UnicodeDecodeError as err: 59 | print(f'Field {key} of type {dt_field} gave decoding trouble:') 60 | raise err 61 | else: 62 | dt[k] = val[:] 63 | newlist = pd.DataFrame().from_dict(dt) 64 | 65 | newlist['subset'] = subset 66 | newlist['file'] = fn 67 | lists.append(newlist) 68 | 69 | except KeyError as kerr: 70 | raise KeyError(f'{path} not found in {fn}.') 71 | 72 | return pd.concat(lists, axis=0, ignore_index=True) 73 | 74 | 75 | def get_table(files: Union[list, str], path='/%/shots', parallel=True) -> pd.DataFrame: 76 | 77 | files = expand_files(files) 78 | 79 | if parallel: 80 | with ProcessPoolExecutor() as p: 81 | out = p.map(_get_table_from_single_file, files, repeat(path)) 82 | # ftrs = [] 83 | # for fn in files: 84 | # ftrs.append(p.submit(_get_table_from_single_file, fn, path)) 85 | # TODO make this more robust against errors by changing to submit instead of map and handling single-file errors 86 | 87 | else: 88 | out = map(_get_table_from_single_file, files, repeat(path)) 89 | 90 | out = pd.concat(out, ignore_index=True, sort=False) 91 | 92 | return out 93 | 94 | 95 | def _store_table_to_single_subset(tbl: pd.DataFrame, fn: str, path: str, subset: str, format: str = 'nexus'): 96 | """ 97 | Helper function. Internal use only. 98 | """ 99 | 100 | tbl_path = path.replace('%', subset) 101 | if format == 'table': 102 | try: 103 | tbl.to_hdf(fn, tbl_path, format='table', data_columns=True) 104 | except ValueError: 105 | tbl.to_hdf(fn, tbl_path, format='table') 106 | 107 | elif format == 'nexus': 108 | with h5py.File(fn, 'a') as fh: 109 | for key, val in tbl.iteritems(): 110 | #print(f'Storing {key} ({val.shape}, {val.dtype}) to {fn}: {path}') 111 | grp = fh.require_group(tbl_path) 112 | grp.attrs['NX_class'] = 'NXcollection' 113 | k = key.replace('/', '_').replace('.', ' ') 114 | try: 115 | if k not in grp: 116 | ds = grp.require_dataset(k, shape=val.shape, dtype=val.dtype, maxshape=(None,)) 117 | else: 118 | ds = grp[k] 119 | if ds.shape[0] != val.shape[0]: 120 | ds.resize(val.shape[0], axis=0) 121 | #print('resizing', k) 122 | ds[:] = val 123 | except (TypeError, OSError) as err: 124 | if val.dtype == 'O': 125 | val2 = val.astype('S') 126 | if k in grp: 127 | del grp[k] 128 | ds = grp.require_dataset(k, shape=val.shape, dtype=val2.dtype, maxshape=(None,)) 129 | ds[:] = val2 130 | else: 131 | raise err 132 | 133 | ds.attrs['label'] = key 134 | else: 135 | raise ValueError('Storage format must be "table" or "nexus".') 136 | 137 | 138 | def store_table(table: pd.DataFrame, path: str, 139 | parallel: bool = True, format: str = 'nexus', 140 | file: Optional[str] = None, subset: Optional[str] = None): 141 | """ 142 | Stores a pandas DataFrame containing 'file' and 'subset' columns to multiple HDF5 files. Essentially a 143 | multi-file, multi-processed wrapper to pd.to_hdf 144 | :param table: DataFrame to be stored 145 | :param path: path in HDF5 files. % will be substituted by the respective subset name 146 | :param parallel: if True (default), writes files in parallel 147 | :param format: can be 'nexus' to write columns of table in separate arrays, or 'tables' to use PyTables to write 148 | a HDF5 table object. 149 | :return: list of futures (see documentation of concurrent.futures). 
[None] if parallel=False 150 | """ 151 | 152 | # TODO: could be that parallel execution with multiple subsets/table/types will not work 153 | 154 | if (file is None) and parallel: 155 | 156 | with ProcessPoolExecutor() as exec: 157 | futures = [] 158 | try: 159 | for (fn, ssn), ssdat in table.groupby(['file', 'subset']): 160 | futures.append(exec.submit(_store_table_to_single_subset, ssdat, fn, path, ssn, format)) 161 | except Exception as err: 162 | print('Error during storing table in', path) 163 | print('Table columns are:', ', '.join(table.columns)) 164 | # print(table) 165 | raise err 166 | 167 | wait(futures, return_when=FIRST_EXCEPTION) 168 | 169 | for f in futures: 170 | if f.exception(): 171 | raise f.exception() 172 | 173 | return futures 174 | 175 | else: 176 | #print(path) 177 | #print(table.columns) 178 | 179 | if file is not None: 180 | _store_table_to_single_subset(table, file, path, subset, format) 181 | 182 | else: 183 | for (fn, ssn), ssdat in table.groupby(['file', 'subset']): 184 | _store_table_to_single_subset(ssdat, fn, path, ssn, format) 185 | 186 | return [None] 187 | 188 | def _save_single_chunk(dat: np.ndarray, file: str, subset: str, label: str, 189 | idcs: Union[list, np.ndarray], data_pattern: str, lock): 190 | lock.acquire() 191 | with h5py.File(file, 'a') as fh: 192 | path = f'{data_pattern}/{label}'.replace('%', subset) 193 | fh[path][idcs,:,:] = dat 194 | lock.release() 195 | return file, subset, path, idcs 196 | 197 | def _save_single_chunk_multi(chks: dict, file: str, subset: str, 198 | idcs: Union[list, np.ndarray], lock: Lock): 199 | lock.acquire() 200 | # print('Have lock: ', lock) 201 | with h5py.File(file, 'a') as fh: 202 | for p, d in chks.items(): 203 | fh[p.replace('%', subset)][idcs,...] = d 204 | lock.release() 205 | return file, subset, list(chks.keys()), idcs 206 | 207 | def meta_to_nxs(filename, meta=None, exclude=('Detector',), meta_grp='/entry/instrument', 208 | data_grp='/entry/data', data_field='raw_counts', data_location='/entry/instrument/detector/data'): 209 | """ 210 | Merges a dict containing metadata information for a serial data acquisition into an existing detector nxs file. 211 | Additionally, it adds a soft link to the actual data for easier retrieval later (typically into /entry/data) 212 | :param filename: NeXus file or lists 213 | :param meta: can be set to {} -> no meta action performed. Or a JSON file name. If None, a JSON file name will be 214 | derived from nxs_file by replacing .nxs by .json (useful in loops) 215 | :param exclude: names of meta groups or fields to exclude 216 | :param meta_grp: location in the NeXus, where the metadata should go to 217 | :param data_grp: location of softlink to the data stack. No softlink action if None. 
218 | :param data_field: name of the softlink to the data stack 219 | :param data_location: location of the data stack 220 | :return: 221 | """ 222 | 223 | # TODO: add functions to include flat field and pixel mask 224 | 225 | if (not isinstance(filename, str)) or filename.endswith('.lst'): 226 | fns = expand_files(filename) 227 | for fn in fns: 228 | meta_to_nxs(fn, meta=meta, exclude=exclude, meta_grp=meta_grp, 229 | data_grp=data_grp, data_field=data_field, data_location=data_location) 230 | return 231 | 232 | with h5py.File(filename, 'r+') as f: 233 | 234 | if meta is None: 235 | meta = filename.rsplit('.', 1)[0] + '.json' 236 | 237 | if isinstance(meta, str): 238 | try: 239 | meta = json.load(open(meta)) 240 | except FileNotFoundError: 241 | print('No metafile found.') 242 | meta = {} 243 | 244 | elif isinstance(meta, dict): 245 | pass 246 | 247 | elif isinstance(meta, pd.DataFrame): 248 | meta = next(iter(meta.to_dict('index').values())) 249 | 250 | dict_to_h5(f.require_group(meta_grp), meta, exclude=exclude) 251 | 252 | if data_grp is not None: 253 | dgrp = f.require_group(data_grp) 254 | dgrp.attrs['NX_class'] = np.string_('NXdata') 255 | dgrp.attrs['signal'] = np.string_(data_field) 256 | 257 | if data_field in dgrp.keys(): 258 | del dgrp[data_field] 259 | dgrp[data_field] = h5py.SoftLink(data_location) 260 | 261 | 262 | def get_meta_fields(files: Union[str, list], dataset_paths: Union[list, str, tuple, dict], shorten_labels=True): 263 | """ 264 | Get arbitrary meta data from files. 265 | :param files: 266 | :param dataset_paths: list of dataset paths, or dict of structure {dataset: default value} 267 | :param shorten_labels: only use final section of labels for columns of returned DataFrame 268 | :return: pandas DataFrame of metadata 269 | """ 270 | 271 | if isinstance(dataset_paths, str): 272 | dataset_paths = [dataset_paths] 273 | 274 | if isinstance(dataset_paths, list) or isinstance(dataset_paths, tuple): 275 | dataset_paths = {f: None for f in dataset_paths} 276 | 277 | values = defaultdict(dict) 278 | dtypes = {} 279 | fns = expand_files(files) 280 | 281 | for fn in fns: 282 | with h5py.File(fn, mode='r') as fh: 283 | for field, default in dataset_paths.items(): 284 | 285 | identifiers = field.rsplit('%', 1) 286 | 287 | if len(identifiers) == 1: 288 | subsets = [''] 289 | else: 290 | subsets = fh[identifiers[0]].keys() 291 | 292 | for subset in subsets: 293 | try: 294 | # print(f[field]) 295 | values[field][(fn, subset)] = fh[field.replace('%', subset)][...] 296 | dtypes[field] = fh[field.replace('%', subset)].dtype 297 | if dtypes[field] == 'O': 298 | dtypes[field] = str 299 | # print(field, fh[field.replace('%', subset)].dtype) 300 | except KeyError: 301 | values[field][(fn, subset)] = default 302 | 303 | newcols = {'level_0': 'file', 'level_1': 'subset'} 304 | if shorten_labels: 305 | newcols.update({k: k.rsplit('/', 1)[-1] for k in dataset_paths}) 306 | return pd.DataFrame(values).astype(dtypes).reset_index().rename(columns=newcols) 307 | 308 | 309 | def copy_h5(fn_from, fn_to, exclude=('%/detector/data', '/%/data/%', '/%/results/%'), mode='w-', 310 | print_skipped=False, h5_folder=None, h5_suffix='.h5'): 311 | """ 312 | Copies datasets h5/nxs files or lists of them to new ones, with exclusion of datasets. 313 | :param fn_from: single h5/nxs file or list file 314 | :param fn_to: new file name, or new list file. 
If the latter, specify with h5_folder and h5_suffix how the new names 315 | are supposed to be constructed 316 | :param exclude: patterns for data sets to be excluded. All regular expressions are allowed, % is mapped to .* 317 | (i.e., any string of any length), for compatibility with CrystFEL 318 | :param mode: mode in which new files are opened. By default w-, i.e., files are created, but never overwritten 319 | :param print_skipped: print the skipped data sets, for debugging 320 | :param h5_folder: if operating on a list: folder where new h5 files should go 321 | :param h5_suffix: if operating on a list: suffix appended to old files (after stripping their extension) 322 | :return: 323 | """ 324 | 325 | # multi-file copy, using recursive call. 326 | if (isinstance(fn_from, str) and fn_from.endswith('.lst')) or isinstance(fn_from, list): 327 | warn('Calling copy_h5 on a file list is not recommended anymore', DeprecationWarning) 328 | old_files = expand_files(fn_from) 329 | new_files = [] 330 | 331 | for ofn in old_files: 332 | # print(ofn) 333 | # this loop could beautifully be parallelized. For later... 334 | if h5_folder is None: 335 | h5_folder = ofn.rsplit('/', 1)[0] 336 | if h5_suffix is None: 337 | h5_suffix = ofn.rsplit('.', 1)[-1] 338 | nfn = h5_folder + '/' + ofn.rsplit('.', 1)[0].rsplit('/', 1)[-1] + h5_suffix 339 | new_files.append(nfn) 340 | # exclude detector data and shot list 341 | copy_h5(ofn, nfn, exclude, mode, print_skipped) 342 | 343 | with open(fn_to, 'w') as f: 344 | f.write('\n'.join(new_files)) 345 | 346 | return 347 | 348 | # single-file copy 349 | try: 350 | 351 | # no exclusion... simply copy file 352 | if len(exclude) == 0: 353 | from shutil import copyfile 354 | copyfile(fn_from, fn_to) 355 | return 356 | 357 | exclude_regex = [re.compile(ex.replace('%', '.*')) for ex in exclude] 358 | 359 | def copy_exclude(key, ds, to): 360 | # function to copy a single entry within a HDF hierarchy, and do recursive calls 361 | # if required. If it finds its key in the exclusion patterns, just skips that entry. 362 | 363 | for ek in exclude_regex: 364 | if ek.fullmatch(ds.name) is not None: 365 | if print_skipped: 366 | print(f'Skipping key {key} due to {ek}') 367 | return 368 | 369 | if isinstance(ds, h5py.Dataset): 370 | to.copy(ds, key) 371 | 372 | elif isinstance(ds, h5py.Group) and 'table_type' in ds.attrs.keys(): 373 | # pandas table is a group. Do NOT traverse into it (or experience infinite pain) 374 | # print(f'Copying table {key}') 375 | to.copy(ds, key) 376 | 377 | elif isinstance(ds, h5py.Group): 378 | # print(f'Creating group {key}') 379 | new_grp = to.require_group(key) 380 | 381 | # attribute copying. Lots of error catching required. 
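# The TypeError fallback below handles attribute values (e.g. unicode arrays)
# that h5py cannot write verbatim; they are coerced to fixed-length byte
# strings via np.string_ before giving up on the attribute.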
382 | try: 383 | for k, v in ds.attrs.items(): 384 | try: 385 | new_grp.attrs.create(k, v) 386 | except TypeError as err: 387 | new_grp.attrs.create(k, np.string_(v)) 388 | except OSError: 389 | # some newer HDF5 attribute types (used by pytables) will crash h5py even just listing them 390 | # print(f'Could not copy attributes of group {ds.name}') 391 | pass 392 | 393 | for k, v in ds.items(): 394 | lnk = ds.get(k, getlink=True) 395 | if isinstance(lnk, h5py.SoftLink): 396 | for ek in exclude_regex: 397 | if ek.fullmatch(lnk.path) is not None: 398 | if print_skipped: 399 | print(f'Skipping soft link to {ek}') 400 | break 401 | else: 402 | new_grp[k] = h5py.SoftLink(lnk.path) 403 | continue 404 | 405 | copy_exclude(k, v, new_grp) 406 | 407 | # for k, v in ds.items(): 408 | # lnk = ds.get(k, getlink=True) 409 | # if isinstance(lnk, h5py.SoftLink): 410 | # new_grp[k] = h5py.SoftLink(lnk.path) 411 | # continue 412 | # copy_exclude(k, v, new_grp) 413 | 414 | with h5py.File(fn_from, mode='r') as f, h5py.File(fn_to, mode=mode) as f2: 415 | copy_exclude('/', f, f2) 416 | 417 | except Exception as err: 418 | if os.path.exists(fn_to): 419 | os.remove(fn_to) 420 | print(f'Error occurred while attempting to copy data from {fn_from} to {fn_to}.') 421 | raise err 422 | -------------------------------------------------------------------------------- /diffractem/pre_proc_opts.py: -------------------------------------------------------------------------------- 1 | 2 | import yaml 3 | import pprint 4 | import json 5 | from typing import Union 6 | 7 | #TODO consider to make this a types.SimpleNamespace or use dataclasses.dataclass 8 | class PreProcOpts: 9 | def __init__(self, fn=None): 10 | 11 | self._filename = None 12 | 13 | self.reference: str = 'Ref12_reference.tif' #: Name of reference image for flat-field correction in TIF format 14 | self.pxmask: str = 'Ref12_pxmask.tif' #: Name of pixelmask TIF image 15 | self.correct_saturation: bool = True #: Correct for detector saturation using paralyzable model 16 | self.remove_background: bool = True #: Determine and subtract background during image correction 17 | self.dead_time: float = 1.9e-3 #: Dead time (in ms) for paralyzable detector model 18 | self.dead_time_gap_factor: float = 2 #: Factor for dead time in gap-pixels 19 | self.shutter_time: float = 2 #: Shutter time (in ms) for paralyzable detector model 20 | self.mask_gaps: bool = True #: Always mask detector gaps (regardless of dead pixel mask) 21 | self.interpolate_dead: bool = False #: Interpolate dead pixels instead of masking 22 | self.float: bool = False #: Process images as floating-point 23 | self.int_factor: int = 1 #: Factor to apply to corrected images if float=False 24 | self.find_peaks: bool = True #: apply peakfinder during image analysis 25 | self.compression: Union[int, str] = 32004 #: standard HDF5 compression. Suggested values: gzip, none, 32004 (lz4) 26 | self.cam_length: float = 2 #: Average camera length (in m). 27 | self.y_scale: float = 1 #: Scaling of camera length along y. DEPRECATED! 28 | self.ellipse_ratio: float = 1 #: ellipticity of camera lentgh along arbitrary axis 29 | self.ellipse_angle: float = 0 #: cam length ellipticity angle. (e.g. 0 is x, pi/2 is y,...) 
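        # Illustrative note: at least as applied in proc_peaks.get_pk_data, the two
        # parameters above act as a rotation by ellipse_angle (interpreted in degrees
        # there), a scaling of x by 1/sqrt(ellipse_ratio) and of y by
        # sqrt(ellipse_ratio), and a back-rotation; i.e. ellipse_ratio is the axis
        # ratio of the elliptical distortion and ellipse_angle its orientation.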
30 | self.pixel_size: float = 55e-6 #: Pixel size (in m) 31 | self.wavelength: float = 0.0251 #: Radiation wave length (in A) 32 | self.com_threshold:float = 0.9 #: minimum counts in a pixel to be considered for the center-of-mass calculation 33 | self.com_xrng: int = 800 #: x range (px) around geometric pattern center in which to look for center of mass 34 | self.com_yrng: int = 800 #: y range (px) around geometric pattern center in which to look for center of mass 35 | self.lorentz_radius: int= 30 #: radius (px) around center of mass for Lorentz fit of zero order 36 | self.lorentz_maxshift: float = 36 #: maximum shift (px) of Lorentz fit center from center of mass 37 | self.xsize: int = 1556 #: x image size (px) 38 | self.ysize: int = 516 #: y image size (px) 39 | self.r_adf1: tuple = (50, 100) #: inner/outer radii for virtual ADF 1 (px) 40 | self.r_adf2: tuple = (100, 200) #: inner/outer radii for virtual ADF 2 (px) 41 | self.select_query: str = 'frame >= 0' #: query string for selection of shots from raw data 42 | self.agg_query: str = 'frame >= 0 and frame <= 5' #: query string for aggregation of patterns 43 | self.agg_file_suffix: str = '_agg.h5' #: file suffix for aggregated patterns 44 | self.aggregate: bool = True #: calculate aggregated patterns (only for real-time analysis) 45 | self.scratch_dir: str = '/scratch/diffractem' #: scratch directory for temporary data 46 | self.proc_dir: str = 'proc_data' #: directory for pre-processed data 47 | self.peak_data_path: str = '/%/data' #: path in HDF5 files to peak data in CXI format 48 | self.det_shift_x_path: str = 'det_shift_x_mm' #: path in HDF5 files to lab frame detector shift (x) 49 | self.det_shift_y_path: str = 'det_shift_y_mm' #: path in HDF5 files to lab frame detector shift (y) 50 | self.rechunk: bool = None 51 | self.peak_search_params: dict = \ 52 | {'min-res': 5, 'max-res': 600, 53 | 'local-bg-radius': 5, 'threshold': 8, 54 | 'min-pix-count': 3, 'max-pix-count': 10000, 55 | 'min-snr': 3.5, 56 | 'peaks': 'peakfinder8'} #: parameters for peak finding using peakfinder8 57 | self.indexing_params: dict = \ 58 | {'indexing': 'pinkIndexer', 59 | 'integration': 'rings-nograd-nocen', 60 | 'int-radius': '3,4,6', 61 | 'peaks': 'cxi', 62 | 'max-indexer-threads': 2, 63 | 'min-peaks': 15, 64 | 'no-refine': True, 65 | 'no-retry': True, 66 | 'no-check-peaks': True, 67 | 'camera-length-estimate': 1, 68 | 'pinkIndexer-considered-peaks-count': 4, 69 | 'pinkIndexer-angle-resolution': 4, 70 | 'pinkIndexer-refinement-type': 5, 71 | 'pinkIndexer-tolerance': 0.1, 72 | 'pinkIndexer-reflection-radius': 0.001, 73 | 'pinkIndexer-max-resolution-for-indexing': 2, 74 | 'pinkIndexer-no-check-indexed': False 75 | } #: indexamajig parameters for indexing 76 | self.integration_params: dict = \ 77 | {'indexing': 'file', 78 | 'integration': 'rings-nograd-nocen', 79 | 'int-radius': '3,4,6', 80 | 'peaks': 'cxi', 81 | 'max-indexer-threads': 2, 82 | 'min-peaks': 15, 83 | 'no-refine': True, 84 | 'no-retry': True, 85 | 'no-check-peaks': True, 86 | 'overpredict': False 87 | } #: indexamajig parameters for integration-only (without indexing) 88 | self.peak_search_params.update({'temp-dir': self.scratch_dir}) 89 | self.indexing_params.update({'temp-dir': self.scratch_dir}) 90 | self.indexing_params.update({'camera-length-estimate': self.cam_length}) 91 | self.max_peaks: int = 500 #: maximum number of peaks for peak finding 92 | self.im_exc = 'indexamajig' #: default executable for indexamajig, can point to non-standard path 93 | self.friedel_refine = True #: perform 
Friedel-pair center refinement in get_pattern_info 94 | self.min_peaks = 10 #: minimum peaks for Friedel refinement (and auto-preproc) 95 | self.peak_sigma = 2 #: peak blurring for Friedel refinement (increase for sloppy initial center) 96 | self.friedel_max_radius = None #: maximum radius in pixels of peaks to be considered for Friedel refinement 97 | 98 | if fn is not None: 99 | self.load(fn) 100 | 101 | def __str__(self): 102 | return pprint.pformat(self.__dict__) 103 | 104 | def __repr__(self): 105 | return pprint.pformat(self.__dict__) 106 | 107 | def load(self, fn=None): 108 | 109 | fn = self._filename if fn is None else fn 110 | if fn is None: 111 | raise ValueError('Please set the option file name first') 112 | 113 | if fn.endswith('json'): 114 | config = json.load(open(fn, 'r')) 115 | elif fn.endswith('yaml'): 116 | config = yaml.safe_load(open(fn, 'r')) 117 | else: 118 | raise ValueError('File extension must be .yaml or .json.') 119 | 120 | for k, v in config.items(): 121 | if k in self.__dict__: 122 | setattr(self, k, v) 123 | else: 124 | print('Option', k, 'in', fn, 'unknown.') 125 | 126 | self._filename = fn 127 | 128 | def save(self, fn: str): 129 | if fn.endswith('json'): 130 | json.dump(self.__dict__, open(fn, 'w'), skipkeys=True, indent=4) 131 | elif fn.endswith('yaml'): 132 | yaml.dump(self.__dict__, open(fn, 'w'), sort_keys=False) -------------------------------------------------------------------------------- /diffractem/proc_peaks.py: -------------------------------------------------------------------------------- 1 | # Friedel-pair refinement 2 | from scipy.optimize import least_squares 3 | import numpy as np 4 | import pandas as pd 5 | from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, wait, ALL_COMPLETED 6 | from multiprocessing import current_process 7 | from typing import Optional 8 | from .pre_proc_opts import PreProcOpts 9 | from . 
import tools, proc2d 10 | from warnings import warn 11 | 12 | 13 | def _ctr_from_pks(pkl: np.ndarray, p0: np.ndarray, 14 | int_weight: bool = False, sigma: float = 2.0, bound: float = 5.0, label: str = None): 15 | """Gets the refined peak center position from a list of peaks containing Friedel mates 16 | 17 | Arguments: 18 | pkl {np.ndarray} -- [List of peaks, with x and y values in 0th and 1st column, optionally intensity on 2nd] 19 | p0 {np.ndarray} -- [Initial position] 20 | 21 | Keyword Arguments: 22 | int_weight {bool} -- [weight peaks by their intensity] (default: {False}) 23 | sigma {float} -- [assumed peak rms radius for matching] (default: {2.0}) 24 | bound {float} -- [maximum shift] (default: {5.0}) 25 | label {str} -- [label to be returned in output] (default: {None}) 26 | 27 | Returns: 28 | [tuple] -- [refined position, inverse cost function, label] 29 | """ 30 | if int_weight: 31 | corr = lambda p: np.sum(np.matmul(pkl[:, 2:3], pkl[:, 2:3].T) 32 | * np.exp(-((pkl[:, 0:1] + pkl[:, 0:1].T - 2 * p[0]) ** 2 33 | + (pkl[:, 1:2] + pkl[:, 1:2].T - 2 * p[1]) ** 2) / (2 * sigma ** 2))) \ 34 | / np.sum(np.matmul(pkl[:, 2:3], pkl[:, 2:3].T)) 35 | else: 36 | corr = lambda p: np.sum(np.exp(-((pkl[:, 0:1] + pkl[:, 0:1].T - 2 * p[0]) ** 2 37 | + (pkl[:, 1:2] + pkl[:, 1:2].T - 2 * p[1]) ** 2) / (2 * sigma ** 2))) \ 38 | / (2*pkl.shape[0]) 39 | 40 | fun = lambda p: 1 / max(corr(p), 1e-10) # prevent infs 41 | if np.isnan(fun(p0)): 42 | return p0, np.nan, label 43 | else: 44 | lsq = least_squares(fun, p0, bounds=(p0 - bound, p0 + bound)) 45 | return lsq.x - 0.5, 1 / lsq.cost, label # -0.5 changes from CrystFEL-like to pixel-center convention 46 | 47 | 48 | def center_friedel(peaks: pd.DataFrame, shots: Optional[pd.DataFrame] = None, 49 | p0=(778, 308), colnames=('fs/px', 'ss/px'), sigma=2, 50 | minpeaks=4, maxres: Optional[float] = None): 51 | """[Center refinement of diffraction patterns from a list of peaks, assuming the presence 52 | of a significant number of Friedel mates.] 53 | 54 | Arguments: 55 | peaks {[pd.DataFrame]} -- [peaks list for entire data set, as returned by StreamParser. CrystFEL convention!] 
56 | 57 | Keyword Arguments: 58 | shots {[pd.DataFrame]} -- [shot list of data set, optional] (default: {None}) 59 | p0 {tuple} -- [starting position for center search] (default: {(778, 308)}) 60 | colnames {tuple} -- [column names for x and y coordinate] (default: {('fs/px', 'ss/px')}) 61 | sigma {int} -- [peak rms radius (determines 'sharpness' of matching)] (default: {2}) 62 | minpeaks {int} -- [minimum peak number to try matching] (default: {4}) 63 | maxres {int} -- [maximum radius of peaks to still be considered] (default: {None}) 64 | """ 65 | colnames = list(colnames) 66 | p0 = np.array(p0) 67 | 68 | if current_process().daemon: 69 | print('Danger, its a Daemon.') 70 | 71 | with ProcessPoolExecutor() as p: 72 | futures = [] 73 | for grp, pks in peaks.groupby(['file', 'Event']): 74 | pkl = pks.loc[:, colnames].values 75 | rsq = (pkl[:, 0] - p0[0]) ** 2 + (pkl[:, 1] - p0[1]) ** 2 76 | if maxres is not None: 77 | pkl = pkl[rsq < maxres ** 2, :] 78 | if (minpeaks is None) or pkl.shape[0] > minpeaks: 79 | futures.append(p.submit(_ctr_from_pks, pkl, p0, sigma=sigma, label=grp)) 80 | 81 | wait(futures, return_when=ALL_COMPLETED) 82 | if len(futures) == 0: 83 | cpos = shots[['file', 'Event']].copy() 84 | cpos['beam_x'] = p0[0] 85 | cpos['beam_y'] = p0[1] 86 | cpos['friedel_cost'] = np.nan 87 | 88 | return cpos 89 | 90 | # reformat result into a dataframe 91 | cpos = pd.concat([pd.DataFrame(data=np.array([t.result()[2] for t in futures if t.exception() is None]), 92 | columns=['file', 'Event']), 93 | pd.DataFrame(data=np.array([t.result()[0] for t in futures if t.exception() is None]), 94 | columns=['beam_x', 'beam_y']), 95 | pd.DataFrame(data=np.array([t.result()[1] for t in futures if t.exception() is None]), 96 | columns=['friedel_cost'])], 97 | axis=1) 98 | 99 | if shots is not None: 100 | # include shots that were not present in the peaks table 101 | cpos = shots[['file', 'Event']].merge(cpos, on=['file', 'Event'], how='left'). \ 102 | fillna({'beam_x': p0[0], 'beam_y': p0[1]}) 103 | 104 | return cpos 105 | 106 | 107 | def get_acf(npk, x, y, I=None, roi_length=512, output_radius=256, 108 | oversample=4, radial=True, px_ang=None, execution='processes'): 109 | """Gets the autocorrelation/pair correlation function of Bragg peak positions, 110 | optionally with intensity weighting. 111 | 112 | It is important to set the computation region properly (i.e., the 113 | maximum peak positions from the center to take into account), as this affects 114 | computation speed and impact of non-paraxiality at larger angles. It can 115 | be defined using the `roi_length` argument. 116 | 117 | Peaks must be given in CXI format. 118 | 119 | Args: 120 | npk (np.ndarray, int): number of peaks 121 | x (np.ndarray): x-coordinates of peaks, *relative to pattern center* 122 | y (np.ndarray): y-coordinates of peaks, *relative to pattern center* 123 | I ([type], optional): peak intensities. Set to 1 if None. Defaults to None. 124 | roi_length (int, optional): edge length of the region around the image 125 | center that is used for the computation. Defaults to 512. 126 | output_radius (int, optional): maximum included radius of the output ACF. 127 | The size of the 2D output will be 2*output_radius*oversample, 128 | the size of the radial average will be output_radius*oversample. Defaults to 600. 129 | oversample (int, optional): oversampling, that is, by how much smaller the bin 130 | sizes of the output are than that of the input (usually the pixels). Defaults to 4. 
131 | radial (bool, optional): compute the radial average of the ACF. Defaults to True.
132 | px_ang (double, optional): diffraction angle corresponding to a distance of 1 pixel
133 | from the center, given in rad (practically: detector pixel size/cam length). If
134 | given, non-paraxiality of the geometry is corrected (not tested well yet).
135 | Defaults to None.
136 | execution (str, optional): way of parallelism if a stack of pattern peak data
137 | is supplied. Can be 'single-threaded', 'threads', 'processes'.
138 | 
139 | Returns:
140 | np.ndarray: 2D autocorrelation function.
141 | Length will be 2 * oversample * output_range
142 | np.ndarray: 1D radial sum (None for radial=False).
143 | Length will be oversample * output_range
144 | """
145 | 
146 | from numpy import fft
147 | from itertools import repeat
148 | 
149 | # if a stack of pattern data is supplied, call recursively on single shots
150 | if isinstance(npk, np.ndarray) and len(npk) > 1:
151 | _all_args = zip(npk, x, y, repeat(None) if I is None else I)
152 | _kwargs = {'roi_length': roi_length,
153 | 'output_radius': output_radius,
154 | 'oversample': oversample,
155 | 'radial': radial,
156 | 'px_ang': px_ang}
157 | if execution == 'single-threaded':
158 | res = [get_acf(*_args, **_kwargs) for _args in _all_args]
159 | else:
160 | with (ProcessPoolExecutor() if execution=='processes'
161 | else ThreadPoolExecutor()) as exc:
162 | ftrs = [exc.submit(get_acf, *_args, **_kwargs) for _args in _all_args]
163 | wait(ftrs, return_when='FIRST_EXCEPTION')
164 | # for ftr in ftrs:
165 | # if ftr.exception() is not None:
166 | # raise ftr.exception()
167 | res = [f.result() for f in ftrs]
168 | return (np.stack(stk) for stk in zip(*res))
169 | 
170 | sz = roi_length * oversample
171 | rng = output_radius * oversample
172 | if rng > sz//2-1:
173 | raise ValueError(f'Maximum output range is {roi_length//2-1}.')
174 | 
175 | if px_ang is not None:
176 | t_par = (x[:npk]**2 + y[:npk]**2)**.5 * px_ang
177 | acorr = 2*np.sin(np.arctan(t_par)/2) / t_par
178 | else:
179 | acorr = 1
180 | 
181 | pkx = (oversample * acorr * x[:npk]).round().astype(int) + sz//2
182 | pky = (oversample * acorr * y[:npk]).round().astype(int) + sz//2
183 | pkI = None if I is None else I[:npk]
184 | 
185 | valid = (pkx >= 0) & (pkx < sz) & (pky >= 0) & (pky < sz)
186 | pkx, pky, pkI = pkx[valid], pky[valid], 1 if I is None else pkI[valid]
187 | dense = np.zeros((sz, sz), dtype=np.uint8 if I is None else float)  # uint8 suffices for binary peaks; float is needed to hold intensities
188 | dense[pky, pkx] = pkI if I is not None else 1
189 | # print(f'{dense.shape}, {rng}, {sz}')
190 | acf = fft.ifft2(np.abs(fft.fft2(dense))**2)
191 | acf = fft.ifftshift(acf).real
192 | if I is None:
193 | # if no intensities were given, the result is (should be)
194 | # integer, up to numerical noise
195 | acf = acf.round().astype(np.uint8)
196 | if acf[sz//2, sz//2] != sum(valid):
197 | warn(f'Autocorrelation center pixel ({acf[sz//2, sz//2]}) does not equal the peak number ({sum(valid)})!')
198 | acf[sz//2, sz//2] = 0 # remove self-correlation (which will be equal to the peak number)
199 | if radial:
200 | rad = proc2d.radial_proj(acf, min_size=rng, max_size=rng,
201 | my_func=np.sum, x0=sz//2, y0=sz//2)
202 | else:
203 | rad = None
204 | 
205 | return acf[sz//2-rng:sz//2+rng, sz//2-rng:sz//2+rng], rad
206 | 
207 | 
208 | def get_pk_data(n_pk: np.ndarray, pk_x: np.ndarray, pk_y: np.ndarray,
209 | ctr_x: np.ndarray, ctr_y: np.ndarray, pk_I: Optional[np.ndarray] = None,
210 | opts: Optional[PreProcOpts] = None,
211 | peakmask=None, return_vec=True, pxs=None,
212 | clen=None, wl=None, el_rat=None, el_ang=None): 213 | 214 | if peakmask is None: 215 | peakmask = np.ones_like(pk_x, dtype=np.float) 216 | for N, row in zip(n_pk, peakmask): 217 | row[N:] = np.nan 218 | 219 | if opts is not None: 220 | pxs = opts.pixel_size if pxs is None else pxs 221 | clen = opts.cam_length if clen is None else clen 222 | wl = opts.wavelength if wl is None else wl 223 | el_rat = opts.ellipse_ratio if el_rat is None else el_rat 224 | el_ang = opts.ellipse_angle if el_ang is None else el_ang 225 | 226 | # assert (np.nansum(peakmask, axis=1) == n_pk).all() 227 | pk_xr, pk_yr = pk_x - ctr_x.reshape(-1,1), pk_y - ctr_y.reshape(-1,1) 228 | pk_xr, pk_yr = pk_xr * peakmask, pk_yr * peakmask 229 | 230 | # ellipticity correction 231 | if el_rat is not None and (el_rat != 1): 232 | c, s = np.cos(np.pi/180*el_ang), np.sin(np.pi/180*el_ang) 233 | pk_xrc, pk_yrc = 1/el_rat**.5*(c*pk_xr - s*pk_yr), el_rat**.5*(s*pk_xr + c*pk_yr) 234 | pk_xrc, pk_yrc = c*pk_xrc + s*pk_yrc, - s*pk_xrc + c*pk_yrc 235 | else: 236 | pk_xrc, pk_yrc = pk_xr, pk_yr 237 | 238 | res = {'peakXPosRaw': pk_x, 'peakYPosRaw': pk_y, 239 | 'peakXPosRel': pk_xr, 'peakYPosRel': pk_yr, 240 | 'peakXPosCor': pk_xrc, 'peakYPosCor': pk_yrc, 241 | 'nPeaks': n_pk} 242 | 243 | if pk_I is not None: 244 | res['peakTotalIntensity'] = pk_I 245 | 246 | if return_vec: 247 | if (pxs is None) or (clen is None) or (wl is None): 248 | raise ValueError('Cannot return angle parameters without pxs, clen, wl.') 249 | pk_r = (pk_xrc**2 + pk_yrc**2)**.5 250 | pk_tt = np.arctan(pxs * pk_r / clen) 251 | pk_az = np.arctan2(pk_yrc, pk_xrc) 252 | pk_d = wl/(2*np.sin(pk_tt/2)) 253 | res.update({'peakTwoTheta': pk_tt, 'peakAzimuth': pk_az, 'peakD': pk_d}) 254 | 255 | return res 256 | 257 | class Cell(object): 258 | """ 259 | Partially taken from the PyFAI package, with some simplifications 260 | and speed enhancements for d-spacing calculation, as well as a 261 | new refinement function. 
262 | 263 | Calculates d-spacings and cell volume as described in: 264 | http://geoweb3.princeton.edu/research/MineralPhy/xtalgeometry.pdf 265 | """ 266 | lattices = ["cubic", "tetragonal", "hexagonal", "rhombohedral", 267 | "orthorhombic", "monoclinic", "triclinic"] 268 | ctr_types = {"P": "Primitive", 269 | "I": "Body centered", 270 | "F": "Face centered", 271 | "C": "Side centered", 272 | "R": "Rhombohedral"} 273 | 274 | def __init__(self, a=1, b=1, c=1, alpha=90, beta=90, gamma=90, 275 | lattice_type="triclinic", centering="P", 276 | unique_axis="c", d_min=2): 277 | """Constructor of the Cell class: 278 | 279 | Crystallographic units are Angstrom for distances and degrees for angles 280 | 281 | :param a,b,c: unit cell length in Angstrom 282 | :param alpha, beta, gamma: unit cell angle in degrees 283 | :param lattice: "cubic", "tetragonal", "hexagonal", "rhombohedral", "orthorhombic", "monoclinic", "triclinic" 284 | :param lattice_type: P, I, F, C or R 285 | """ 286 | self.a = a 287 | self.b = b 288 | self.c = c 289 | self.alpha = alpha 290 | self.beta = beta 291 | self.gamma = gamma 292 | self.lattice_type = lattice_type if lattice_type in self.lattices else "triclinic" 293 | self.unique_axis = unique_axis 294 | self._volume = None 295 | self.selection_rules = [] 296 | "contains a list of functions returning True(allowed)/False(forbidden)/None(unknown)" 297 | self.centering = centering 298 | self.hkl = None 299 | self._d_min = d_min 300 | self.init_hkl(d_min) 301 | 302 | def __repr__(self, *args, **kwargs): 303 | return "%s %s cell (unique %s) a=%.4f b=%.4f c=%.4f alpha=%.3f beta=%.3f gamma=%.3f" % \ 304 | (self.ctr_types[self.centering], self.lattice_type, self.unique_axis, 305 | self.a, self.b, self.c, self.alpha, self.beta, self.gamma) 306 | 307 | @classmethod 308 | def cubic(cls, a, centering="P"): 309 | """Factory for cubic lattice_types 310 | 311 | :param a: unit cell length 312 | """ 313 | a = float(a) 314 | self = cls(a, a, a, 90, 90, 90, 315 | lattice_type="cubic", centering=centering) 316 | return self 317 | 318 | @classmethod 319 | def tetragonal(cls, a, c, centering="P"): 320 | """Factory for tetragonal lattice_types 321 | 322 | :param a: unit cell length 323 | :param c: unit cell length 324 | """ 325 | a = float(a) 326 | self = cls(a, a, float(c), 90, 90, 90, 327 | lattice_type="tetragonal", centering=centering) 328 | return self 329 | 330 | @classmethod 331 | def orthorhombic(cls, a, b, c, centering="P"): 332 | """Factory for orthorhombic lattice_types 333 | 334 | :param a: unit cell length 335 | :param b: unit cell length 336 | :param c: unit cell length 337 | """ 338 | self = cls(float(a), float(b), float(c), 90, 90, 90, 339 | lattice_type="orthorhombic", centering=centering) 340 | return self 341 | 342 | @classmethod 343 | def hexagonal(cls, a, c, centering="P"): 344 | """Factory for hexagonal lattice_types 345 | 346 | :param a: unit cell length 347 | :param c: unit cell length 348 | """ 349 | a = float(a) 350 | self = cls(a, a, float(c), 90, 90, 120, 351 | lattice_type="hexagonal", centering=centering) 352 | return self 353 | 354 | @classmethod 355 | def monoclinic(cls, a, b, c, beta, centering="P"): 356 | """Factory for hexagonal lattice_types 357 | 358 | :param a: unit cell length 359 | :param b: unit cell length 360 | :param c: unit cell length 361 | :param beta: unit cell angle 362 | """ 363 | self = cls(float(a), float(b), float(c), 90, float(beta), 90, 364 | centering=centering, lattice_type="monoclinic", 365 | unique_axis='b') 366 | return self 367 | 368 | 
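    # Illustrative usage sketch (comment only; the numbers are hypothetical):
    #   cell = Cell.monoclinic(12.4, 30.8, 11.3, 90.5, centering='C')
    #   cell.init_hkl(d_min=2.0)     # pre-compute valid Miller indices
    #   d = cell.d(unique=True)      # unique d-spacings in Angstrom
    #   cell_ref, info = cell.refine_powder(svec, pattern, method='distance')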
@classmethod
369 | def rhombohedral(cls, a, alpha, centering="P"):
370 | """Factory for rhombohedral lattice_types
371 | 
372 | :param a: unit cell length
373 | :param alpha: unit cell angle
374 | """
375 | a = float(a)
376 | alpha = float(alpha)
377 | self = cls(a, a, a, alpha, alpha, alpha,
378 | lattice_type="rhombohedral", centering=centering)
379 | return self
380 | 
381 | @classmethod
382 | def diamond(cls, a):
383 | """Factory for Diamond type FCC like Si and Ge
384 | 
385 | :param a: unit cell length
386 | """
387 | self = cls.cubic(a, centering="F")
388 | self.selection_rules.append(lambda h, k, l: ~((h % 2 == 0) & (k % 2 == 0) & (l % 2 == 0) & ((h + k + l) % 4 != 0)))  # element-wise operators, so the rule also works on index arrays
389 | return self
390 | 
391 | @classmethod
392 | def triclinic(cls, a, b, c, alpha, beta, gamma, centering="P"):
393 | a, b, c, alpha, beta, gamma = (float(p) for p in [a, b, c, alpha, beta, gamma])
394 | self = cls(a, b, c, alpha, beta, gamma, lattice_type='triclinic', centering=centering)
395 | return self
396 | 
397 | @property
398 | def volume(self):
399 | if self._volume is None:
400 | self._volume = self.a * self.b * self.c
401 | if self.lattice_type not in ["cubic", "tetragonal", "orthorhombic"]:
402 | cosa = np.cos(self.alpha * np.pi / 180.)
403 | cosb = np.cos(self.beta * np.pi / 180.)
404 | cosg = np.cos(self.gamma * np.pi / 180.)
405 | self._volume *= np.sqrt(1 - cosa ** 2 - cosb ** 2 - cosg ** 2
406 | + 2 * cosa * cosb * cosg)
407 | return self._volume
408 | 
409 | @property
410 | def centering(self):
411 | return self._centering
412 | 
413 | @centering.setter
414 | def centering(self, centering):
415 | self._centering = centering if centering in self.ctr_types else "P"
416 | self.selection_rules = [lambda h, k, l: ~((h == 0) & (k == 0) & (l == 0))]
417 | if self._centering == "I":
418 | self.selection_rules.append(lambda h, k, l: (h + k + l) % 2 == 0)
419 | if self._centering == "F":
420 | self.selection_rules.append(lambda h, k, l: np.isin(h % 2 + k % 2 + l % 2, (0, 3)))
421 | if self._centering == "R":
422 | self.selection_rules.append(lambda h, k, l: ((h - k + l) % 3 == 0))
423 | if self._centering == "C":
424 | self.selection_rules.append(lambda h, k, l: ((h + k) % 2 == 0))
425 | 
426 | def init_hkl(self, d_min: float = 5.):
427 | """Sets up a grid with valid Miller indices for this lattice.
428 | Useful to pre-compute the indices before running any optimization,
429 | which speeds up the computation.
430 | 
431 | Args:
432 | d_min (float, optional): Minimum d-spacing, in A. Defaults to 5.
433 | """
434 | hmax = int(np.ceil(self.a / d_min))
435 | kmax = int(np.ceil(self.b / d_min))
436 | lmax = int(np.ceil(self.c / d_min))
437 | hkl = np.mgrid[-hmax:hmax+1, -kmax:kmax+1, -lmax:lmax+1]
438 | valid = np.stack([r(*hkl) for r in self.selection_rules], axis=0).all(axis=0)
439 | self.hkl = tuple(H[valid].ravel() for H in hkl)
440 | d = self.d(d_min=None)
441 | self.hkl = tuple(H[d >= d_min] for H in self.hkl)
442 | self._d_min = d_min
443 | 
444 | def d(self, d_min=None, unique=False, a=None, b=None, c=None,
445 | alpha=None, beta=None, gamma=None):
446 | """Calculates d-spacings for the cell. Cell parameters can
447 | transiently be changed, which does *not* affect the values
448 | stored with the cell object. This is useful in the context
449 | of optimization.
450 | 
451 | Args:
452 | d_min (float, optional): Minimum d-spacing. If None, uses
453 | the stored value of the object which can be set using
454 | init_hkl.
Leaving it at None significantly speeds 455 | up the computation, which is recommended for 456 | refinements. Defaults to None. 457 | unique (bool, optional): if True, only unique d-spacings 458 | are returned. Otherwise, all spacings are returned which 459 | are ordered the same way as in the object's hkl attribute. 460 | Defaults to False. 461 | a (float, optional): Temporary cell length. Defaults to None. 462 | b (float, optional): Temporary cell length. Defaults to None. 463 | c (float, optional): Temporary cell length. Defaults to None. 464 | alpha (float, optional): Temporary cell angle. Defaults to None. 465 | beta (float, optional): Temporary cell angle. Defaults to None. 466 | gamma (float, optional): Temporary cell angle. Defaults to None. 467 | 468 | Returns: 469 | np.array: Array of d-spacings 470 | """ 471 | 472 | 473 | a = self.a if a is None else a 474 | b = self.b if b is None else b 475 | c = self.c if c is None else c 476 | alpha = self.alpha if alpha is None else alpha 477 | beta = self.beta if beta is None else beta 478 | gamma = self.gamma if gamma is None else gamma 479 | 480 | if (d_min is not None) and (d_min != self._d_min): 481 | self.init_hkl(d_min) 482 | 483 | h, k, l = self.hkl 484 | 485 | if self.lattice_type in ["cubic", "tetragonal", "orthorhombic"]: 486 | invd2 = (h / a) ** 2 + (k / b) ** 2 + (l / c) ** 2 487 | else: 488 | cosa, sina = np.cos(alpha * np.pi / 180), np.sin(alpha * np.pi / 180) 489 | cosb, sinb = np.cos(beta * np.pi / 180), np.sin(beta * np.pi / 180) 490 | cosg, sing = np.cos(gamma * np.pi / 180), np.sin(gamma * np.pi / 180) 491 | S11 = (b * c * sina) ** 2 492 | S22 = (a * c * sinb) ** 2 493 | S33 = (a * b * sing) ** 2 494 | S12 = a * b * c * c * (cosa * cosb - cosg) 495 | S23 = a * a * b * c * (cosb * cosg - cosa) 496 | S13 = a * b * b * c * (cosg * cosa - cosb) 497 | 498 | invd2 = (S11 * h * h + 499 | S22 * k * k + 500 | S33 * l * l + 501 | 2 * S12 * h * k + 502 | 2 * S23 * k * l + 503 | 2 * S13 * h * l) 504 | invd2 /= (self.volume) ** 2 505 | 506 | return np.sqrt(1 / (np.unique(invd2) if unique else invd2)) 507 | 508 | d_spacing = d 509 | 510 | def export(self, filename='refined.cell'): 511 | from textwrap import dedent 512 | """Exports the cell to a CrystFEL cell file. 513 | 514 | Args: 515 | filename (str, optional): Cell file name. Defaults to 'refined.cell'. 516 | """ 517 | 518 | cellfile = dedent(f''' 519 | CrystFEL unit cell file version 1.0 520 | 521 | lattice_type = {self.lattice_type} 522 | centering = {self.centering} 523 | unique_axis = {self.unique_axis} 524 | 525 | a = {self.a:.3f} A 526 | b = {self.b:.3f} A 527 | c = {self.c:.3f} A 528 | 529 | al = {self.alpha:.2f} deg 530 | be = {self.beta:.2f} deg 531 | ga = {self.gamma:.2f} deg 532 | ''').strip() 533 | 534 | with open(filename, 'w') as fh: 535 | fh.write(cellfile) 536 | 537 | def refine_powder(self, svec, pattern, method='distance', 538 | fill=0.1, min_prom=0., min_height=0., 539 | weights='prom', length_bound=2., angle_bound=3., 540 | **kwargs): 541 | """Refine unit cell parameters against a powder pattern. 542 | The refinement is done using a least-squares fit, where you can 543 | pick three different cost functions: 544 | 545 | * 'distance': the positions of the peaks in the powder pattern 546 | are detected. For each peak, the distance to the closest 547 | d-spacing is computed. 548 | * 'xcorr': the inverse values of the powder pattern at the 549 | d-spacings are computed. 
537 |     def refine_powder(self, svec, pattern, method='distance',
538 |                       fill=0.1, min_prom=0., min_height=0.,
539 |                       weights='prom', length_bound=2., angle_bound=3.,
540 |                       **kwargs):
541 |         """Refine unit cell parameters against a powder pattern.
542 |         The refinement is done using a least-squares fit, where you can
543 |         pick three different cost functions:
544 | 
545 |         * 'distance': the positions of the peaks in the powder pattern
546 |           are detected. For each peak, the distance to the closest
547 |           d-spacing is computed.
548 |         * 'xcorr': the inverse values of the powder pattern at the
549 |           d-spacings are computed.
550 |         * 'derivative': the derivative of the powder pattern at the
551 |           d-spacings is computed.
552 | 
553 |         Depending on the chosen method, further parameters can be set.
554 |         The function returns a new Cell object with refined parameters, and
555 |         a structure with some useful information.
556 | 
557 |         Args:
558 |             svec (np.ndarray): scattering vector (x-axis) of the powder pattern,
559 |                 expressed in inverse nanometer (not angstrom - following
560 |                 CrystFEL convention).
561 |             pattern (np.ndarray): powder pattern at values svec (y-axis)
562 |             method (str, optional): Cost function. See description.
563 |                 Defaults to 'distance'.
564 |             fill (float, optional): Fill value for out-of-range or zero-count
565 |                 s-vectors if method is 'xcorr'. Defaults to 0.1.
566 |             min_prom (float, optional): Minimum prominence of peaks (that is,
567 |                 height relative to their vicinity) if method is 'distance'. Increase
568 |                 if too many small peaks are spuriously detected, which is usually
569 |                 a good idea. Defaults to 0.
570 |             min_height (float, optional): Minimum peak height to be detected.
571 |                 Usually min_prom is the better parameter. Defaults to 0.
572 |             weights (str, optional): Weights of the peaks for the least-squares
573 |                 optimization if method is 'distance'. Can be 'prom' or 'height'.
574 |                 Defaults to 'prom'.
575 |             length_bound (float, optional): Bound range for cell lengths, in A.
576 |                 Defaults to 2.
577 |             angle_bound (float, optional): Bound range for cell angles. Defaults to 3.
578 |             **kwargs: Further arguments will be passed on to scipy.optimize.least_squares
579 | 
580 |         Returns:
581 |             tuple: 2-Tuple of a new Cell object with the refined parameters, and
582 |                 a structure with useful information from the optimization, including
583 |                 the peak positions and heights if method was 'distance'.
584 |         """
585 | 
586 |         from scipy.interpolate import interp1d
587 |         from scipy.optimize import least_squares
588 | 
589 |         # find out which parameters should be optimized
590 |         if self.lattice_type == 'triclinic':
591 |             parameters = ['a', 'b', 'c', 'alpha', 'beta', 'gamma']
592 |         elif self.lattice_type == 'monoclinic':
593 |             parameters = ['a', 'b', 'c', 'beta']
594 |         elif self.lattice_type == 'orthorhombic':
595 |             parameters = ['a', 'b', 'c']
596 |         elif self.lattice_type == 'tetragonal':
597 |             parameters = ['a', 'c']
598 |         elif self.lattice_type == 'cubic':
599 |             parameters = ['a']
600 |         elif self.lattice_type == 'hexagonal':
601 |             parameters = ['a', 'c']
602 |         elif self.lattice_type == 'rhombohedral':
603 |             parameters = ['a', 'alpha']
604 |         else:
605 |             raise Exception(f'This should not happen (lattice type is set to {self.lattice_type}). Yell at Robert.')
606 | 
607 |         _, unique_pos = np.unique(self.d(), return_index=True)  # unique d-spacings
608 |         p0 = [getattr(self, cpar) for cpar in parameters]
609 |         dsp = lambda p: self.d(**{cpar: p[ii] for ii, cpar in enumerate(parameters)})[unique_pos]
610 |         bounds = ([getattr(self, cpar) - (length_bound if cpar in 'abc' else angle_bound)
611 |                    for cpar in parameters],
612 |                   [getattr(self, cpar) + (length_bound if cpar in 'abc' else angle_bound)
613 |                    for cpar in parameters])
614 | 
615 |         if method == 'xcorr':
616 |             cost_profile = interp1d(svec, 1/np.where(pattern != 0, pattern, fill),
617 |                                     bounds_error=False, fill_value=fill)
618 |             cost = lambda p: cost_profile(10/dsp(p))
619 |             pk_pos = pk_height = pk_prom = []
620 | 
621 |         elif method == 'derivative':
622 |             cost_profile = interp1d(svec[1:]/2+svec[:-1]/2, np.diff(pattern),
623 |                                     bounds_error=False, fill_value=0)
624 |             cost = lambda p: cost_profile(10/dsp(p))
625 |             pk_pos = pk_height = pk_prom = []
626 | 
627 |         elif method == 'distance':
628 |             from scipy.signal import find_peaks
629 |             lim = 0.95 * 10/dsp(p0).min()
630 |             pkdat = find_peaks(pattern[svec < lim], height=min_height, prominence=min_prom)
631 |             pk_pos = svec[pkdat[0]]
632 |             pk_height = pkdat[1]['peak_heights']
633 |             pk_prom = pkdat[1]['prominences']
634 |             w = (pk_prom if weights == 'prom'
635 |                  else pk_height if weights == 'height' else 1)
636 |             cost = lambda p: 100*w * (np.abs(10/dsp(p).reshape(1,-1)
637 |                                              - pk_pos.reshape(-1,1))).min(axis=1)
638 | 
639 |         else:
640 |             raise ValueError(f'Unknown refinement method {method}')
641 | 
642 |         cost_init = 0.5 * (cost(p0)**2).sum()
643 |         lsq = least_squares(cost, p0, bounds=bounds, **kwargs)
644 | 
645 |         # return a new cell with the optimized parameters
646 |         C_ref = getattr(self, self.lattice_type)(centering=self._centering,
647 |                                                  **{cpar: lsq.x[ii] for ii, cpar in enumerate(parameters)})
648 |         C_ref.selection_rules = self.selection_rules
649 |         C_ref.init_hkl(self._d_min)
650 | 
651 |         info = {'lsq_result': lsq,
652 |                 'initial_cost': cost_init}
653 |         if method == 'distance':
654 |             info.update({'peak_position': pk_pos,
655 |                          'peak_height': pk_height,
656 |                          'peak_prominence': pk_prom})
657 | 
658 |         if not lsq.success:
659 |             warn('Powder refinement did not converge!')
660 | 
661 |         return C_ref, info
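    # [Editor's example] A hedged sketch of a powder refinement (not part of
    # the library; a hexagonal() factory analogous to the ones above is
    # assumed, and svec/pattern would come from an azimuthally integrated
    # powder trace with svec in 1/nm):
    #
    #     >>> C = Cell.hexagonal(90.3, 38.2)   # hypothetical cell, in A
    #     >>> C.init_hkl(d_min=3.5)
    #     >>> C_ref, info = C.refine_powder(svec, pattern, method='distance',
    #     ...                               min_prom=5., length_bound=1.5)
    #     >>> C_ref.export('refined.cell')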
662 | 
-------------------------------------------------------------------------------- /diffractem/quick_proc.py: --------------------------------------------------------------------------------
1 | import hdf5plugin # required to access LZ4-encoded HDF5 data sets
2 | from diffractem import version, proc2d, pre_proc_opts, nexus, io
3 | from diffractem.dataset import Dataset
4 | from tifffile import imread
5 | import numpy as np
6 | import os
7 | import dask.array as da
8 | import h5py
9 | from dask.distributed import Client, LocalCluster, Lock
10 | import dask
11 | import argparse
12 | import subprocess
13 | import pandas as pd
14 | import random
15 | from warnings import warn
16 | from time import sleep
17 | 
18 | def _fast_correct(*args, data_key='/%/data/corrected',
19 |                   shots_grp='/%/shots',
20 |                   peaks_grp='/%/data', **kwargs):
21 | 
22 |     imgs, info = proc2d.analyze_and_correct(*args, **kwargs)
23 |     store_dat = {shots_grp + '/' + k: v for k, v in info.items() if k != 'peak_data'}
24 |     store_dat.update({peaks_grp + '/' + k: v for k, v in info['peak_data'].items()})
25 |     store_dat[data_key] = imgs
26 | 
27 |     return store_dat
28 | 
29 | def quick_proc(ds, opts, label_raw, label, client, reference=None, pxmask=None):
30 | 
31 |     reference = imread(opts.reference) if reference is None else reference
32 |     pxmask = imread(opts.pxmask) if pxmask is None else pxmask
33 | 
34 |     stack = ds.stacks[label_raw]
35 |     # stk_del = ds.stacks['label_raw'].to_delayed().ravel()
36 | 
37 |     # get array names and shapes by correcting a single image (the last one)
38 |     sample_res = _fast_correct(stack[-1:,...].compute(scheduler='threading'),
39 |                                opts=opts,
40 |                                data_key=ds.data_pattern + '/' + label,
41 |                                shots_grp=ds.shots_pattern,
42 |                                peaks_grp=ds.data_pattern)
43 | 
44 |     # print({k: v.dtype for k, v in sample_res.items()})
45 | 
46 |     # initialize file structure
47 |     for (file, subset), grp in ds.shots.groupby(['file', 'subset']):
48 |         with h5py.File(file, 'a') as fh:
49 |             for pattern, data in sample_res.items():
50 |                 path = pattern.replace('%', subset)
51 |                 # print('Initializing', file, path)
52 |                 fh.require_dataset(path,
53 |                                    shape=(len(grp),) + data.shape[1:],
54 |                                    dtype=data.dtype,
55 |                                    chunks=(1,) + data.shape[1:],
56 |                                    compression=opts.compression)
57 |             fh[ds.data_pattern.replace('%', subset)].attrs['signal'] = label
58 | 
59 |     # array of integers corresponding to the chunk number
60 |     chunk_label = np.concatenate([np.repeat(ii, cs)
61 |                                   for ii, cs in enumerate(stack.chunks[0])])
62 | 
63 |     # delay objects returning the image and info dictionary (same stack and target keys as for the sample above)
64 |     cmp_del = [dask.delayed(_fast_correct)(raw_chk, opts, data_key=ds.data_pattern + '/' + label,
65 |                                            shots_grp=ds.shots_pattern, peaks_grp=ds.data_pattern) for raw_chk in stack.to_delayed().ravel()]
66 | 
67 |     # file lock objects
68 |     locks = {fn: Lock() for fn in ds.files}
69 | 
70 |     # make delay objects for writing results to file (= maximum side effects!)
71 |     dels = []
72 |     for chks, (cl, sht) in zip(cmp_del, ds.shots.groupby(chunk_label)):
73 |         assert len(sht.drop_duplicates(['file','subset'])) == 1
74 |         ii_to = sht.shot_in_subset.values
75 |         dels.append(dask.delayed(nexus._save_single_chunk_multi)(chks,
76 |                                                                  file=sht.file.values[0],
77 |                                                                  subset=sht.subset.values[0],
78 |                                                                  idcs=ii_to,
79 |                                                                  lock=locks[sht.file.values[0]]
80 |                                                                  ))
81 | 
82 |     # random.shuffle(dels) # shuffling tasks to minimize concurrent file access
83 |     chunk_info = client.compute(dels, sync=True)
84 |     return pd.DataFrame(chunk_info, columns=['file', 'subset', 'path', 'shot_in_subset'])
85 | 
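# [Editor's note] The chunk_label construction in quick_proc maps dask chunk
# sizes to a per-shot chunk index, which the groupby above uses to align
# shots with computed chunks. A self-contained sketch (not part of the module):
#
#     >>> import numpy as np
#     >>> chunks = (3, 2)   # e.g. stack.chunks[0]
#     >>> np.concatenate([np.repeat(ii, cs) for ii, cs in enumerate(chunks)])
#     array([0, 0, 0, 1, 1])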
86 | def main():
87 | 
88 |     parser = argparse.ArgumentParser(description='Quick and dirty pre-processing for Serial Electron Diffraction data',
89 |                                      allow_abbrev=False, epilog='Any other options are passed on as modification to the option file')
90 |     parser.add_argument('filename', type=str, nargs='*', help='List file, HDF5 file(s), or glob pattern. Glob patterns must be given in SINGLE quotes.')
91 |     parser.add_argument('-s', '--settings', type=str, help='Option YAML file. Defaults to \'preproc.yaml\'.', default='preproc.yaml')
92 |     parser.add_argument('-A', '--address', type=str, help='Address of an existing dask.distributed cluster to use instead of making a new one. Defaults to making a new one.', default=None)
93 |     parser.add_argument('-N', '--nprocs', type=int, help='Number of processes of a new dask.distributed cluster. Defaults to letting dask decide.', default=None)
94 |     parser.add_argument('-L', '--local-directory', type=str, help='Fast (scratch) directory for computations. Defaults to the current directory.', default=None)
95 |     parser.add_argument('-c', '--chunksize', type=int, help='Chunk size of raw data stack. Should be an integer multiple of the number of movie stack frames! Defaults to 100.', default=100)
96 |     parser.add_argument('-l', '--list-file', type=str, help='Name of output list file', default='processed.lst')
97 |     parser.add_argument('-w', '--wait-for-files', help='Wait for files matching wildcard pattern', action='store_true')
98 |     parser.add_argument('--include-existing', help='When using -w/--wait-for-files, also include existing files', action='store_true')
99 |     parser.add_argument('--append', help='Append to list instead of overwrite', action='store_true')
100 |     parser.add_argument('-d', '--data-path-old', type=str, help='Raw data field in HDF5 file(s). Defaults to /%%/data/raw_counts', default='/%/data/raw_counts')
101 |     parser.add_argument('-n', '--data-path-new', type=str, help='Corrected data field in HDF5 file(s). Defaults to /%%/data/corrected', default='/%/data/corrected')
102 |     parser.add_argument('--no-bgcorr', help='Skip background correction', action='store_true')
103 |     parser.add_argument('--no-validate', help='Do not validate files before attempting to process', action='store_true')
104 |     # parser.add_argument('ppopt', nargs=argparse.REMAINDER, help='Preprocessing options to be overridden')
105 | 
106 |     args, extra = parser.parse_known_args()
107 |     # print(args, extra)
108 |     # raise RuntimeError('thus far!')
109 |     opts = pre_proc_opts.PreProcOpts(args.settings)
110 | 
111 |     label_raw = args.data_path_old.rsplit('/', 1)[-1]
112 |     label = args.data_path_new.rsplit('/', 1)[-1]
113 | 
114 |     if extra:
115 |         # If extra arguments have been supplied, overwrite existing values
116 |         opt_parser = argparse.ArgumentParser()
117 |         for k, v in opts.__dict__.items():
118 |             opt_parser.add_argument('--' + k, type=type(v), default=None)
119 |         opts2 = opt_parser.parse_args(extra)
120 | 
121 |         for k, v in vars(opts2).items():
122 |             if v is not None:
123 |                 if type(v) != type(opts.__dict__[k]):
124 |                     warn('Mismatch of data types in overridden argument!', RuntimeWarning)
125 |                 print(f'Overriding option file setting {k} = {opts.__dict__[k]} ({type(opts.__dict__[k])}). ',
126 |                       f'New value is {v} ({type(v)})')
127 |                 opts.__dict__[k] = v
128 | 
129 |     # raise RuntimeError('thus far!')
130 |     print('Running on diffractem:', version())
131 |     print('Current path is:', os.getcwd())
132 | 
133 |     # client = Client()
134 |     if args.address is not None:
135 |         print('Connecting to cluster scheduler at', args.address)
136 | 
137 |         try:
138 |             client = Client(address=args.address, timeout=3)
139 |         except Exception:
140 |             print(f'\n----\nThere seems to be no dask.distributed scheduler running at {args.address}.\n'
141 |                   f'Please double-check, or start a new cluster by omitting the --address option.')
142 |             return
143 |     else:
144 |         print('Creating a dask.distributed cluster...')
145 |         client = Client(n_workers=args.nprocs, local_directory=args.local_directory, processes=True)
146 |         print('\n\n---\nStarted dask.distributed cluster:')
147 |         print(client)
148 |         print('You can access the dashboard for monitoring at: ', client.dashboard_link)
149 | 
150 | 
151 |     client.run(os.chdir, os.getcwd())
152 | 
153 |     if len(args.filename) == 1:
154 |         args.filename = args.filename[0]
155 | 
156 |     # print(args.filename)
157 |     seen_raw_files = [] if args.include_existing else io.expand_files(args.filename)
158 | 
159 |     while True:
160 | 
161 |         if args.wait_for_files:
162 | 
163 |             # slightly awkward sequence to only open finished files... (but believe me - it works!)
164 | 
165 |             fns = io.expand_files(args.filename)
166 |             # print(fns)
167 |             fns = [fn for fn in fns if fn not in seen_raw_files]
168 |             # validation...
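            # [Editor's note] expand_files is deliberately called twice here:
            # once above without validation to spot new files quickly, and
            # once below with validate=..., so files that are still being
            # written to are caught and retried instead of crashing the loop.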
169 |             try:
170 |                 fns = io.expand_files(fns, validate=not args.no_validate)
171 |             except (OSError, IOError, RuntimeError) as err:
172 |                 print(f'Could not open file(s) {" ".join(fns)} because of', err)
173 |                 print('Possibly, it is still being written to. Waiting a bit...')
174 |                 sleep(5)
175 |                 continue
176 | 
177 |             if not fns:
178 |                 print('No new files, waiting a bit...')
179 |                 sleep(5)
180 |                 continue
181 |             else:
182 |                 print('Found new file(s):\n', '\n'.join(fns))
183 |                 try:
184 |                     ds_raw = Dataset.from_files(fns, chunking=args.chunksize)
185 |                 except Exception as err:
186 |                     print(f'Could not open file(s) {" ".join(fns)} because of', err)
187 |                     print('Possibly, it is still being written to. Waiting a bit...')
188 |                     sleep(5)
189 |                     continue
190 | 
191 |         else:
192 |             fns = io.expand_files(args.filename, validate=not args.no_validate)
193 |             if fns:
194 |                 ds_raw = Dataset.from_files(fns, chunking=args.chunksize)
195 |             else:
196 |                 print(f'\n---\n\nFile(s) {args.filename} not found or (all of them) invalid.')
197 |                 return
198 | 
199 |         seen_raw_files.extend(ds_raw.files)
200 | 
201 |         print('---- Have dataset ----')
202 |         print(ds_raw)
203 | 
204 |         # delete undesired stacks
205 |         delstacks = [sn for sn in ds_raw.stacks.keys() if sn != args.data_path_old.rsplit('/', 1)[-1]]
206 |         for sn in delstacks:
207 |             ds_raw.delete_stack(sn)
208 | 
209 |         if opts.aggregate:
210 |             print('---- Aggregating raw data ----')
211 |             ds_compute = ds_raw.aggregate(query=opts.agg_query,
212 |                                           by=['sample', 'region', 'run', 'crystal_id'],
213 |                                           how='sum', new_folder=opts.proc_dir,
214 |                                           file_suffix=opts.agg_file_suffix)
215 |         else:
216 |             ds_compute = ds_raw.get_selection(query=opts.select_query,
217 |                                               file_suffix=opts.agg_file_suffix)
218 | 
219 |         print('Initializing data files...')
220 |         os.makedirs(opts.proc_dir, exist_ok=True)
221 |         ds_compute.init_files(overwrite=True)
222 | 
223 |         print('Storing meta tables...')
224 |         ds_compute.store_tables(shots=True, features=True)
225 | 
226 |         print(f'Processing diffraction data... monitor progress at {client.dashboard_link} (or forward port if remote)')
227 |         chunk_info = quick_proc(ds_compute, opts, label_raw, label, client)
228 | 
229 |         # make sure that the calculation is consistent with the data set
230 |         for (sh, sh_grp), (ch, ch_grp) in zip(ds_compute.shots.groupby(['file', 'subset']), chunk_info.groupby(['file', 'subset'])):
231 |             if any(sh_grp.shot_in_subset.values != np.sort(np.concatenate(ch_grp.shot_in_subset.values))):
232 |                 raise ValueError(f'Inconsistency between calculated data and shot list in {sh[0]}: {sh[1]} found. Please investigate.')
233 | 
234 |         ds_compute.write_list(args.list_file, append=args.append)
235 | 
236 |         print(f'Computation done. Processed files are in {args.list_file}')
237 | 
238 |         if not args.wait_for_files:
239 |             break
240 | 
241 | if __name__ == '__main__':
242 |     main()
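# [Editor's example] A hypothetical invocation of this script (option names
# taken from the argparse definitions above; file names are made up):
#
#     quick_proc.py 'raw/*.nxs' -s preproc.yaml -N 8 -c 100 -w -l processed.lst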
-------------------------------------------------------------------------------- /diffractem/stream2sol.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Create solution file for '--indexing=file' from a stream
4 | #
5 | # Copyright © 2020-2021 Max-Planck-Gesellschaft
6 | #   zur Förderung der Wissenschaften e.V.
7 | # Copyright © 2021 Deutsches Elektronen-Synchrotron DESY,
8 | #   a research centre of the Helmholtz Association.
9 | #
10 | # Authors:
11 | #   2020 Robert Bücker 
12 | #   2021 Thomas White 
13 | 
14 | from io import StringIO
15 | import re
16 | from warnings import warn
17 | from copy import deepcopy
18 | 
19 | BEGIN_GEOM = '----- Begin geometry file -----'
20 | END_GEOM = '----- End geometry file -----'
21 | BEGIN_CELL = '----- Begin unit cell -----'
22 | END_CELL = '----- End unit cell -----'
23 | BEGIN_CHUNK = '----- Begin chunk -----'
24 | END_CHUNK = '----- End chunk -----'
25 | BEGIN_CRYSTAL = '--- Begin crystal'
26 | END_CRYSTAL = '--- End crystal'
27 | BEGIN_PEAKS = 'Peaks from peak search'
28 | END_PEAKS = 'End of peak list'
29 | BEGIN_REFLECTIONS = 'Reflections measured after indexing'
30 | END_REFLECTIONS = 'End of reflections'
31 | HEAD = 'CrystFEL stream format {}.{}'.format(2, 3)
32 | GENERATOR = 'Generated by diffractem StreamParser'
33 | PEAK_COLUMNS = ['fs/px', 'ss/px', '(1/d)/nm^-1', 'Intensity', 'Panel']
34 | REFLECTION_COLUMNS = ['h', 'k', 'l', 'I', 'Sigma(I)', 'Peak', 'Background', 'fs/px', 'ss/px', 'Panel']
35 | ID_FIELDS = ['file', 'Event', 'serial']
36 | CRYSTAL_DATA_FIELDS = ['astar', 'bstar', 'cstar', 'predict_refine/det_shift',
37 |                        'profile_radius', 'diffraction_resolution_limit']
38 | 
39 | args = None
40 | class Crystal:
41 | 
42 |     def __init__(self, line):
43 |         self.astar = (None, None, None)
44 |         self.bstar = (None, None, None)
45 |         self.cstar = (None, None, None)
46 |         self.lattice_type = None
47 |         self.centering = None
48 |         self.unique_axis = None
49 |         self.det_shift = (None, None)
50 |         self.start_line = line
51 | 
52 |     @property
53 |     def initialized(self):
54 |         global legacy
55 |         required_fields = [*self.astar, *self.bstar, *self.cstar,
56 |                            *self.det_shift]
57 |         if not legacy:
58 |             required_fields += [self.lattice_type, self.centering]
59 |         return all([x is not None for x in required_fields])
60 | 
61 |     @property
62 |     def lattice_type_sym(self):
63 |         if self.lattice_type == 'triclinic':
64 |             return 'a' + self.centering
65 |         elif self.lattice_type == 'monoclinic':
66 |             return 'm' + self.centering + self.unique_axis
67 |         elif self.lattice_type == 'orthorhombic':
68 |             return 'o' + self.centering
69 |         elif self.lattice_type == 'tetragonal':
70 |             return 't' + self.centering + self.unique_axis
71 |         elif self.lattice_type == 'cubic':
72 |             return 'c' + self.centering
73 |         elif self.lattice_type == 'hexagonal':
74 |             return 'h' + self.centering + self.unique_axis
75 |         elif self.lattice_type == 'rhombohedral':
76 |             return 'r' + self.centering
77 |         else:
78 |             warn('Invalid lattice type {}'.format(self.lattice_type))
79 |             return 'invalid'
80 | 
81 |     def __str__(self):
82 |         global legacy
83 |         if not self.initialized:
84 |             warn('Trying to get string from non-initialized crystal from line {}.'.format(self.start_line))
85 |             return ''  # __str__ must return a string, not None
86 |         else:
87 |             cs = ' '.join(['{0[0]} {0[1]} {0[2]}'.format(vec)
88 |                            for vec in [self.astar, self.bstar, self.cstar]])
89 |             cs += ' {0[0]} {0[1]}'.format(self.det_shift)
90 |             if not legacy:
91 |                 cs += ' ' + self.lattice_type_sym
92 |             return cs
93 | 
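# [Editor's note] A sketch of the solution-file line a Crystal contributes
# (all numbers hypothetical); Chunk.__str__ below prepends file and event:
#
#     data/run1.h5 entry//5 0.09 -0.01 0.11 0.02 -0.13 0.05 0.10 0.04 -0.07 0.01 -0.02 oI
#
# i.e. the nine reciprocal-basis components, the two detector-shift values,
# and (unless omit_cell/--legacy is used) the condensed symbol from
# lattice_type_sym, e.g. 'oI' for an I-centered orthorhombic lattice.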
94 | class Chunk:
95 | 
96 |     def __init__(self, line):
97 |         self.file = None
98 |         self.Event = None
99 |         self.crystals = []
100 |         self.start_line = line
101 |         self.x_shift = 0
102 |         self.y_shift = 0
103 | 
104 |     @property
105 |     def n_cryst(self):
106 |         return len(self.crystals)
107 | 
108 |     @property
109 |     def initialized(self):
110 |         return (self.file is not None) and (self.Event is not None)
111 | 
112 |     def add_crystal(self, crystal):
113 |         if (crystal is None) or (not crystal.initialized):  # None check must come first
114 |             raise RuntimeError('Trying to add non-initialized crystal to chunk from line {}.'.format(self.start_line))
115 |         self.crystals.append(deepcopy(crystal))
116 |         # print(crystal)
117 | 
118 |     def __str__(self):
119 |         if not self.initialized:
120 |             warn('Trying to get string from non-initialized chunk from line {}.'.format(self.start_line))
121 |             return ''  # __str__ must return a string, not None
122 |         else:
123 |             # return '\n'.join([' '.join([self.file, *self.Event.split('//'), str(cryst)])
124 |             #                   for ii, cryst in enumerate(self.crystals)])
125 |             # new-style (not working yet)
126 |             return '\n'.join([' '.join([self.file, self.Event, str(cryst)])
127 |                               for ii, cryst in enumerate(self.crystals)])
128 | 
129 | 
130 | def parse_stream(stream, sol=None, return_meta=True,
131 |                  file_label='Image filename', event_label='Event',
132 |                  x_shift_label=None, y_shift_label=None, omit_cell=False):
133 | 
134 |     global legacy
135 |     legacy = omit_cell
136 |     if legacy:
137 |         print('Generating legacy solution file (omitting cell type). Do you really want that?')
138 | 
139 |     curr_chunk = None
140 |     curr_cryst = None
141 |     geom = ''
142 |     cell = ''
143 |     command = ''
144 |     parsing_geom = False
145 |     parsing_cell = False
146 |     parsing_peaks = False
147 |     have_cell = False
148 |     have_geom = False
149 |     have_command = False
150 |     parsing_reflections = False
151 |     parse_vec = lambda l: tuple(float(k) for k in re.findall(r'[+-]?\d*\.\d*', l))
152 | 
153 |     with open(stream, 'r') as fh_in, (StringIO() if sol is None else open(sol,'w')) as fh_out:
154 | 
155 |         for ln, l in enumerate(fh_in):
156 | 
157 |             if parsing_reflections:
158 |                 if l.startswith(END_REFLECTIONS):
159 |                     parsing_reflections = False
160 |                 else:
161 |                     # here, any reflection parsing would go
162 |                     pass
163 | 
164 |             elif parsing_peaks:
165 |                 if l.startswith(END_PEAKS):
166 |                     parsing_peaks = False
167 |                 else:
168 |                     # here, any peak parsing would go
169 |                     pass
170 | 
171 |             elif l.startswith(BEGIN_CHUNK):
172 |                 curr_chunk = Chunk(ln)
173 | 
174 |             elif (curr_chunk is not None) and (curr_cryst is None):
175 |                 # parsing chunks (= events = shots) _outside_ crystals
176 | 
177 |                 if l.startswith(END_CHUNK):
178 |                     if not curr_chunk.initialized:
179 |                         raise RuntimeError('Incomplete chunk found before line ' + str(ln))
180 |                     if curr_chunk.n_cryst:
181 |                         fh_out.write(str(curr_chunk) + '\n')
182 |                     # print(str(curr_chunk))
183 |                     curr_chunk = None
184 | 
185 |                 elif l.startswith(file_label):
186 |                     curr_chunk.file = l.split(' ', 2)[-1].strip()
187 | 
188 |                 elif l.startswith(event_label):
189 |                     curr_chunk.Event = l.split(' ')[-1].strip()
190 | 
191 |                 elif x_shift_label and l.startswith(x_shift_label):
192 |                     curr_chunk.x_shift = float(l.split(' ')[-1].strip())
193 | 
194 |                 elif y_shift_label and l.startswith(y_shift_label):
195 |                     curr_chunk.y_shift = float(l.split(' ')[-1].strip())
196 | 
197 |                 elif l.startswith(BEGIN_CRYSTAL):
198 |                     if not curr_chunk.initialized:
199 |                         raise RuntimeError('Crystal for incomplete chunk in ' + str(ln))
200 |                     curr_cryst = Crystal(ln)
201 | 
202 |             elif curr_cryst is not None:
203 |                 # parsing a (single) crystal
204 | 
205 |                 if l.startswith(END_CRYSTAL):
206 |                     curr_chunk.add_crystal(curr_cryst)
207 |                     curr_cryst = None
208 | 
209 |                 elif l.startswith('astar'):
210 |                     curr_cryst.astar = parse_vec(l)
211 | 
212 |                 elif l.startswith('bstar'):
213 |                     curr_cryst.bstar = parse_vec(l)
214 | 
215 |                 elif l.startswith('cstar'):
216 |                     curr_cryst.cstar = parse_vec(l)
217 | 
218 |                 elif l.startswith('lattice_type'):
219 |                     curr_cryst.lattice_type = l.split(' ')[2].strip()
220 | 
221 |                 elif l.startswith('centering'):
222 |                     curr_cryst.centering = l.split(' ')[2].strip()
223 | 
224 |                 elif l.startswith('unique_axis'):
225 |                     curr_cryst.unique_axis = l.split(' ')[2].strip()
226 | 
227 |                 elif l.startswith('predict_refine/det_shift'):
228 |                     curr_cryst.det_shift = parse_vec(l)
229 |                     curr_cryst.det_shift = (curr_cryst.det_shift[0] + curr_chunk.x_shift,
230 |                                             curr_cryst.det_shift[1] + curr_chunk.y_shift)
231 | 
232 |             elif l.startswith(BEGIN_GEOM) and not have_geom:
233 |                 parsing_geom = True
234 | 
235 |             elif parsing_geom:
236 |                 if not l.startswith(END_GEOM):
237 |                     geom += l
238 |                 else:
239 |                     parsing_geom = False
240 |                     have_geom = True
241 | 
242 |             elif l.startswith(BEGIN_CELL) and not have_cell:
243 |                 parsing_cell = True
244 | 
245 |             elif parsing_cell:
246 |                 if not l.startswith(END_CELL):
247 |                     cell += l
248 |                 else:
249 |                     parsing_cell = False
250 |                     have_cell = True
251 | 
252 |             elif ('indexamajig' in l) and not have_command:
253 |                 command = l
254 |                 have_command = True
255 | 
256 |             elif l.startswith(BEGIN_PEAKS):
257 |                 parsing_peaks = True
258 | 
259 |             elif l.startswith(BEGIN_REFLECTIONS):
260 |                 parsing_reflections = True
261 | 
262 |     if sol is None:
263 |         out = fh_out.getvalue()
264 |         if return_meta:
265 |             return out, (command, geom, cell)
266 |         else:
267 |             return out
268 | 
269 |     else:
270 |         if return_meta:
271 |             return command, geom, cell
272 | 
273 | def main():
274 |     global args
275 | 
276 |     from argparse import ArgumentParser
277 |     parser = ArgumentParser(description='Conversion tool from stream to solution file(s) for re-integration/-refinement.')
278 | 
279 |     parser.add_argument('-i', '--input', type=str, help='Input stream file', required=True)
280 |     parser.add_argument('-o', '--output', type=str, help='Output solution file', required=True)
281 |     parser.add_argument('-g', '--geometry-out', type=str, help='Output geometry file (optional)')
282 |     parser.add_argument('-p', '--cell-out', type=str, help='Output cell file (optional)')
283 |     parser.add_argument('-L', '--legacy', help='Legacy file format: omit cell info', action='store_true')
284 |     parser.add_argument('--file-field', type=str, help='Field in chunks for image filename', default='Image filename')
285 |     parser.add_argument('--event-field', type=str, help='Field in chunk for event identifier', default='Event')
286 |     parser.add_argument('--x-shift-field', type=str, help='Field in chunk for x-shift identifier', default='')
287 |     parser.add_argument('--y-shift-field', type=str, help='Field in chunk for y-shift identifier', default='')
288 | 
289 |     args = parser.parse_args()
290 | 
291 |     meta = parse_stream(args.input, args.output, return_meta=True,
292 |                         file_label=args.file_field, event_label=args.event_field,
293 |                         x_shift_label=args.x_shift_field, y_shift_label=args.y_shift_field,
294 |                         omit_cell=args.legacy)
295 |     # print('Original indexamajig call was: \n' + meta[0])
296 |     if args.geometry_out:
297 |         with open(args.geometry_out, 'w') as fh:
298 |             fh.write(meta[1])
299 | 
300 |     if args.cell_out:
301 |         if not meta[2]:  # meta = (command, geom, cell) - check the cell entry
302 |             print('No cell found in stream file. Not writing cell file.')
303 |         else:
304 |             with open(args.cell_out, 'w') as fh:
305 |                 fh.write(meta[2])
306 | 
307 | if __name__ == '__main__':
308 |     main()
309 | 
-------------------------------------------------------------------------------- /diffractem/stream_parser.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | from io import StringIO
3 | import numpy as np
4 | import re
5 | from typing import Union, Optional
6 | 
7 | BEGIN_GEOM = '----- Begin geometry file -----'
8 | END_GEOM = '----- End geometry file -----'
9 | BEGIN_CELL = '----- Begin unit cell -----'
10 | END_CELL = '----- End unit cell -----'
11 | BEGIN_CHUNK = '----- Begin chunk -----'
12 | END_CHUNK = '----- End chunk -----'
13 | BEGIN_CRYSTAL = '--- Begin crystal'
14 | END_CRYSTAL = '--- End crystal'
15 | BEGIN_PEAKS = 'Peaks from peak search'
16 | END_PEAKS = 'End of peak list'
17 | BEGIN_REFLECTIONS = 'Reflections measured after indexing'
18 | END_REFLECTIONS = 'End of reflections'
19 | HEAD = 'CrystFEL stream format {}.{}'.format(2, 3)
20 | GENERATOR = 'Generated by diffractem StreamParser'
21 | PEAK_COLUMNS = ['fs/px', 'ss/px', '(1/d)/nm^-1', 'Intensity', 'Panel']
22 | REFLECTION_COLUMNS = ['h', 'k', 'l', 'I', 'Sigma(I)', 'Peak', 'Background', 'fs/px', 'ss/px', 'Panel']
23 | ID_FIELDS = ['file', 'Event', 'serial']
24 | 
25 | 
26 | def make_substream(stream: 'StreamParser', Ncryst: int, seed: Optional[int] = None,
27 |                    filename: Optional[str] = None, query: Optional[str] = None):
28 |     """Write a stream file containing a sub-set of events to a new stream file.
29 | 
30 |     Args:
31 |         stream (StreamParser): StreamParser object holding the original stream
32 |         Ncryst (int): Number of events to sample
33 |         seed (Optional[int], optional): Seed of the random generator. Defaults to None.
34 |         filename (Optional[str], optional): Output stream filename. Defaults to the filename of the original
35 |             stream inside a 'subsets/' folder, with '-N_{Ncryst}' appended, where {Ncryst} is the number of sampled crystals.
36 |         query (str, optional): query to pre-select events. A sensible choice might be to
37 |             only pick indexed events by setting query='indexed_by != "none"'. Defaults to None.
38 | 
39 |     Returns:
40 |         str: Name of the stream file that was written.
41 |     """
42 | 
43 |     fn2 = 'subsets/' + stream.filename.rsplit('.',1)[0] + f'-N_{Ncryst}.stream' \
44 |         if filename is None else filename
45 | 
46 |     sel = stream.shots if query is None else stream.shots.query(query)
47 |     sel = sel.sample(n=Ncryst, random_state=seed)
48 |     sel.sort_values(by='first_line', ascending=False, inplace=True)
49 | 
50 |     first = list(sel.first_line)
51 |     last = list(sel.last_line)
52 |     first.append(0)
53 |     last.append(stream.shots.first_line.min() - 1)
54 | 
55 |     copying = False
56 |     section = (first.pop(), last.pop())
57 | 
58 |     with open(stream.filename,'r') as fh_from, open(fn2,'w') as fh_to:
59 |         for ln, l in enumerate(fh_from):
60 |             if not copying:
61 |                 if ln == section[0]:
62 |                     copying = True
63 |                     #print(section[0], ln)
64 |             if copying:
65 |                 fh_to.write(l)
66 |                 if ln == section[1]:
67 |                     copying = False
68 |                     try:
69 |                         section = (first.pop(), last.pop())
70 |                     except IndexError:
71 |                         break
72 | 
73 |     print('Wrote subset with', len(sel), 'events to', fn2)
74 | 
75 |     return fn2
76 | 
77 | 
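# [Editor's example] A hedged usage sketch for make_substream (not part of
# the library; the stream file name is made up, and the default output path
# lands in a 'subsets/' folder, which must exist):
#
#     >>> sp = StreamParser('hits.stream')
#     >>> fn = make_substream(sp, Ncryst=500, seed=42,
#     ...                     query='indexed_by != "none"')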
78 | def augment_stream(streamname: str, outfile: str, new_fields: Union[pd.DataFrame, dict], where: str = 'chunk'):
79 |     """Add new fields to chunk (or crystal) headers in the stream file, which can then be used for chopping or filtering.
80 |     Somewhat similar to indexamajig's "include-hdf5-field" option, just *after* the fact. The result is
81 |     written to outfile; where selects insertion at chunk ('chunk') or crystal ('crystal') level.
82 | 
83 |     Args:
84 |         streamname (str): Name of the input stream file
85 |         new_fields (pd.DataFrame): pandas DataFrame with index matching the file and Event of the stream file,
86 |             and columns matching the additional fields to be added
87 |     """
88 | 
89 |     chunk_init = False
90 |     found_fn = ''
91 |     found_event = ''
92 |     with open(streamname, 'r') as fh_in, open(outfile, 'w') as fh:
93 |         for ln, l in enumerate(fh_in):
94 | 
95 |             if not chunk_init and l.startswith(BEGIN_CHUNK):
96 |                 # print('new chunk')
97 |                 chunk_init = True
98 |                 file_init = False
99 |                 event_init = False
100 |                 found_event = ''
101 |                 cols = list(new_fields.keys())
102 | 
103 |             elif chunk_init and l.startswith('Image filename:'):
104 |                 found_fn = l.split(': ')[-1].strip()
105 |                 # print(found_fn)
106 |                 file_init = True
107 | 
108 |             elif chunk_init and l.startswith('Event:'):
109 |                 found_event = l.split(': ')[-1].strip()
110 |                 # print(found_event)
111 |                 event_init = True
112 | 
113 |             elif chunk_init and event_init and file_init and \
114 |                 l.startswith(BEGIN_REFLECTIONS if where=='crystal' else BEGIN_PEAKS):
115 |                 # now is the time to insert the new stuff
116 |                 # print(found_fn, found_event)
117 |                 # print(chunk_init, event_init)
118 |                 for k, v in new_fields.loc[(found_fn, found_event),:].items():
119 |                     # print(v)
120 |                     fh.write(f'{k} = {v}\n')
121 | 
122 |             elif chunk_init and l.startswith(END_CHUNK):
123 |                 chunk_init = False
124 | 
125 |             fh.write(l)
126 | 
127 | def chop_stream(streamname: str, id_list: list, id_field: str = 'hdf5/%/shots/frame',
128 |                 id_suffix: str = 'frame', fn_contains: str = None):
129 |     """Chops a stream file into sub-streams containing only shots with a specific value of
130 |     a defined field, which must be in the chunk header. Useful e.g. for chopping into aggregation
131 |     frames, different sample grids, runs with different rotation angles etc.
132 | 
133 |     If you just want to *select* a sub-set of a stream file instead of chopping it up into many parts,
134 |     consider using the stream_grep script included with CrystFEL, which is way faster and more flexible.
135 | 
136 |     Args:
137 |         streamname (str): Stream file name
138 |         id_list (list): Values of the ID field which you want to keep in the final files.
139 |         id_field (str): Field in chunk data to select by. Defaults to 'hdf5/%/shots/frame'.
140 |         id_suffix (str): Suffix applied to the output stream file names ('frame' by default). Chunks are only considered if their image filename contains fn_contains (if given).
141 | 142 | """ 143 | 144 | outfiles = {} 145 | for fnum in id_list: 146 | outfiles[fnum] = open(streamname.rsplit('.', 1)[0] + f'-{id_suffix}{fnum}.stream', 'w') 147 | 148 | chunk_init = False 149 | chunk_string = '' 150 | value = -1 151 | 152 | with open(streamname, 'r') as fh_in: 153 | for ln, l in enumerate(fh_in): 154 | 155 | if not chunk_init and l.startswith(BEGIN_CHUNK): 156 | chunk_init = True 157 | chunk_string += l 158 | value = None 159 | include_file = True 160 | 161 | elif chunk_init and (fn_contains is not None) and l.startswith('Image filename'): 162 | found_fn = l.split(': ')[-1].strip() 163 | include_file = fn_contains in found_fn 164 | chunk_string += l 165 | 166 | elif chunk_init and l.startswith(id_field): 167 | found_value = l.rsplit('=',1)[-1].strip() 168 | found_value = parse_str_val(found_value) 169 | chunk_string += l 170 | value = found_value if found_value in id_list else None 171 | 172 | elif chunk_init and l.startswith(END_CHUNK): 173 | chunk_init = False 174 | chunk_string += l 175 | #print(frame) 176 | if (value is not None) and include_file: 177 | #print(chunk_string) 178 | outfiles[value].write(chunk_string) 179 | chunk_string = '' 180 | 181 | elif chunk_init: 182 | chunk_string += l 183 | 184 | elif not chunk_init: 185 | # no chunk initialized, write to all files 186 | for _, fh in outfiles.items(): 187 | fh.write(l) 188 | 189 | else: 190 | raise RuntimeError('This should not happen?! Please debug me.') 191 | 192 | def parse_str_val(input: str): 193 | try: 194 | return int(input.strip()) 195 | except ValueError: 196 | try: 197 | return float(input.strip()) 198 | except: 199 | return input.strip() 200 | 201 | class StreamParser: 202 | 203 | def __init__(self, filename, parse_now=True, serial_offset=-1, new_folder=None): 204 | 205 | self.merge_shot = False 206 | self.command = '' 207 | self._cell_string = [] 208 | self._geometry_string = [] 209 | self._peaks = pd.DataFrame() 210 | self._indexed = pd.DataFrame() 211 | self._shots = pd.DataFrame() 212 | self._crystals = pd.DataFrame() 213 | self._parsed_lines = 0 214 | self._total_lines = 0 215 | self.filename = filename 216 | self.serial_offset = serial_offset 217 | 218 | if parse_now: 219 | self.parse(new_folder) 220 | 221 | @property 222 | def geometry(self): 223 | """ 224 | 225 | :return: geometry section as dictionary 226 | """ 227 | 228 | g = {} 229 | for l in self._geometry_string: 230 | if l.startswith(';'): 231 | continue 232 | if '=' not in l: 233 | continue 234 | k, v = l.split(';')[0].split('=', 1) 235 | g[k.strip()] = parse_str_val(v) 236 | 237 | return g 238 | 239 | @property 240 | def cell(self): 241 | """ 242 | 243 | :return: cell section as dictionary 244 | """ 245 | 246 | c = {} 247 | for l in self._cell_string: 248 | if '=' not in l: 249 | continue 250 | k, v = l.split('=', 1) 251 | try: 252 | c[k.strip()] = float(v) 253 | except ValueError: 254 | c[k.strip()] = v.strip() 255 | 256 | return c 257 | 258 | @property 259 | def options(self): 260 | """ 261 | 262 | :return: crystfel call options (ONLY -- ones) as dict 263 | """ 264 | o = {} 265 | for opt in re.findall('--\S+', self.command): 266 | if '=' in opt: 267 | k, v = opt[2:].split('=', 1) 268 | try: 269 | o[k.strip()] = int(v) 270 | except ValueError: 271 | try: 272 | o[k.strip()] = float(v) 273 | except ValueError: 274 | o[k.strip()] = v.strip() 275 | else: 276 | o[opt[2:].strip()] = None 277 | return o 278 | 279 | @property 280 | def indexed(self): 281 | return self._indexed 282 | 283 | @property 284 | def peaks(self): 285 | return 
self._peaks 286 | 287 | @property 288 | def shots(self): 289 | return self._shots.merge(self._crystals, on=ID_FIELDS, how='left') 290 | 291 | @property 292 | def input_file(self): 293 | return self.command.split('-i ')[1].split(' -')[0].strip() 294 | 295 | @property 296 | def files(self): 297 | return list(self.shots.file.unique()) 298 | 299 | @property 300 | def num_crystals(self): 301 | return len(self._crystals) 302 | 303 | @property 304 | def num_shots(self): 305 | return len(self._shots) 306 | 307 | def parse(self, new_folder): 308 | 309 | linedat_peak = StringIO() 310 | linedat_index = StringIO() 311 | shotlist = [] 312 | crystallist = [] 313 | init_peak = False 314 | init_index = False 315 | init_geom = False 316 | init_cell = False 317 | init_crystal_info = False 318 | init_chunk = False 319 | shotdat = {'Event': None, 'shot_in_subset': None, 'subset': None, 320 | 'file': None, 'serial': None} 321 | crystal_info = {} 322 | idstr = None 323 | self._parsed_lines = 0 324 | self._total_lines = 0 325 | skip = False 326 | 327 | # lines are queried for their meaning. Lines belonging to tables are appended to StringIO virtual files, 328 | # which are then read into pandas data frames at the very end. The order of Queries is chosen to optimize 329 | # performance, that is, the table lines (most frequent) come first. 330 | with open(self.filename) as fh: 331 | 332 | for ln, l in enumerate(fh): 333 | 334 | self._parsed_lines += 1 335 | self._total_lines += 1 336 | if skip: 337 | skip = False 338 | continue 339 | 340 | # EVENT CHUNKS 341 | 342 | # Actual parsing (indexed peaks) 343 | if init_index and END_REFLECTIONS in l: 344 | init_index = False 345 | elif init_index: 346 | linedat_index.write( 347 | ' '.join([l.strip(), str(ln), idstr, '\n'])) 348 | 349 | # Actual parsing (found peaks) 350 | elif init_peak and END_PEAKS in l: 351 | init_peak = False 352 | elif init_peak: 353 | linedat_peak.write( 354 | ' '.join([l.strip(), str(ln), idstr, '\n'])) 355 | 356 | # Required info at chunk head 357 | elif BEGIN_CHUNK in l: 358 | shotdat = {'Event': '_', 'shot_in_subset': -1, 'subset': '_', 359 | 'file': '', 'serial': -1, 'first_line': ln, 'last_line': -1} 360 | init_chunk = True 361 | elif END_CHUNK in l: 362 | shotdat['last_line'] = ln 363 | shotlist.append(shotdat) 364 | shotdat = {'Event': None, 'shot_in_subset': None, 'subset': None, 365 | 'file': None, 'serial': None, 'first_line': None, 'last_line': None} 366 | init_chunk = False 367 | elif 'Event:' in l: 368 | shotdat['Event'] = l.split(': ')[-1].strip() 369 | dummy_shot = shotdat['Event'].split('//')[-1] 370 | if dummy_shot in ['_', '']: 371 | shotdat['shot_in_subset'] = 0 372 | else: 373 | shotdat['shot_in_subset'] = int(shotdat['Event'].split('//')[-1]) 374 | shotdat['subset'] = shotdat['Event'].split('//')[0].strip() 375 | elif 'Image filename:' in l: 376 | shotdat['file'] = l.split(':')[-1].strip() 377 | if new_folder is not None: 378 | shotdat['file'] = new_folder + '/' + shotdat['file'].rsplit('/', 1)[-1] 379 | elif 'Image serial number:' in l: 380 | shotdat['serial'] = int(l.split(': ')[1]) + self.serial_offset 381 | elif (' = ' in l) and (not init_crystal_info) and init_chunk: # optional shot info 382 | k, v = l.split(' = ', 1) 383 | shotdat[k.strip()] = parse_str_val(v) 384 | 385 | # Table parsing activation for found peaks 386 | elif (None not in shotdat.values()) and (BEGIN_PEAKS in l): 387 | skip = True # skip the column header line 388 | init_peak = True 389 | idstr = ' '.join([shotdat['file'], shotdat['Event'], 
str(shotdat['serial'])]) 390 | 391 | # Table parsing activation for indexing 392 | elif (None not in shotdat.values()) and (BEGIN_REFLECTIONS in l): 393 | skip = True 394 | init_index = True 395 | idstr = ' '.join([shotdat['file'], shotdat['Event'], str(shotdat['serial'])]) 396 | 397 | # Additional information from indexing 398 | elif BEGIN_CRYSTAL in l: 399 | crystal_info = {k: shotdat[k] for k in ID_FIELDS} 400 | init_crystal_info = True 401 | elif END_CRYSTAL in l: 402 | crystallist.append(crystal_info) 403 | crystal_info = {} 404 | init_crystal_info = False 405 | elif 'Cell parameters' in l: 406 | for k, v in zip(['a', 'b', 'c', 'dummy', 'al', 'be', 'ga'], l.split(' ')[2:9]): 407 | if k == 'dummy': 408 | continue 409 | crystal_info[k] = float(v) 410 | elif 'astar' in l: 411 | crystal_info.update( 412 | {k: float(v) for k, v in zip(['astar_x', 'astar_y', 'astar_z'], l.split(' ')[2:5])}) 413 | elif 'bstar' in l: 414 | crystal_info.update( 415 | {k: float(v) for k, v in zip(['bstar_x', 'bstar_y', 'bstar_z'], l.split(' ')[2:5])}) 416 | elif 'cstar' in l: 417 | crystal_info.update( 418 | {k: float(v) for k, v in zip(['cstar_x', 'cstar_y', 'cstar_z'], l.split(' ')[2:5])}) 419 | elif 'diffraction_resolution_limit' in l: 420 | crystal_info['diff_limit'] = float(l.rsplit(' nm', 1)[0].rsplit('= ', 1)[-1]) 421 | elif 'predict_refine/det_shift' in l: 422 | crystal_info['xshift'] = float(l.split(' ')[3]) 423 | crystal_info['yshift'] = float(l.split(' ')[6]) 424 | continue 425 | elif (' = ' in l) and init_crystal_info and init_chunk: # optional shot info 426 | k, v = l.split(' = ', 1) 427 | crystal_info[k.strip()] = parse_str_val(v) 428 | 429 | # CALL STRING 430 | 431 | elif 'indexamajig' in l: 432 | self.command = l 433 | 434 | # GEOMETRY FILE 435 | 436 | elif init_geom and (END_GEOM in l): 437 | init_geom = False 438 | elif init_geom: 439 | self._geometry_string.append(l.strip()) 440 | elif BEGIN_GEOM in l: 441 | init_geom = True 442 | 443 | # CELL FILE 444 | 445 | elif init_cell and (END_CELL in l): 446 | init_cell = False 447 | elif init_cell: 448 | self._cell_string.append(l.strip()) 449 | elif BEGIN_CELL in l: 450 | init_cell = True 451 | 452 | else: 453 | self._parsed_lines -= 1 454 | 455 | # Now convert to pandas data frames 456 | 457 | linedat_index.seek(0) 458 | linedat_peak.seek(0) 459 | self._peaks = pd.read_csv(linedat_peak, delim_whitespace=True, header=None, 460 | names=PEAK_COLUMNS + ['stream_line', 'file', 'Event', 'serial'] 461 | ).sort_values('serial').reset_index().sort_values(['serial', 'index']).reset_index( 462 | drop=True).drop('index', axis=1) 463 | 464 | self._indexed = pd.read_csv(linedat_index, delim_whitespace=True, header=None, 465 | names=REFLECTION_COLUMNS + ['stream_line', 'file', 'Event', 'serial'] 466 | ).sort_values('serial').reset_index().sort_values(['serial', 'index']).reset_index( 467 | drop=True).drop('index', axis=1) 468 | 469 | self._shots = pd.DataFrame(shotlist).sort_values('serial').reset_index(drop=True) 470 | if crystallist: 471 | self._crystals = pd.DataFrame(crystallist).sort_values('serial').reset_index(drop=True) 472 | else: 473 | self._crystals = pd.DataFrame(columns=ID_FIELDS) 474 | 475 | def write(self, filename, include_peaks=True, include_indexed=True, include_geom=True, include_cell=True): 476 | 477 | from tqdm import tqdm 478 | 479 | with open(filename, 'w') as fh: 480 | fh.write(HEAD+'\n'+GENERATOR+'\n'+self.command+'\n') 481 | if include_geom: 482 | fh.write(BEGIN_GEOM+'\n'+'\n'.join(self._geometry_string)+'\n'+END_GEOM + '\n') 483 | if 
include_cell: 484 | fh.write(BEGIN_CELL + '\n' + '\n'.join(self._cell_string) + '\n' + END_CELL + '\n') 485 | 486 | for ii, shot in tqdm(self._shots.iterrows(), total=len(self._shots)): 487 | fh.write(BEGIN_CHUNK + '\n') 488 | fh.write(f'Image filename: {shot.file}\n') 489 | fh.write(f'Event: {shot.Event}\n') 490 | fh.write(f'Image serial number: {shot.serial - self.serial_offset}\n') 491 | keys = set(shot.keys()).difference( 492 | {'Event', 'file', 'serial', 'shot_in_subset', 'subset'}) 493 | for k in keys: 494 | fh.write(f'{k} = {shot[k]}\n') 495 | if include_peaks: 496 | fh.write(BEGIN_PEAKS + '\n') 497 | self._peaks.loc[self._peaks.serial == shot.serial, PEAK_COLUMNS].to_csv( 498 | fh, sep=' ', index=False, na_rep='-nan') 499 | fh.write(END_PEAKS + '\n') 500 | 501 | crystals = self._crystals.loc[self._crystals.serial == shot.serial, :] 502 | 503 | for cid, crs in crystals.iterrows(): 504 | fh.write(BEGIN_CRYSTAL + '\n') 505 | fh.write(f'Cell parameters {crs.a} {crs.b} {crs.c} nm, {crs.al} {crs.be} {crs.ga} deg\n') 506 | fh.write(f'astar = {crs.astar_x} {crs.astar_y} {crs.astar_z} nm^-1\n') 507 | fh.write(f'bstar = {crs.bstar_x} {crs.bstar_y} {crs.bstar_z} nm^-1\n') 508 | fh.write(f'cstar = {crs.cstar_x} {crs.cstar_y} {crs.cstar_z} nm^-1\n') 509 | fh.write(f'diffraction_resolution_limit = {crs.diff_limit} nm^-1 or {10 / crs.diff_limit} A\n') 510 | fh.write(f'predict_refine/det_shift x = {crs.xshift} y = {crs.yshift} mm\n') 511 | keys = set(crs.keys()).difference( 512 | {'Event', 'file', 'serial', 'shot_in_subset', 'subset', 513 | 'a', 'b', 'c', 'al', 'be', 'ga', 514 | 'astar_x', 'astar_y', 'astar_z', 515 | 'bstar_x', 'bstar_y', 'bstar_z', 516 | 'cstar_x', 'cstar_y', 'cstar_z', 517 | 'diff_limit', 'xshift', 'yshift'}) 518 | for k in keys: 519 | fh.write(f'{k} = {crs[k]}\n') 520 | if include_indexed: 521 | fh.write(BEGIN_REFLECTIONS + '\n') 522 | self._indexed.loc[self._indexed.serial == shot.serial, REFLECTION_COLUMNS].to_csv( 523 | fh, sep=' ', index=False, na_rep='-nan') 524 | fh.write(END_REFLECTIONS + '\n') 525 | fh.write(END_CRYSTAL + '\n') 526 | fh.write(END_CHUNK + '\n') 527 | 528 | def change_path(self, new_folder=None, old_pattern=None, new_pattern=None): 529 | 530 | for df in [self._crystals, self._shots, self._indexed, self._peaks]: 531 | if (new_folder is not None) and (old_pattern is not None): 532 | df.file = new_folder + '/' + \ 533 | df.file.str.rsplit('/', 1, True).iloc[:, -1].str.replace(old_pattern, new_pattern) 534 | elif old_pattern is not None: 535 | df.file = df.file.str.replace(old_pattern, new_pattern) 536 | elif new_folder is not None: 537 | df.file = new_folder + '/' + df.file.str.rsplit('/', 1, True).iloc[:, -1] 538 | 539 | def get_cxi_format(self, what='peaks', shots=None, half_pixel_shift=True): 540 | 541 | if shots is None: 542 | shots = self.shots 543 | 544 | if half_pixel_shift: 545 | off = -.5 546 | else: 547 | off = 0 548 | 549 | if what == 'peaks': 550 | ifield = 'Intensity' 551 | indexed = False 552 | elif what in ['indexed', 'predict', 'prediction']: 553 | ifield = 'I' 554 | indexed = True 555 | else: 556 | raise ValueError('what must be peaks or indexed') 557 | 558 | # some majig to get CXI arrays 559 | if indexed: 560 | self._indexed['pk_id'] = self._indexed.groupby(['file', 'Event']).cumcount() 561 | pk2 = self._indexed.set_index(['file', 'Event', 'pk_id']) 562 | else: 563 | self._peaks['pk_id'] = self._peaks.groupby(['file', 'Event']).cumcount() 564 | pk2 = self._peaks.set_index(['file', 'Event', 'pk_id']) 565 | # joining step with shot list is 
required to make sure that shots without peaks/indexing stay in the output arrays
566 |         s2 = shots[['file', 'Event']].set_index(['file', 'Event'])
567 |         s2.columns = pd.MultiIndex.from_arrays([[], []], names=('field', 'pk_id'))
568 |         pk2 = s2.join(pk2.unstack(-1), how='left')
569 |         if indexed:
570 |             self._indexed.drop('pk_id', axis=1, inplace=True)  # remove the temporary column again
571 |         else:
572 |             self._peaks.drop('pk_id', axis=1, inplace=True)  # remove the temporary column again
573 | 
574 |         cxidat = {
575 |             'peakXPosRaw': (pk2['fs/px'] + off).fillna(0).values,
576 |             'peakYPosRaw': (pk2['ss/px'] + off).fillna(0).values,
577 |             'peakTotalIntensity': pk2[ifield].fillna(0).values,
578 |             'nPeaks': pk2['fs/px'].notna().sum(axis=1).values}
579 | 
580 |         if indexed:
581 |             cxidat.update({'peakSNR': (pk2[ifield]/pk2['Sigma(I)']).fillna(0).values,
582 |                            'indexH': pk2['h'].fillna(0).values.astype(int),
583 |                            'indexK': pk2['k'].fillna(0).values.astype(int),
584 |                            'indexL': pk2['l'].fillna(0).values.astype(int)})
585 | 
586 |         return cxidat
587 | 
-------------------------------------------------------------------------------- /docs/Makefile: --------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 
-------------------------------------------------------------------------------- /docs/conf.py: --------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 | 
7 | # -- Path setup --------------------------------------------------------------
8 | 
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | import recommonmark
16 | from recommonmark.transform import AutoStructify
17 | sys.path.insert(0, os.path.abspath('.'))
18 | sys.path.insert(0, os.path.abspath('..'))
19 | 
20 | 
21 | # -- Project information -----------------------------------------------------
22 | 
23 | project = 'diffractem'
24 | copyright = '2020, Robert Bücker'
25 | author = 'Robert Bücker'
26 | 
27 | 
28 | # -- General configuration ---------------------------------------------------
29 | 
30 | master_doc = 'index'
31 | 
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = ['sphinx.ext.autodoc', 36 | 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', 37 | 'sphinx.ext.autosectionlabel', 38 | # 'jupyter_sphinx.execute', 39 | # 'nbsphinx', 40 | 'm2r2', 41 | 'sphinx.ext.mathjax', 42 | 'nbsphinx_link', 'sphinx_autodoc_typehints' 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | source_suffix = ['.rst', '.md'] 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 51 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 52 | autosectionlabel_maxdepth=2 53 | autosectionlabel_prefix_document=True 54 | autodoc_mock_imports = ['PyQt5'] 55 | 56 | nbsphinx_execute = 'never' 57 | 58 | # -- Options for HTML output ------------------------------------------------- 59 | 60 | # The theme to use for HTML and HTML Help pages. See the documentation for 61 | # a list of builtin themes. 62 | # 63 | html_theme = 'sphinx_rtd_theme' 64 | 65 | # Add any paths that contain custom static files (such as style sheets) here, 66 | # relative to this directory. They are copied after the builtin static files, 67 | # so a file named "default.css" will overwrite the builtin "default.css". 68 | html_static_path = ['_static'] 69 | -------------------------------------------------------------------------------- /docs/crystfel.rst: -------------------------------------------------------------------------------- 1 | CrystFEL integration 2 | ==================== 3 | 4 | Stream parsing 5 | -------------- 6 | 7 | Peak finders 8 | ------------ 9 | 10 | Calling indexamajig 11 | ------------------- -------------------------------------------------------------------------------- /docs/dataset.rst: -------------------------------------------------------------------------------- 1 | The Dataset object 2 | ================== 3 | 4 | A diffractem data set is represented by a :class:`Dataset ` object, which manages all diffraction and meta data 5 | from an electron diffraction set, and provides a plethora of features to work with them. This comprises: 6 | 7 | * Automatic management of the HDF5 files containing the diffraction and meta data (see also 8 | :ref:`file_format`). 9 | * A framework to apply massively parallel computations on larger-than-memory diffraction data stacks 10 | using `dask `_, on a local machine or even remote clusters. 11 | * Handling of meta data for each single recorded diffraction pattern using an embedded `pandas.DataFrame` 12 | as a "shot list". 13 | * Methods for quick and transparent creation of sub-sets through complex queries on metadata. 14 | 15 | To learn how to handle `Dataset` objects, we'd recommend the `tutorials `_. 16 | 17 | Shot list 18 | --------- 19 | 20 | Data stacks 21 | ----------- 22 | 23 | Chunking 24 | ^^^^^^^^ 25 | 26 | Lazy evaluation and persisting 27 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 28 | -------------------------------------------------------------------------------- /docs/diffractem.adxv.rst: -------------------------------------------------------------------------------- 1 | diffractem.adxv module 2 | ====================== 3 | 4 | .. 
automodule:: diffractem.adxv 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.compute.rst: -------------------------------------------------------------------------------- 1 | diffractem.compute module 2 | ========================= 3 | 4 | .. automodule:: diffractem.compute 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.dataset.rst: -------------------------------------------------------------------------------- 1 | diffractem.dataset module 2 | ========================= 3 | 4 | .. automodule:: diffractem.dataset 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.io.rst: -------------------------------------------------------------------------------- 1 | diffractem.io module 2 | ==================== 3 | 4 | .. automodule:: diffractem.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.map_image.rst: -------------------------------------------------------------------------------- 1 | diffractem.map\_image module 2 | ============================ 3 | 4 | .. automodule:: diffractem.map_image 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.nexus.rst: -------------------------------------------------------------------------------- 1 | diffractem.nexus module 2 | ======================= 3 | 4 | .. automodule:: diffractem.nexus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.pre_proc_opts.rst: -------------------------------------------------------------------------------- 1 | diffractem.pre\_proc\_opts module 2 | ================================= 3 | 4 | .. automodule:: diffractem.pre_proc_opts 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.proc2d.rst: -------------------------------------------------------------------------------- 1 | diffractem.proc2d module 2 | ======================== 3 | 4 | .. automodule:: diffractem.proc2d 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.proc_peaks.rst: -------------------------------------------------------------------------------- 1 | diffractem.proc\_peaks module 2 | ============================= 3 | 4 | .. automodule:: diffractem.proc_peaks 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.rst: -------------------------------------------------------------------------------- 1 | 2 | .. mdinclude:: ../README.md 3 | 4 | Submodules 5 | ---------- 6 | 7 | diffractem's functionality is contained in various submodules, click below for their API documentation. 8 | The ones you'll likely deal with are :mod:`diffractem.dataset`, :mod:`diffractem.proc2d`, and :mod:`diffractem.tools`. 9 | 10 | .. 
toctree:: 11 | :maxdepth: 4 12 | 13 | diffractem.adxv 14 | diffractem.compute 15 | diffractem.dataset 16 | diffractem.io 17 | diffractem.map_image 18 | diffractem.nexus 19 | diffractem.pre_proc_opts 20 | diffractem.proc2d 21 | diffractem.proc_peaks 22 | diffractem.stream_parser 23 | diffractem.tools 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: diffractem 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /docs/diffractem.stream_parser.rst: -------------------------------------------------------------------------------- 1 | diffractem.stream\_parser module 2 | ================================ 3 | 4 | .. automodule:: diffractem.stream_parser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.tools.rst: -------------------------------------------------------------------------------- 1 | diffractem.tools module 2 | ======================= 3 | 4 | .. automodule:: diffractem.tools 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/edview.rst: -------------------------------------------------------------------------------- 1 | EDview 2 | ====== 3 | 4 | Viewer for diffractem-format files (see :doc:`file_format`) and/or CrystFEL stream files. -------------------------------------------------------------------------------- /docs/file_format.rst: -------------------------------------------------------------------------------- 1 | Diffractem NeXus files 2 | ====================== -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. diffractem documentation master file, created by 2 | sphinx-quickstart on Fri Apr 24 17:13:42 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to diffractem's documentation! 7 | ====================================== 8 | 9 | diffractem is a package for processing Serial Electron Diffraction data, following the protocols as outlined in `Bücker et al., Front. Mol. Biosci., 2021 `_. 10 | See this paper for a general introduction and documentation 11 | diffractem is mostly intended to be used from within Jupyter notebooks, such as those available from `here `_. 12 | 13 | Please see :ref:`diffractem:Installation` for how to install diffractem and CrystFEL such that you can get started. 14 | 15 | Of particular interest might be the documentation of :class:`PreProcOpts `, which explains the various options you can define for preprocessing. 16 | 17 | For the full API documentation, see :ref:`here ` 18 | 19 | Table of contents 20 | ----------------- 21 | 22 | .. toctree:: 23 | :maxdepth: 4 24 | 25 | Overview 26 | dataset 27 | file_format 28 | edview 29 | CrystFEL integration 30 | 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` 38 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | .. 
5 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/map_image.rst: -------------------------------------------------------------------------------- 1 | Crystal-map images 2 | ================== 3 | 4 | * Messy relation to :ref:`dataset` -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | diffractem 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | diffractem 8 | -------------------------------------------------------------------------------- /docs/pre_processing.rst: -------------------------------------------------------------------------------- 1 | Pre-processing with diffractem 2 | ============================== 3 | 4 | diffractem provides several helpers that simplify the pre-processing workflow; the pages below cover the pre-processing options and the (older) macro system. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | :caption: Preprocessing 9 | 10 | Pre-processing options 11 | Pre-processing macros (old) -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | h5py 2 | pandas 3 | tables 4 | hdf5plugin 5 | dask 6 | distributed 7 | tifffile 8 | scipy 9 | astropy 10 | matplotlib 11 | numba 12 | pyqtgraph 13 | pyyaml 14 | scikit-learn 15 | scikit-image 16 | opencv-python-headless 17 | PyQt5 18 | ipykernel 19 | nbsphinx 20 | jupyter-sphinx 21 | nbsphinx-link 22 | sphinx-autodoc-typehints 23 | m2r2 -------------------------------------------------------------------------------- /ideas.md: -------------------------------------------------------------------------------- 1 | # Ideas for diffractem features... 2 | 3 | ## Plotting 4 | 5 | * high-level plot functions (in general) from older notebooks 6 | * distribute in a clever way between overview and dataset 7 | 8 | ## Maps 9 | 10 | * map plot with unique axis projected length and orientation -> overview of e.g. preferred orientation. Could be represented as a complex image. Fill areas e.g. using watershed in intensity space. 11 | * lattice orientation clustering in maps 12 | 13 | ## Viewer 14 | 15 | * jump to arbitrary ID/serial 16 | * real-/reciprocal-space calibration, with diffraction rings and scale bar 17 | * rudimentary keyboard operation 18 | * line profiles,... check out glueviz?
20 | * direct transfer to Fiji (check how scipion does it) 21 | 22 | ## Pre-processing 23 | 24 | * function/script to automatically run the full pre-proc pipeline, including some heuristics 25 | * connect to a running experiment, e.g. using a socket interface or ZeroMQ 26 | 27 | ## Dataset 28 | 29 | * direct CXI format deposition 30 | * export to other formats (TIF, MRC, CBF, cctbx Pickle,...) 31 | 32 | ## Stream parser -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | astropy 2 | Cython 3 | dask 4 | distributed 5 | h5py 6 | hdf5plugin 7 | ipython 8 | ipywidgets 9 | matplotlib 10 | numba 11 | numpy 12 | pandas 13 | PyQt5 14 | pyqtgraph 15 | PyYAML 16 | recommonmark 17 | scikit_image 18 | scikit_learn 19 | scipy 20 | setuptools 21 | tifffile 22 | tqdm 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # This file is adapted from OnDA. 2 | # 3 | # OnDA is free software: you can redistribute it and/or modify it under the terms of 4 | # the GNU General Public License as published by the Free Software Foundation, either 5 | # version 3 of the License, or (at your option) any later version. 6 | # 7 | # OnDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 8 | # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | # PURPOSE. See the GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License along with OnDA. 12 | # If not, see . 13 | # 14 | # Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY, 15 | # a research centre of the Helmholtz Association. 16 | [build-system] 17 | requires = ["setuptools", "wheel", "Cython", "numpy"] 18 | 19 | [build_ext] 20 | inplace=1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import os 3 | import platform 4 | 5 | # DIFFRACTEM - tools for processing Serial Electron Diffraction Data 6 | # Copyright (C) 2020 Robert Bücker 7 | 8 | # This library is free software; you can redistribute it and/or 9 | # modify it under the terms of the GNU Lesser General Public 10 | # License as published by the Free Software Foundation; either 11 | # version 2.1 of the License, or (at your option) any later version. 12 | 13 | # This library is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 | # Lesser General Public License for more details. 17 | 18 | # You should have received a copy of the GNU Lesser General Public 19 | # License along with this library; if not, write to the Free Software 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 | 22 | # This library uses peakfinder8 for peak finding, written by Anton Barty, 23 | # Valerio Mariani, and Oleksandr Yefanov; 24 | # Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY 25 | 26 | 27 | ### --- 28 | # peakfinder8 Cython version adapted from OnDA: https://github.com/ondateam/onda 29 | 30 | try: 31 | import numpy 32 | except (ModuleNotFoundError, NameError): 33 | print('NumPy is not installed. Please install it before diffractem via:\n'
34 | 'pip install numpy') 35 | raise SystemExit(1) # abort the build: numpy.get_include() is needed below 36 | 37 | DIFFRACTEM_USE_CYTHON = os.getenv("DIFFRACTEM_USE_CYTHON") 38 | 39 | ext = ".pyx" if DIFFRACTEM_USE_CYTHON else ".c" # pylint: disable=invalid-name 40 | 41 | 42 | if platform.uname().system == 'Windows': 43 | libraries = [] 44 | # MSVC links the C++ runtime automatically 45 | else: 46 | libraries = ["stdc++"] 47 | # gcc/clang need libstdc++ linked explicitly 48 | 49 | peakfinder8_ext = Extension( # pylint: disable=invalid-name 50 | name="diffractem.peakfinder8_extension", 51 | include_dirs=[numpy.get_include()], 52 | libraries=libraries, 53 | sources=[ 54 | "src/peakfinder8_extension/peakfinder8.cpp", 55 | "src/peakfinder8_extension/peakfinder8_extension.pyx", 56 | ] 57 | if DIFFRACTEM_USE_CYTHON 58 | else [ 59 | "src/peakfinder8_extension/peakfinder8_extension.cpp", 60 | "src/peakfinder8_extension/peakfinder8.cpp", 61 | ], 62 | language="c++", 63 | ) 64 | 65 | if DIFFRACTEM_USE_CYTHON: 66 | from Cython.Build import cythonize 67 | print('USING CYTHON') 68 | extensions = cythonize(peakfinder8_ext) # pylint: disable=invalid-name 69 | else: 70 | extensions = [peakfinder8_ext] # pylint: disable=invalid-name 71 | 72 | ### --- 73 | 74 | setup( 75 | name='diffractem', 76 | version='0.4.1', 77 | packages=['diffractem'], 78 | url='https://github.com/robertbuecker/diffractem', 79 | license='', 80 | scripts=['bin/nxs2tif.py', 'bin/edview.py'], 81 | # scripts=['bin/nxs2tif.py', 'bin/edview.py', 'bin/quick_proc.py'], 82 | entry_points={ 83 | 'console_scripts': [ 84 | 'quick_proc = diffractem.quick_proc:main', 85 | 'stream2sol = diffractem.stream2sol:main' 86 | ], 87 | }, 88 | author='Robert Buecker', 89 | author_email='robert.buecker@cssb-hamburg.de', 90 | description='Tools for processing serial electron diffraction data.', 91 | install_requires=['h5py', 'numpy', 'pandas', 'hdf5plugin', 92 | 'dask[complete]', 'tifffile', 'scipy', 'astropy', 93 | 'matplotlib', 'numba', 'pyqtgraph', 'pyyaml', 'scikit-learn', 94 | 'scikit-image', 'PyQt5'], 95 | classifiers=[ 96 | "Programming Language :: Python :: 3", 97 | "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)", 98 | ], 99 | ext_modules = extensions, 100 | include_package_data = True 101 | ) 102 | -------------------------------------------------------------------------------- /src/peakfinder8_extension/peakfinder8.hh: -------------------------------------------------------------------------------- 1 | // This file is originally part of OnDA, available at 2 | // , which is released under the terms 3 | // of the GNU General Public License. It has been adapted for 4 | // use in diffractem. 5 | // 6 | // In agreement with the authors, you can redistribute it and/or modify 7 | // this file under the terms of the GNU Lesser General Public License 8 | // as published by the Free Software Foundation, either version 3 9 | // of the License, or (at your option) any later version. 10 | // 11 | // It is distributed in the hope that it will be useful, 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | // GNU Lesser General Public License for more details. 15 | // 16 | // You should have received a copy of the GNU Lesser General Public License 17 | // along with diffractem. If not, see . 18 | // 19 | // Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY, 20 | // a research centre of the Helmholtz Association.
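//
// Interface summary: tPeakList is a container of per-peak result arrays.
// Size it with allocatePeakList() before use and release it with
// freePeakList() afterwards. peakfinder8() fills the list from a detector
// frame `data`, tiled into nasics_x * nasics_y panels of asic_nx * asic_ny
// pixels each; pixels where `mask` is zero are skipped, and the per-pixel
// radius map `pix_r` defines the radial bins used for local background
// statistics. Accepted peaks must exceed ADCthresh and hitfinderMinSNR,
// and span between hitfinderMinPixCount and hitfinderMaxPixCount pixels.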
21 | 22 | #ifndef PEAKFINDER8_H 23 | #define PEAKFINDER8_H 24 | 25 | typedef struct { 26 | public: 27 | long nPeaks; 28 | long nHot; 29 | float peakResolution; // Radius of 80% of peaks (detector pixels) 30 | float peakResolutionA; // Radius of 80% of peaks (resolution, Angstrom) 31 | float peakDensity; // Density of peaks within this 80% figure 32 | float peakNpix; // Number of pixels in peaks 33 | float peakTotal; // Total integrated intensity in peaks 34 | int memoryAllocated; 35 | long nPeaks_max; 36 | 37 | float *peak_maxintensity; // Maximum intensity in peak 38 | float *peak_totalintensity; // Integrated intensity in peak 39 | float *peak_sigma; // Standard deviation of the local background 40 | float *peak_snr; // Signal-to-noise ratio of peak 41 | float *peak_npix; // Number of pixels in peak 42 | float *peak_com_x; // peak center of mass x (in raw layout) 43 | float *peak_com_y; // peak center of mass y (in raw layout) 44 | long *peak_com_index; // closest pixel corresponding to peak 45 | float *peak_com_x_assembled; // peak center of mass x (in assembled layout) 46 | float *peak_com_y_assembled; // peak center of mass y (in assembled layout) 47 | float *peak_com_r_assembled; // peak center of mass r (in assembled layout) 48 | float *peak_com_q; // Scattering vector of this peak 49 | float *peak_com_res; // Resolution of this peak 50 | } tPeakList; 51 | 52 | void allocatePeakList(tPeakList *peak, long NpeaksMax); 53 | void freePeakList(tPeakList peak); 54 | 55 | int peakfinder8(tPeakList *peaklist, float *data, char *mask, float *pix_r, 56 | long asic_nx, long asic_ny, long nasics_x, long nasics_y, 57 | float ADCthresh, float hitfinderMinSNR, 58 | long hitfinderMinPixCount, long hitfinderMaxPixCount, 59 | long hitfinderLocalBGRadius, char* outliersMask); 60 | 61 | #endif // PEAKFINDER8_H 62 | -------------------------------------------------------------------------------- /src/peakfinder8_extension/peakfinder8_extension.pyx: -------------------------------------------------------------------------------- 1 | # This file is originally part of OnDA, available at 2 | # , which is released under the terms 3 | # of the GNU General Public License. It has been adapted for 4 | # use in diffractem. 5 | # 6 | # In agreement with the authors, you can redistribute it and/or modify 7 | # this file under the terms of the GNU Lesser General Public License 8 | # as published by the Free Software Foundation, either version 3 9 | # of the License, or (at your option) any later version. 10 | # 11 | # It is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public License 17 | # along with diffractem. If not, see . 18 | # 19 | # Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY, 20 | # a research centre of the Helmholtz Association.
21 | 22 | from libcpp.vector cimport vector 23 | from libc.stdlib cimport malloc, free 24 | from libc.stdint cimport int8_t 25 | 26 | import numpy 27 | 28 | cdef extern from "peakfinder8.hh": 29 | 30 | ctypedef struct tPeakList: 31 | long nPeaks 32 | long nHot 33 | float peakResolution 34 | float peakResolutionA 35 | float peakDensity 36 | float peakNpix 37 | float peakTotal 38 | int memoryAllocated 39 | long nPeaks_max 40 | 41 | float *peak_maxintensity 42 | float *peak_totalintensity 43 | float *peak_sigma 44 | float *peak_snr 45 | float *peak_npix 46 | float *peak_com_x 47 | float *peak_com_y 48 | long *peak_com_index 49 | float *peak_com_x_assembled 50 | float *peak_com_y_assembled 51 | float *peak_com_r_assembled 52 | float *peak_com_q 53 | float *peak_com_res 54 | 55 | void allocatePeakList(tPeakList* peak_list, long max_num_peaks) 56 | void freePeakList(tPeakList peak_list) 57 | 58 | cdef extern from "peakfinder8.hh": 59 | 60 | int peakfinder8(tPeakList *peaklist, float *data, char *mask, float *pix_r, 61 | long asic_nx, long asic_ny, long nasics_x, long nasics_y, 62 | float ADCthresh, float hitfinderMinSNR, 63 | long hitfinderMinPixCount, long hitfinderMaxPixCount, 64 | long hitfinderLocalBGRadius, char *outliersMask) 65 | 66 | 67 | def peakfinder_8(int max_num_peaks, float[:,::1] data, char[:,::1] mask, 68 | float[:,::1] pix_r, long asic_nx, long asic_ny, long nasics_x, 69 | long nasics_y, float adc_thresh, float hitfinder_min_snr, 70 | long hitfinder_min_pix_count, long hitfinder_max_pix_count, 71 | long hitfinder_local_bg_radius): 72 | 73 | cdef tPeakList peak_list 74 | allocatePeakList(&peak_list, max_num_peaks) 75 | 76 | peakfinder8(&peak_list, &data[0, 0], &mask[0,0], &pix_r[0, 0], asic_nx, asic_ny, 77 | nasics_x, nasics_y, adc_thresh, hitfinder_min_snr, 78 | hitfinder_min_pix_count, hitfinder_max_pix_count, 79 | hitfinder_local_bg_radius, NULL) 80 | 81 | cdef int i 82 | cdef float peak_x, peak_y, peak_value 83 | cdef vector[double] peak_list_x 84 | cdef vector[double] peak_list_y 85 | cdef vector[long] peak_list_index 86 | cdef vector[double] peak_list_value 87 | cdef vector[double] peak_list_npix 88 | cdef vector[double] peak_list_maxi 89 | cdef vector[double] peak_list_sigma 90 | cdef vector[double] peak_list_snr 91 | 92 | num_peaks = peak_list.nPeaks 93 | 94 | if num_peaks > max_num_peaks: 95 | num_peaks = max_num_peaks 96 | 97 | for i in range(0, num_peaks): 98 | 99 | peak_x = peak_list.peak_com_x[i] 100 | peak_y = peak_list.peak_com_y[i] 101 | peak_index = peak_list.peak_com_index[i] 102 | peak_value = peak_list.peak_totalintensity[i] 103 | peak_npix = peak_list.peak_npix[i] 104 | peak_maxi = peak_list.peak_maxintensity[i] 105 | peak_sigma = peak_list.peak_sigma[i] 106 | peak_snr = peak_list.peak_snr[i] 107 | 108 | peak_list_x.push_back(peak_x) 109 | peak_list_y.push_back(peak_y) 110 | peak_list_index.push_back(peak_index) 111 | peak_list_value.push_back(peak_value) 112 | peak_list_npix.push_back(peak_npix) 113 | peak_list_maxi.push_back(peak_maxi) 114 | peak_list_sigma.push_back(peak_sigma) 115 | peak_list_snr.push_back(peak_snr) 116 | 117 | freePeakList(peak_list) 118 | 119 | return (peak_list_x, peak_list_y, peak_list_value, peak_list_index, 120 | peak_list_npix, peak_list_maxi, peak_list_sigma, peak_list_snr) 121 | 122 | 123 | def peakfinder_8_with_pixel_information(int max_num_peaks, float[:,::1] data, 124 | char[:,::1] mask, float[:,::1] pix_r, 125 | long asic_nx, long asic_ny, long nasics_x, 126 | long nasics_y, float adc_thresh, 127 | float hitfinder_min_snr, 
128 | long hitfinder_min_pix_count, 129 | long hitfinder_max_pix_count, 130 | long hitfinder_local_bg_radius, 131 | char[:,::1] outlier_mask): 132 | 133 | cdef tPeakList peak_list 134 | allocatePeakList(&peak_list, max_num_peaks) 135 | 136 | peakfinder8(&peak_list, &data[0, 0], &mask[0, 0], &pix_r[0, 0], asic_nx, asic_ny, 137 | nasics_x, nasics_y, adc_thresh, hitfinder_min_snr, 138 | hitfinder_min_pix_count, hitfinder_max_pix_count, 139 | hitfinder_local_bg_radius, &outlier_mask[0, 0]) 140 | 141 | cdef int i 142 | cdef float peak_x, peak_y, peak_value 143 | cdef vector[double] peak_list_x 144 | cdef vector[double] peak_list_y 145 | cdef vector[long] peak_list_index 146 | cdef vector[double] peak_list_value 147 | cdef vector[double] peak_list_npix 148 | cdef vector[double] peak_list_maxi 149 | cdef vector[double] peak_list_sigma 150 | cdef vector[double] peak_list_snr 151 | 152 | num_peaks = peak_list.nPeaks 153 | 154 | if num_peaks > max_num_peaks: 155 | num_peaks = max_num_peaks 156 | 157 | for i in range(0, num_peaks): 158 | 159 | peak_x = peak_list.peak_com_x[i] 160 | peak_y = peak_list.peak_com_y[i] 161 | peak_index = peak_list.peak_com_index[i] 162 | peak_value = peak_list.peak_totalintensity[i] 163 | peak_npix = peak_list.peak_npix[i] 164 | peak_maxi = peak_list.peak_maxintensity[i] 165 | peak_sigma = peak_list.peak_sigma[i] 166 | peak_snr = peak_list.peak_snr[i] 167 | 168 | peak_list_x.push_back(peak_x) 169 | peak_list_y.push_back(peak_y) 170 | peak_list_index.push_back(peak_index) 171 | peak_list_value.push_back(peak_value) 172 | peak_list_npix.push_back(peak_npix) 173 | peak_list_maxi.push_back(peak_maxi) 174 | peak_list_sigma.push_back(peak_sigma) 175 | peak_list_snr.push_back(peak_snr) 176 | 177 | freePeakList(peak_list) 178 | 179 | return (peak_list_x, peak_list_y, peak_list_value, peak_list_index, 180 | peak_list_npix, peak_list_maxi, peak_list_sigma, peak_list_snr) 181 | 182 | 183 | -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | v0.4.1 2 | --------------------------------------------------------------------------------
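Usage sketch for the `peakfinder_8` wrapper above. This example is not part of the repository: the panel geometry, beam center, and threshold values are illustrative assumptions, while the dtypes and contiguity are dictated by the typed memoryviews in the function signature.

import numpy as np
from diffractem.peakfinder8_extension import peakfinder_8

# Single monolithic 512 x 512 panel; float32/int8 dtypes and C-contiguous
# layout are required by the extension's typed memoryviews.
data = np.random.poisson(1.0, size=(512, 512)).astype(np.float32)
mask = np.ones_like(data, dtype=np.int8)  # 1 = valid pixel, 0 = excluded
y, x = np.indices(data.shape)
pix_r = np.hypot(x - 256.0, y - 256.0).astype(np.float32)  # radius map for an assumed beam center

px, py, intensity, index, npix, max_i, sigma, snr = peakfinder_8(
    500,             # max_num_peaks
    data, mask, pix_r,
    512, 512, 1, 1,  # asic_nx, asic_ny, nasics_x, nasics_y: one panel
    5.0, 8.0,        # adc_thresh, hitfinder_min_snr (illustrative values)
    2, 100,          # hitfinder_min_pix_count, hitfinder_max_pix_count
    3)               # hitfinder_local_bg_radius
print(f'found {len(px)} peaks')

Cython converts the returned C++ vectors to plain Python lists, so the eight result sequences (positions, intensities, pixel indices, pixel counts, maxima, background sigmas, and SNRs) can be fed directly into numpy or pandas.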