├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── bin ├── edview.py └── nxs2tif.py ├── diffractem ├── __init__.py ├── adxv.py ├── compute.py ├── dataset.py ├── io.py ├── map_image.py ├── nexus.py ├── pre_proc_opts.py ├── proc2d.py ├── proc_peaks.py ├── quick_proc.py ├── stream2sol.py ├── stream_parser.py └── tools.py ├── docs ├── Makefile ├── conf.py ├── crystfel.rst ├── dataset.rst ├── diffractem.adxv.rst ├── diffractem.compute.rst ├── diffractem.dataset.rst ├── diffractem.io.rst ├── diffractem.map_image.rst ├── diffractem.nexus.rst ├── diffractem.pre_proc_opts.rst ├── diffractem.proc2d.rst ├── diffractem.proc_peaks.rst ├── diffractem.rst ├── diffractem.stream_parser.rst ├── diffractem.tools.rst ├── edview.rst ├── file_format.rst ├── index.rst ├── installation.rst ├── make.bat ├── map_image.rst ├── modules.rst ├── pre_processing.rst └── requirements.txt ├── ideas.md ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── peakfinder8_extension │ ├── peakfinder8.cpp │ ├── peakfinder8.hh │ ├── peakfinder8_extension.cpp │ └── peakfinder8_extension.pyx └── version.txt /.gitignore: -------------------------------------------------------------------------------- 1 | debug.py 2 | *.egg-info 3 | .idea/ 4 | *-checkpoint.py 5 | .ipynb_checkpoints 6 | __pycache__ 7 | bin/modify_stream.py 8 | .vscode 9 | .DS_Store 10 | playbook.ipynb 11 | dist/ 12 | build/ 13 | playground.py 14 | pre_proc_play.py 15 | notebooks/dask-worker-space 16 | notebooks/proc_data 17 | *.h5 18 | *.nxs 19 | *.so 20 | *.dll 21 | *.dylib 22 | notebooks/ 23 | conda-recipe/ 24 | docs/_* 25 | publish.sh 26 | *.pyd 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 
34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. 
In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. 
You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. 
To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. 
As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. 
You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 
390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 446 | 447 | 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | 458 | END OF TERMS AND CONDITIONS 459 | 460 | How to Apply These Terms to Your New Libraries 461 | 462 | If you develop a new library, and you want it to be of the greatest 463 | possible use to the public, we recommend making it free software that 464 | everyone can redistribute and change. You can do so by permitting 465 | redistribution under these terms (or, alternatively, under the terms of the 466 | ordinary General Public License). 467 | 468 | To apply these terms, attach the following notices to the library. It is 469 | safest to attach them to the start of each source file to most effectively 470 | convey the exclusion of warranty; and each file should have at least the 471 | "copyright" line and a pointer to where the full notice is found. 472 | 473 | <one line to give the library's name and a brief idea of what it does.> 474 | Copyright (C) <year> <name of author> 475 | 476 | This library is free software; you can redistribute it and/or 477 | modify it under the terms of the GNU Lesser General Public 478 | License as published by the Free Software Foundation; either 479 | version 2.1 of the License, or (at your option) any later version. 480 | 481 | This library is distributed in the hope that it will be useful, 482 | but WITHOUT ANY WARRANTY; without even the implied warranty of 483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 484 | Lesser General Public License for more details. 485 | 486 | You should have received a copy of the GNU Lesser General Public 487 | License along with this library; if not, write to the Free Software 488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 489 | 490 | Also add information on how to contact you by electronic and paper mail. 491 | 492 | You should also get your employer (if you work as a programmer) or your 493 | school, if any, to sign a "copyright disclaimer" for the library, if 494 | necessary. Here is a sample; alter the names: 495 | 496 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 497 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. 498 | 499 | <signature of Ty Coon>, 1 April 1990 500 | Ty Coon, President of Vice 501 | 502 | That's all there is to it! -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/peakfinder8_extension/peakfinder8.hh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # diffractem 2 | 3 | Pre-processing software for serial electron diffraction (SerialED) data. 4 | See https://doi.org/10.1101/682575 for example results. 5 | 6 | Diffractem is intended for use within Jupyter notebooks - get a set of examples here: https://github.com/robertbuecker/serialed-examples.
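If you just want a taste of what a diffractem session looks like, here is a minimal, hypothetical sketch (the list-file name is a placeholder; `Dataset.from_list` and the `shots` table are used the same way by diffractem's own tools, e.g. `edview`):

```
from diffractem.dataset import Dataset

# Load a dataset from a list of HDF5/NeXus files (placeholder name),
# reading the shot/feature tables but not opening the image stacks yet:
ds = Dataset.from_list('experiment.lst', load_tables=True,
                       init_stacks=False, open_stacks=False)
print(ds.shots.head())  # shot list: one row per diffraction pattern (pandas DataFrame)
```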
7 | 8 | ## Installation 9 | _diffractem_ is tailored to pre-processing SerialED data, primarily for crystallographic analysis using _CrystFEL_, version 0.10.0 or higher: `https://www.desy.de/~twhite/crystfel/index.html`. 10 | To make the most of _diffractem_'s functionality, please download and install _CrystFEL_ following the installation instructions given on its homepage, if you do not have it already. 11 | During the build process of _CrystFEL_ using _meson_, the _pinkIndexer_ component will automatically be downloaded and installed. 12 | 13 | ### Create conda environment 14 | We _strongly_ suggest using the Anaconda3 Python distribution/package manager and creating a dedicated environment within it for diffractem. 15 | If you do not have Anaconda installed, it is sufficient to obtain _Miniconda_, the minimal version of the `conda` package manager, at https://docs.conda.io/en/latest/miniconda. 16 | 17 | Once installed, create a new conda environment for diffractem and activate it: 18 | ``` 19 | conda create -n diffractem -c conda-forge python=3.10 numpy scipy pandas dask distributed jupyterlab ipywidgets ipympl tifffile h5py 20 | conda activate diffractem 21 | ``` 22 | 23 | ### Install diffractem 24 | Finally, install diffractem itself, either from PyPI: 25 | ``` 26 | pip install diffractem 27 | ``` 28 | or, if you want to play/develop a bit more and stay up to date, you can clone this git repository and install diffractem in developer mode: 29 | ``` 30 | git clone https://github.com/robertbuecker/diffractem 31 | cd diffractem 32 | pip install -e . 33 | ``` 34 | 35 | Now you should be ready to go! To get started, why don't you download the example notebooks: 36 | ``` 37 | git clone https://github.com/robertbuecker/serialed-examples 38 | ``` 39 | And get example raw data at MPDL Edmond: https://edmond.mpdl.mpg.de/imeji/collection/32lI6YJ7DZaF5L_K. 40 | 41 | When you're ready: just make your own branches of the notebooks for your own projects, and have fun!
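By the way: a quick (unofficial) sanity check of your installation is to print the package version; the `version()` helper is defined in diffractem's top-level module and reads the `version.txt` file shipped with the package:

```
import diffractem
print(diffractem.version())
```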
42 | 43 | --- 44 | diffractem, (C) 2019-2022 Robert Bücker, robert.buecker@rigaku.com 45 | 46 | peakfinder8, (C) 2014-2019 Deutsches Elektronen-Synchrotron DESY 47 | -------------------------------------------------------------------------------- /bin/edview.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import hdf5plugin 3 | from diffractem.stream_parser import StreamParser 4 | from diffractem.dataset import Dataset 5 | from diffractem.proc_peaks import get_pk_data 6 | import argparse 7 | import pandas as pd 8 | import numpy as np 9 | import h5py 10 | import pyqtgraph as pg 11 | from PyQt5 import QtGui, QtWidgets, QtCore 12 | from PyQt5.QtWidgets import (QPushButton, QSpinBox, QCheckBox, 13 | QTextEdit, QWidget, QApplication, QGridLayout, QTableWidget, QTableWidgetItem) 14 | from diffractem.adxv import Adxv 15 | from warnings import warn 16 | from typing import Optional, Union 17 | from time import sleep 18 | 19 | # non-trivial detector geometries are currently not supported (licensing trouble) 20 | # from cfelpyutils.crystfel_utils import load_crystfel_geometry 21 | # from cfelpyutils.geometry_utils import apply_geometry_to_data, compute_visualization_pix_maps 22 | 23 | pg.setConfigOptions(imageAxisOrder='row-major') 24 | 25 | app = pg.mkQApp() 26 | 27 | class EDViewer(QWidget): 28 | 29 | def __init__(self, args): 30 | 31 | super().__init__() 32 | self.dataset = Dataset() 33 | self.args = args 34 | self.data_path = None 35 | self.current_shot = pd.Series() 36 | self.diff_image = np.empty((0,0)) 37 | self.map_image = np.empty((0,0)) 38 | self.init_widgets() 39 | self.adxv = None 40 | self.geom = None 41 | 42 | self.read_files() 43 | self.switch_shot(0) 44 | 45 | if self.args.internal: 46 | self.hist_img.setLevels(np.quantile(self.diff_image, 0.02), np.quantile(self.diff_image, 0.98)) 47 | 48 | self.update() 49 | 50 | self.show() 51 | 52 | def closeEvent(self, a0: QtGui.QCloseEvent) -> None: 53 | if not self.args.internal: 54 | self.adxv.exit() 55 | a0.accept() 56 | 57 | def read_files(self): 58 | 59 | file_type = args.filename.rsplit('.', 1)[-1] 60 | 61 | if file_type == 'stream': 62 | print(f'Parsing stream file {args.filename}...') 63 | stream = StreamParser(args.filename) 64 | # with open('tmp.geom', 'w') as fh: 65 | # fh.write('\n'.join(stream._geometry_string)) 66 | # self.geom = load_crystfel_geometry('tmp.geom') 67 | # os.remove('tmp.geom') 68 | # if len(self.geom['panels']) == 1: 69 | # print('Single-panel geometry, so ignoring transforms for now.') 70 | # #TODO make this more elegant, e.g. by overwriting image transform func with identity 71 | # self.geom = None 72 | self.geom = None 73 | 74 | try: 75 | self.data_path = stream.geometry['data'] 76 | except KeyError: 77 | if args.geometry is None: 78 | raise ValueError('No data location specified in geometry file. Please use -d parameter.') 79 | 80 | files = sorted(list(stream.shots['file'].unique())) 81 | # print('Loading data files found in stream... \n', '\n'.join(files)) 82 | try: 83 | self.dataset = Dataset.from_files(files, load_tables=False, init_stacks=False, open_stacks=False) 84 | self.dataset.load_tables(features=True) 85 | # print(self.dataset.shots.columns) 86 | self.dataset.merge_stream(stream) 87 | # get_selection would not be the right method to call (changes IDs), instead do... 
88 | self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected,:].reset_index(drop=True) 89 | # TODO get subset for incomplete coverage 90 | print('Merged stream and hdf5 shot lists') 91 | except Exception as err: 92 | self.dataset = Dataset() 93 | self.dataset._shots = stream.shots 94 | self.dataset._peaks = stream.peaks 95 | self.dataset._predict = stream.indexed 96 | self.dataset._shots['selected'] = True 97 | print('Could not load shot lists from H5 files, but have that from the stream file.') 98 | print(f'Reason: {err}') 99 | 100 | if args.geometry is not None: 101 | raise ValueError('Geometry files are currently not supported.') 102 | # self.geom = load_crystfel_geometry(args.geometry) 103 | 104 | if file_type in ['lst', 'h5', 'hdf', 'nxs']: 105 | self.dataset = Dataset.from_list(args.filename, load_tables=True, init_stacks=False, open_stacks=False) 106 | if not self.dataset.shots.selected.all(): 107 | # dirty removal of unwanted shots is sufficient in this case: 108 | self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected,:].reset_index(drop=True) 109 | 110 | if args.data_path is not None: 111 | self.data_path = args.data_path 112 | 113 | if self.data_path is None: 114 | # data path neither set via stream file, nor explicitly. We have to guess. 115 | try: 116 | with h5py.File(self.dataset.shots.file.iloc[0], 'r') as fh: 117 | base = '/%/data'.replace('%', self.dataset.shots.subset.iloc[0]) 118 | self.data_path = '/%/data/' + fh[base].attrs['signal'] 119 | print('Found data path', self.data_path) 120 | except Exception as err: 121 | warn(str(err), RuntimeWarning) 122 | print('Could not find out data path. Assuming /%/data/raw_counts') 123 | self.data_path = '/%/data/raw_counts' 124 | 125 | if self.args.query: 126 | print('Only showing shots with', self.args.query) 127 | #self.dataset.select(self.args.query) 128 | #self.dataset = self.dataset.get_selection(self.args.query, file_suffix=None, reset_id=False) 129 | #print('cutting shot list only') 130 | self.dataset._shots = self.dataset._shots.query(args.query) 131 | 132 | if self.args.sort_crystals: 133 | print('Re-sorting shots by region/crystal/run.') 134 | self.dataset._shots = self.dataset._shots.sort_values(by=['sample', 'region', 'crystal_id', 'run']) 135 | 136 | if not self.args.internal: 137 | #adxv_args = {'wavelength': 0.0251, 'distance': 2280, 'pixelsize': 0.055} 138 | adxv_args = {} 139 | self.adxv = Adxv(hdf5_path=self.data_path.replace('%', 'entry'), 140 | adxv_bin=self.args.adxv_bin, **adxv_args) 141 | 142 | self.b_goto.setMaximum(self.dataset.shots.shape[0]-1) 143 | self.b_goto.setMinimum(0) 144 | 145 | def update_image(self): 146 | print(self.current_shot) 147 | with h5py.File(self.current_shot['file'], mode='r') as f: 148 | 149 | if self.args.internal: 150 | path = self.data_path.replace('%', self.current_shot.subset) 151 | print('Loading {}:{} from {}'.format(path, 152 | self.current_shot['shot_in_subset'], self.current_shot['file'])) 153 | if len(f[path].shape) == 3: 154 | self.diff_image = f[path][int(self.current_shot['shot_in_subset']), ...] 
155 | elif len(f[path].shape) == 2: 156 | self.diff_image = f[path][:] 157 | 158 | self.diff_image[np.isnan(self.diff_image)] = 0 159 | self.hist_img.setHistogramRange(np.partition(self.diff_image.flatten(), 100)[100], np.partition(self.diff_image.flatten(), -100)[-100]) 160 | 161 | levels = self.hist_img.getLevels() 162 | # levels = (max(levels[0], -1), levels[1]) 163 | levels = (levels[0], levels[1]) 164 | if self.geom is not None: 165 | raise RuntimeError('This should not happen') 166 | # self.diff_image = apply_geometry_to_data(self.diff_image, self.geom) 167 | self.img.setImage(self.diff_image, autoRange=False) 168 | 169 | self.img.setLevels(levels) 170 | self.hist_img.setLevels(levels[0], levels[1]) 171 | 172 | if not self.args.no_map: 173 | try: 174 | path = args.map_path.replace('%', self.current_shot['subset']) 175 | self.map_image = f[path][...] 176 | self.mapimg.setImage(self.map_image) 177 | except KeyError: 178 | warn('No map found at {}!'.format(path), Warning) 179 | 180 | if not self.args.internal: 181 | self.adxv.load_image(self.current_shot.file) 182 | self.adxv.slab(self.current_shot.shot_in_subset + 1) 183 | 184 | def update_plot(self): 185 | 186 | allpk = [] 187 | 188 | if self.b_peaks.isChecked(): 189 | 190 | if (len(self.dataset.peaks) == 0) or args.cxi_peaks: 191 | path = args.cxi_peaks_path.replace('%', self.current_shot.subset) 192 | print('Loading CXI peaks of {}:{} from {}'.format(path, 193 | self.current_shot['shot_in_subset'], self.current_shot['file'])) 194 | with h5py.File(self.current_shot.file) as fh: 195 | ii = int(self.current_shot['shot_in_subset']) 196 | Npk = fh[path + '/nPeaks'][ii] 197 | x = fh[path + '/peakXPosRaw'][ii, :Npk] 198 | y = fh[path + '/peakYPosRaw'][ii, :Npk] 199 | 200 | peaks = pd.DataFrame((x, y), index=['fs/px', 'ss/px']).T 201 | 202 | else: 203 | peaks = self.dataset.peaks.loc[(self.dataset.peaks.file == self.current_shot.file) 204 | & (self.dataset.peaks.Event == self.current_shot.Event), 205 | ['fs/px', 'ss/px']] - 0.5 206 | x = peaks.loc[:,'fs/px'] 207 | y = peaks.loc[:,'ss/px'] 208 | 209 | if self.geom is not None: 210 | raise RuntimeError('Someone set geom to something. This should not happen.') 211 | # print('Projecting peaks...') 212 | # maps = compute_visualization_pix_maps(self.geom) 213 | # x = maps.x[y.astype(int), x.astype(int)] 214 | # y = maps.y[y.astype(int), x.astype(int)] 215 | 216 | if self.args.internal: 217 | ring_pen = pg.mkPen('g', width=0.8) 218 | self.found_peak_canvas.setData(x, y, 219 | symbol='o', size=13, pen=ring_pen, brush=(0, 0, 0, 0), antialias=True) 220 | else: 221 | allpk.append(peaks.assign(group=0)) 222 | 223 | else: 224 | self.found_peak_canvas.clear() 225 | 226 | if self.b_pred.isChecked() and (self.dataset.predict.shape[0] > 0): 227 | 228 | pred = self.dataset.predict.loc[(self.dataset.predict.file == self.current_shot.file) 229 | & (self.dataset.predict.Event == self.current_shot.Event), 230 | ['fs/px', 'ss/px']] - 0.5 231 | 232 | if self.geom is not None: 233 | raise RuntimeError('Someone set geom to not None. 
This should not happen.') 234 | # print('Projecting predictions...') 235 | # maps = compute_visualization_pix_maps(self.geom) 236 | # x = maps.x[pred.loc[:,'ss/px'].astype(int), 237 | # pred.loc[:,'fs/px'].astype(int)] 238 | # y = maps.y[pred.loc[:,'ss/px'].astype(int), 239 | # pred.loc[:,'fs/px'].astype(int)] 240 | else: 241 | x = pred.loc[:,'fs/px'] 242 | y = pred.loc[:,'ss/px'] 243 | 244 | if self.args.internal: 245 | square_pen = pg.mkPen('r', width=0.8) 246 | self.predicted_peak_canvas.setData(x, y, 247 | symbol='s', size=13, pen=square_pen, brush=(0, 0, 0, 0), antialias=True) 248 | else: 249 | allpk.append(pred.assign(group=1)) 250 | 251 | else: 252 | self.predicted_peak_canvas.clear() 253 | 254 | if not self.args.internal and len(allpk) > 0: 255 | self.adxv.define_spot('green', 5, 0, 0) 256 | self.adxv.define_spot('red', 0, 10, 1) 257 | self.adxv.load_spots(pd.concat(allpk, axis=0, ignore_index=True).values) 258 | elif not self.args.internal: 259 | self.adxv.load_spots(np.empty((0,3))) 260 | 261 | if self.dataset.features.shape[0] > 0: 262 | ring_pen = pg.mkPen('g', width=2) 263 | dot_pen = pg.mkPen('y', width=0.5) 264 | 265 | region_feat = self.dataset.features.loc[(self.dataset.features['region'] == self.current_shot['region']) 266 | & (self.dataset.features['sample'] == self.current_shot['sample'])] 267 | 268 | print('Number of region features:', region_feat.shape[0]) 269 | 270 | if self.current_shot['crystal_id'] != -1: 271 | single_feat = region_feat.loc[region_feat['crystal_id'] == self.current_shot['crystal_id'], :] 272 | x0 = single_feat['crystal_x'].squeeze() 273 | y0 = single_feat['crystal_y'].squeeze() 274 | if self.b_locations.isChecked(): 275 | self.found_features_canvas.setData(region_feat['crystal_x'], region_feat['crystal_y'], 276 | symbol='+', size=7, pen=dot_pen, brush=(0, 0, 0, 0), pxMode=True) 277 | else: 278 | self.found_features_canvas.clear() 279 | 280 | if self.b_zoom.isChecked(): 281 | self.map_box.setRange(xRange=(x0 - 5 * args.beam_diam, x0 + 5 * args.beam_diam), 282 | yRange=(y0 - 5 * args.beam_diam, y0 + 5 * args.beam_diam)) 283 | self.single_feature_canvas.setData([x0], [y0], 284 | symbol='o', size=args.beam_diam, pen=ring_pen, 285 | brush=(0, 0, 0, 0), pxMode=False) 286 | try: 287 | c_real = np.cross([self.current_shot.astar_x, self.current_shot.astar_y, self.current_shot.astar_z], 288 | [self.current_shot.bstar_x, self.current_shot.bstar_y, self.current_shot.bstar_z]) 289 | b_real = np.cross([self.current_shot.cstar_x, self.current_shot.cstar_y, self.current_shot.cstar_z], 290 | [self.current_shot.astar_x, self.current_shot.astar_y, self.current_shot.astar_z]) 291 | a_real = np.cross([self.current_shot.bstar_x, self.current_shot.bstar_y, self.current_shot.bstar_z], 292 | [self.current_shot.cstar_x, self.current_shot.cstar_y, self.current_shot.cstar_z]) 293 | a_real = 20 * a_real / np.sum(a_real ** 2) ** .5 294 | b_real = 20 * b_real / np.sum(b_real ** 2) ** .5 295 | c_real = 20 * c_real / np.sum(c_real ** 2) ** .5 296 | self.a_dir.setData(x=x0 + np.array([0, a_real[0]]), y=y0 + np.array([0, a_real[1]])) 297 | self.b_dir.setData(x=x0 + np.array([0, b_real[0]]), y=y0 + np.array([0, b_real[1]])) 298 | self.c_dir.setData(x=x0 + np.array([0, c_real[0]]), y=y0 + np.array([0, c_real[1]])) 299 | except: 300 | print('Could not read lattice vectors.') 301 | else: 302 | self.single_feature_canvas.setData([x0], [y0], 303 | symbol='o', size=13, pen=ring_pen, brush=(0, 0, 0, 0), pxMode=True) 304 | self.map_box.setRange(xRange=(0, self.map_image.shape[1]), 
yRange=(0, self.map_image.shape[0])) 305 | 306 | 307 | 308 | else: 309 | self.single_feature_canvas.setData([], []) 310 | 311 | def update(self): 312 | 313 | self.found_peak_canvas.clear() 314 | self.predicted_peak_canvas.clear() 315 | app.processEvents() 316 | 317 | self.update_image() 318 | if args.cxi_peaks and not args.internal: 319 | # give adxv some time to display the image before accessing the CXI data 320 | sleep(0.2) 321 | self.update_plot() 322 | 323 | print(self.current_shot) 324 | 325 | # CALLBACK FUNCTIONS 326 | 327 | def switch_shot(self, shot_id=None): 328 | if shot_id is None: 329 | shot_id = self.b_goto.value() 330 | 331 | self.shot_id = max(0, shot_id % self.dataset.shots.shape[0]) 332 | self.current_shot = self.dataset.shots.iloc[self.shot_id, :] 333 | self.meta_table.setRowCount(self.current_shot.shape[0]) 334 | self.meta_table.setColumnCount(2) 335 | 336 | for row, (k, v) in enumerate(self.current_shot.items()): 337 | self.meta_table.setItem(row, 0, QTableWidgetItem(k)) 338 | self.meta_table.setItem(row, 1, QTableWidgetItem(str(v))) 339 | 340 | self.meta_table.resizeRowsToContents() 341 | 342 | shot = self.current_shot 343 | title = {'sample': '', 'region': 'Reg', 'feature': 'Feat', 'frame': 'Frame', 'event': 'Ev', 'file': ''} 344 | titlestr = '' 345 | for k, v in title.items(): 346 | titlestr += f'{v} {shot[k]}' if k in shot.keys() else '' 347 | titlestr += f' ({shot.name} of {self.dataset.shots.shape[0]})' 348 | print(titlestr) 349 | 350 | self.setWindowTitle(titlestr) 351 | 352 | self.b_goto.blockSignals(True) 353 | self.b_goto.setValue(self.shot_id) 354 | self.b_goto.blockSignals(False) 355 | 356 | self.update() 357 | 358 | def switch_shot_rel(self, shift): 359 | self.switch_shot(self.shot_id + shift) 360 | 361 | def mouse_moved(self, evt): 362 | mousePoint = self.img.mapFromDevice(evt[0]) 363 | x, y = round(mousePoint.x()), round(mousePoint.y()) 364 | x = min(max(0, x), self.diff_image.shape[1] - 1) 365 | y = min(max(0, y), self.diff_image.shape[0] - 1) 366 | I = self.diff_image[y, x] 367 | #print(x, y, I) 368 | self.info_text.setPos(x, y) 369 | self.info_text.setText(f'{x:0.1f}, {y:0.1f}: {I:0.1f}') 370 | 371 | def init_widgets(self): 372 | 373 | self.imageWidget = pg.GraphicsLayoutWidget() 374 | 375 | # IMAGE DISPLAY 376 | 377 | # A plot area (ViewBox + axes) for displaying the image 378 | self.image_box = self.imageWidget.addViewBox() 379 | self.image_box.setAspectLocked() 380 | 381 | self.img = pg.ImageItem() 382 | self.img.setZValue(0) 383 | self.image_box.addItem(self.img) 384 | self.proxy = pg.SignalProxy(self.img.scene().sigMouseMoved, rateLimit=60, slot=self.mouse_moved) 385 | 386 | self.found_peak_canvas = pg.ScatterPlotItem() 387 | self.image_box.addItem(self.found_peak_canvas) 388 | self.found_peak_canvas.setZValue(2) 389 | self.found_peak_canvas.sigClicked.connect(self.onPeakClick) 390 | 391 | self.predicted_peak_canvas = pg.ScatterPlotItem() 392 | self.image_box.addItem(self.predicted_peak_canvas) 393 | self.predicted_peak_canvas.setZValue(2) 394 | self.predicted_peak_canvas.sigClicked.connect(self.onPredictionClick) 395 | 396 | self.info_text = pg.TextItem(text='') 397 | self.image_box.addItem(self.info_text) 398 | self.info_text.setPos(0, 0) 399 | 400 | # Contrast/color control 401 | self.hist_img = pg.HistogramLUTItem(self.img, fillHistogram=False) 402 | self.imageWidget.addItem(self.hist_img) 403 | 404 | # MAP DISPLAY 405 | 406 | self.map_widget = pg.GraphicsLayoutWidget() 407 | self.map_widget.setWindowTitle('region map') 408 | 409 | # Map 
image control 410 | self.map_box = self.map_widget.addViewBox() 411 | self.map_box.setAspectLocked() 412 | 413 | self.mapimg = pg.ImageItem() 414 | self.mapimg.setZValue(0) 415 | self.map_box.addItem(self.mapimg) 416 | 417 | self.found_features_canvas = pg.ScatterPlotItem() 418 | self.map_box.addItem(self.found_features_canvas) 419 | self.found_features_canvas.setZValue(2) 420 | 421 | self.single_feature_canvas = pg.ScatterPlotItem() 422 | self.map_box.addItem(self.single_feature_canvas) 423 | self.single_feature_canvas.setZValue(2) 424 | 425 | # lattice vectors 426 | self.a_dir = pg.PlotDataItem(pen=pg.mkPen('r', width=1)) 427 | self.b_dir = pg.PlotDataItem(pen=pg.mkPen('g', width=1)) 428 | self.c_dir = pg.PlotDataItem(pen=pg.mkPen('b', width=1)) 429 | self.map_box.addItem(self.a_dir) 430 | self.map_box.addItem(self.b_dir) 431 | self.map_box.addItem(self.c_dir) 432 | 433 | # Contrast/color control 434 | self.hist_map = pg.HistogramLUTItem(self.mapimg) 435 | self.map_widget.addItem(self.hist_map) 436 | 437 | ### CONTROL BUTTONS 438 | 439 | b_rand = QPushButton('rnd') 440 | b_plus10 = QPushButton('+10') 441 | b_minus10 = QPushButton('-10') 442 | b_last = QPushButton('last') 443 | self.b_peaks = QCheckBox('peaks') 444 | self.b_pred = QCheckBox('crystal') 445 | self.b_zoom = QCheckBox('zoom') 446 | self.b_locations = QCheckBox('locations') 447 | self.b_locations.setChecked(True) 448 | b_reload = QPushButton('reload') 449 | self.b_goto = QSpinBox() 450 | 451 | b_rand.clicked.connect(lambda: self.switch_shot(np.random.randint(0, self.dataset.shots.shape[0] - 1))) 452 | b_plus10.clicked.connect(lambda: self.switch_shot_rel(+10)) 453 | b_minus10.clicked.connect(lambda: self.switch_shot_rel(-10)) 454 | b_last.clicked.connect(lambda: self.switch_shot(self.dataset.shots.index.max())) 455 | self.b_peaks.stateChanged.connect(self.update) 456 | self.b_pred.stateChanged.connect(self.update) 457 | self.b_zoom.stateChanged.connect(self.update) 458 | self.b_locations.stateChanged.connect(self.update) 459 | b_reload.clicked.connect(lambda: self.read_files()) 460 | self.b_goto.valueChanged.connect(lambda: self.switch_shot(None)) 461 | 462 | self.button_layout = QGridLayout()  # QGridLayout lives in QtWidgets (imported above), not QtGui, in PyQt5 463 | self.button_layout.addWidget(b_plus10, 0, 2) 464 | self.button_layout.addWidget(b_minus10, 0, 1) 465 | self.button_layout.addWidget(b_rand, 0, 4) 466 | self.button_layout.addWidget(b_last, 0, 3) 467 | self.button_layout.addWidget(self.b_goto, 0, 0) 468 | self.button_layout.addWidget(b_reload, 0, 10) 469 | self.button_layout.addWidget(self.b_peaks, 0, 21) 470 | self.button_layout.addWidget(self.b_pred, 0, 22) 471 | self.button_layout.addWidget(self.b_zoom, 0, 23) 472 | self.button_layout.addWidget(self.b_locations, 0, 24) 473 | 474 | self.meta_table = QTableWidget() 475 | self.meta_table.verticalHeader().setVisible(False) 476 | self.meta_table.horizontalHeader().setVisible(False) 477 | self.meta_table.setFont(QtGui.QFont('Helvetica', 10)) 478 | 479 | # --- TOP-LEVEL ARRANGEMENT 480 | self.top_layout = QGridLayout() 481 | self.setLayout(self.top_layout) 482 | 483 | if self.args.internal: 484 | self.top_layout.addWidget(self.imageWidget, 0, 0) 485 | self.top_layout.setColumnStretch(0, 4) 486 | 487 | if not self.args.no_map: 488 | self.top_layout.addWidget(self.map_widget, 0, 1) 489 | self.top_layout.setColumnStretch(1, 3)  # stretch factors must be ints; 4:3 keeps the intended 2:1.5 ratio 490 | 491 | self.top_layout.addWidget(self.meta_table, 0, 2) 492 | self.top_layout.addLayout(self.button_layout, 1, 0, 1, 3) 493 | 494 | self.top_layout.setColumnStretch(2, 0) 495 | 496 | def
onPeakClick(self, points, ev): 497 | x, y = np.array([pt.pos().x() for pt in ev]).reshape(1,-1), \ 498 | np.array([pt.pos().y() for pt in ev]).reshape(1,-1) 499 | n = np.array([len(x)]) 500 | ctr_x, ctr_y = np.array(self.current_shot.center_x).reshape(1), \ 501 | np.array(self.current_shot.center_y).reshape(1) 502 | #TODO GET THE PROPER VALUES HERE, DUMMY 503 | cl = 3.06 504 | px = 55e-6 505 | pkd = get_pk_data(n, x, y, ctr_x, ctr_y, pxs=px, clen=cl, wl=0.0251) 506 | print('Clicked peak:\n' 507 | f'Raw position (px): {pkd["peakXPosRaw"][0,0]:.1f}, {pkd["peakYPosRaw"][0,0]:.1f}\n' 508 | f'Corrected position (px): {pkd["peakXPosCor"][0,0]:.1f}, {pkd["peakYPosCor"][0,0]:.1f}\n' 509 | f'Corrected position (mm): {1000*px*pkd["peakXPosCor"][0,0]:.2f}, {1000*px*pkd["peakYPosCor"][0,0]:.2f}\n' 510 | f'd vector (1/A), azimuth (deg): {pkd["peakD"][0,0]:.2f}, {180/np.pi*pkd["peakAzimuth"][0,0]:.1f}\n') 511 | 512 | def onPredictionClick(self, points, ev): 513 | x, y = np.array([pt.pos().x() for pt in ev]).reshape(1,-1), \ 514 | np.array([pt.pos().y() for pt in ev]).reshape(1,-1) 515 | n = np.array([len(x)]) 516 | ctr_x, ctr_y = np.array(self.current_shot.center_x).reshape(1), \ 517 | np.array(self.current_shot.center_y).reshape(1) 518 | #TODO GET THE PROPER VALUES HERE, DUMMY 519 | cl = 3.06 520 | px = 55e-6 521 | pkd = get_pk_data(n, x, y, ctr_x, ctr_y, pxs=px, clen=cl, wl=0.0251) 522 | print('Clicked prediction:\n' 523 | 'TODO: GET HKL\n' 524 | f'Raw position (px): {pkd["peakXPosRaw"][0,0]:.1f}, {pkd["peakYPosRaw"][0,0]:.1f}\n' 525 | f'Corrected position (px): {pkd["peakXPosCor"][0,0]:.1f}, {pkd["peakYPosCor"][0,0]:.1f}\n' 526 | f'Corrected position (mm): {1000*px*pkd["peakXPosCor"][0,0]:.2f}, {1000*px*pkd["peakYPosCor"][0,0]:.2f}\n' 527 | f'd vector (1/A), azimuth (deg): {pkd["peakD"][0,0]:.2f}, {180/np.pi*pkd["peakAzimuth"][0,0]:.1f}\n') 528 | 529 | if __name__ == '__main__': 530 | 531 | parser = argparse.ArgumentParser(description='Viewer for Serial Electron Diffraction data') 532 | parser.add_argument('filename', type=str, help='Stream file, list file, or HDF5') 533 | parser.add_argument('-g', '--geometry', type=str, help='CrystFEL geometry file, might be helpful') 534 | parser.add_argument('-q', '--query', type=str, help='Query string to filter shots by column values') 535 | parser.add_argument('-d', '--data_path', type=str, help='Data field in HDF5 file(s). 
Defaults to stream file or tries a few.') 536 | parser.add_argument('--internal', help='Use internal diffraction viewer instead of adxv', action='store_true') 537 | parser.add_argument('--adxv-bin', help='Location/command string of adxv binary', default='adxv') 538 | parser.add_argument('--map-path', type=str, help='Path to map image', default='/%/map/image') 539 | parser.add_argument('--feature-path', type=str, help='Path to map feature table', default='/%/map/features') 540 | parser.add_argument('--cxi-peaks', help='Prefer CXI-format peaks in HDF5 files over stream/HDF5 table', action='store_true') 541 | parser.add_argument('--cxi-peaks-path', type=str, help='Path to CXI peaks table', default='/%/data') 542 | parser.add_argument('--peaks-path', type=str, help='Path to peaks table in HDF5 files', default='/%/results/peaks') 543 | parser.add_argument('--predict-path', type=str, help='Path to prediction table', default='/%/results/predict') 544 | parser.add_argument('--no-map', help='Hide map, even if we had it', action='store_true') 545 | parser.add_argument('--beam-diam', type=int, help='Beam size displayed in real space, in pixels', default=5) 546 | parser.add_argument('--sort-crystals', help='Sort shots by crystal IDs', action='store_true') 547 | 548 | args = parser.parse_args() 549 | 550 | # operation modes: 551 | # (1) file list (+ geometry) + nxs: estimate geometry from nxs if geometry is absent 552 | # (2) expanded file list (+ geometry) + nxs: first match nxs shot lists vs expanded file list 553 | # (3) (expanded) file list + geometry + hdf5: omit map image automatically 554 | # (4) stream + nxs: as (2), peaks/predict in stream take precedence over nxs 555 | # (5) stream + hdf5: as (3) 556 | 557 | # TODO next: work on read_file 558 | viewer = EDViewer(args) 559 | 560 | import sys 561 | if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'): 562 | app.instance().exec_() 563 | -------------------------------------------------------------------------------- /bin/nxs2tif.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from tifffile import imsave 3 | import h5py 4 | import sys 5 | import numpy as np 6 | 7 | fh = h5py.File(sys.argv[1], 'r')  # open read-only; older h5py versions default to append mode 8 | ds = fh['/entry/instrument/detector/data'] 9 | if len(sys.argv) > 2: 10 | fn = sys.argv[2] 11 | else: 12 | fn = sys.argv[1].rsplit('.', 1)[0] + '.tif' 13 | if ds.dtype == np.int32: 14 | ds = ds[:].astype(np.float32) 15 | imsave(fn, ds[:, :, :]) 16 | print('Wrote ' + fn) -------------------------------------------------------------------------------- /diffractem/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __all__ = ['compute', 'io', 'proc2d', 'tools', 4 | 'map_image', 'pre_proc_opts', 'proc_peaks', 5 | 'nexus', 'stream_parser', 'adxv'] 6 | 7 | def version(): 8 | try: 9 | with open(__file__.rsplit('/',1)[0] + '/../version.txt') as fh: 10 | return fh.readline().strip() 11 | except FileNotFoundError: 12 | return 'Could not determine diffractem version' 13 | 14 | 15 | def gap_pixels(detector='Lambda750k'): 16 | """Returns the gap pixels of the Lambda detector as a binary mask""" 17 | if detector == 'Lambda750k': 18 | gaps = np.zeros((516, 1556), dtype=bool)  # np.bool was removed in NumPy 1.24; use the builtin 19 | for k in range(255, 1296, 260): 20 | gaps[:, k:k+6] = True 21 | gaps[255:261] = True 22 | else: 23 | raise ValueError(f'Unknown detector: {detector}') 24 | return gaps 25 | 26 | 27 | def panel_pix(panel_id=1, pxmask=None,
img=None, 28 | detector='Lambda750k', include_gap=True): 29 | 30 | if detector == 'Lambda750k': 31 | shape = (1556, 516) 32 | panel_size = 256 if include_gap else 255 33 | panel_gap = 4 if include_gap else 6 34 | cutoff = (60, 0) 35 | row, col = divmod(panel_id-1, 6) 36 | if panel_id > 6: 37 | col = 5-col 38 | if panel_id > 12: 39 | raise ValueError('panel_id cannot be larger than 12') 40 | else: 41 | raise ValueError(f'Unknown detector {detector}') 42 | 43 | mask = np.zeros((shape[1], shape[0])) 44 | #print(row,col) 45 | cstart = col*(panel_size + panel_gap) 46 | rstart = row*(panel_size + panel_gap) 47 | mask[rstart:rstart+panel_size, cstart:cstart+panel_size] = 1 48 | mask[:(cutoff[1]+1), :(cutoff[0]+1)] = 0 49 | mask[-(cutoff[1]+1):, -(cutoff[0]+1):] = 0 50 | if pxmask is not None: 51 | mask = mask - pxmask 52 | if img is None: 53 | return mask == 1 54 | else: 55 | cimg = img[rstart:rstart+panel_size, cstart:cstart+panel_size] 56 | if pxmask is not None: 57 | pm = pxmask[rstart:rstart+panel_size, cstart:cstart+panel_size] 58 | else: 59 | pm = np.zeros_like(cimg) 60 | cimg[pm != 0] = -1 61 | return cimg 62 | 63 | 64 | def normalize_names(strin): 65 | strout = strin 66 | for character in [' ', '/', '(', ')', '-']: 67 | strout = strout.replace(character, '_') 68 | return strout 69 | 70 | 71 | def normalize_keys(dictionary): 72 | d = {} 73 | for k, v in dictionary.items(): 74 | if isinstance(v, dict): 75 | d[normalize_names(k)] = normalize_keys(v) 76 | else: 77 | d[normalize_names(k)] = v 78 | return d 79 | -------------------------------------------------------------------------------- /diffractem/adxv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adxv remote control. 3 | 4 | Inspired by: 5 | https://github.com/erikssod/adxv_class by Daniel Eriksson (MIT license) 6 | https://github.com/keitaroyam/yamtbx by Keitaro Yamashita (BSD license) 7 | 8 | """ 9 | import socket 10 | import subprocess 11 | import time 12 | import logging 13 | 14 | class Adxv: 15 | 16 | def __init__(self, adxv_bin=None, hdf5_path='/entry/data/raw_counts', **kwargs): 17 | 18 | self.logger = logging.getLogger() 19 | handler = logging.StreamHandler() 20 | handler.setFormatter( 21 | logging.Formatter(fmt=('[%(levelname)s] %(name)s ''%(funcName)s | %(message)s'))) 22 | self.logger.handlers = [handler] 23 | self.logger.setLevel('INFO') # or INFO, or DEBUG, etc 24 | 25 | self.logger = logging.getLogger(__name__) 26 | 27 | self.adxv_bin = adxv_bin 28 | self.adxv_opts = kwargs 29 | 30 | if self.adxv_bin is None: 31 | self.adxv_bin = "adxv" 32 | 33 | self.hdf5_path = hdf5_path 34 | self.adxv_proc = None # subprocess object 35 | self.adxv_port = 8100 # adxv's default port. overridden later. 36 | self.sock = None 37 | 38 | self.spot_type_counter = -1 39 | 40 | def start(self, cwd=None): 41 | 42 | if not self.is_alive(): 43 | 44 | # find available port number 45 | self.logger.debug('Searching for available port number') 46 | sock_test = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 47 | sock_test.bind(("localhost", 0)) 48 | self.adxv_port = sock_test.getsockname()[1] 49 | sock_test.close() 50 | self.logger.debug(f'Port {self.adxv_port} will be used for adxv. 
Attempting to connect.') 51 | 52 | # build adxv start command 53 | adxv_comm = self.adxv_bin + ' -socket {} -hdf5dataset {}'.format(self.adxv_port, self.hdf5_path) 54 | 55 | for opt, val in self.adxv_opts.items(): 56 | adxv_comm += ' -{} {}'.format(opt, val) 57 | 58 | # start adxv 59 | self.logger.debug(f'adxv command is: \n {adxv_comm}') 60 | self.adxv_proc = subprocess.Popen(adxv_comm, shell=True, cwd=cwd) 61 | 62 | for i in range(10): # try for 5 seconds. 63 | try: 64 | self.sock = socket.socket(socket.AF_INET, 65 | socket.SOCK_STREAM) # On OSX(?), need to re-create object when failed 66 | self.sock.connect(("localhost", self.adxv_port)) 67 | self.logger.info('Connected to Port {}'.format(self.adxv_port)) 68 | break 69 | except socket.error as err: 70 | self.logger.debug('Waiting for socket connection...') 71 | time.sleep(.5) 72 | continue 73 | 74 | def is_alive(self): 75 | return self.adxv_proc is not None and self.adxv_proc.poll() is None # None means still running. 76 | 77 | def send(self, payload): 78 | ''' 79 | Takes command, encodes it, and sends it down the socket. 80 | ''' 81 | 82 | self.start() 83 | 84 | try: 85 | self.logger.debug("payload = {}".format(payload)) 86 | self.sock.sendall(payload.encode()) 87 | 88 | except Exception as e: 89 | self.logger.error(e) 90 | 91 | def load_image(self, image_file: str): 92 | ''' 93 | Load an image file 94 | ''' 95 | payload = 'load_image %s\n' % (image_file) 96 | self.send(payload) 97 | 98 | def raise_window(self, window: str): 99 | ''' 100 | Raises a Window. must be one of 101 | 'Control', 'Image', 'Magnify', 'Line', or 102 | 'Load'. 103 | ''' 104 | payload = 'raise_window %s\n' % (window) 105 | self.send(payload) 106 | 107 | def raise_image(self): 108 | ''' 109 | Raises image window; see raise_window for 110 | additional options but this seems like the 111 | most common one. 
112 | '''
113 | payload = 'raise_window Image\n'
114 | self.send(payload)
115 | 
116 | def save_image(self, path_name_format: str):
117 | '''
118 | Save an image file (jpeg or tiff)
119 | '''
120 | payload = 'save_image %s\n' % (path_name_format)
121 | self.send(payload)
122 | 
123 | def slab(self, N: int):
124 | '''
125 | Display slab N
126 | '''
127 | payload = 'slab %i\n' % (N)
128 | self.send(payload)
129 | 
130 | def set_slab(self, N: int):
131 | '''
132 | Same as slab, but don’t load the image
133 | '''
134 | payload = 'set_slab %i\n' % (N)
135 | self.send(payload)
136 | 
137 | def slabs(self, N: int):
138 | '''
139 | Slab thickness to display
140 | '''
141 | payload = 'slabs %i\n' % (N)
142 | self.send(payload)
143 | 
144 | def set_slabs(self, N: int):
145 | '''
146 | Same as slabs, but don’t load the image
147 | '''
148 | payload = 'set_slabs %i\n' % (N)
149 | self.send(payload)
150 | 
151 | def exit(self):
152 | '''
153 | Exit Adxv
154 | '''
155 | payload = 'exit\n'
156 | self.send(payload)
157 | 
158 | def stride(self, N: int):
159 | """
160 | stride - sets Stride in the Load Window
161 | """
162 | payload = 'stride %i\n' % (N)
163 | self.send(payload)
164 | 
165 | def increment_slabs(self):
166 | """
167 | increment_slabs - checks the +Slabs checkbox in the Load Window
168 | """
169 | payload = 'increment_slabs\n'
170 | self.send(payload)
171 | 
172 | def increment_files(self):
173 | """
174 | increment_files - unchecks the +Slabs checkbox in the Load Window
175 | """
176 | payload = 'increment_files\n'
177 | self.send(payload)
178 | 
179 | def contrast_min(self, N: int):
180 | """
181 | contrast_min - sets the min contrast value
182 | """
183 | payload = 'contrast_min %i\n' % (N)
184 | self.send(payload)
185 | 
186 | def contrast_max(self, N: int):
187 | """
188 | contrast_max - sets the max contrast value
189 | """
190 | payload = 'contrast_max %i\n' % (N)
191 | self.send(payload)
192 | 
193 | def define_spot(self, color, radius=0, box=0, group=None):
194 | 
195 | if group is None:
196 | self.spot_type_counter += 1
197 | else:
198 | self.spot_type_counter = group
199 | 
200 | self.send('box %d %d\n' % (box, box)) # seems ignored?
201 | self.send('define_type %d color %s radius %d\n' % (self.spot_type_counter, color, radius))  # use the resolved counter; group may be None
202 | 
203 | return self.spot_type_counter
204 | 
205 | def load_spots(self, spots):
206 | #if len(spots) == 0:
207 | # return
208 | 
209 | self.send("load_spots %d\n" % len(spots))
210 | 
211 | for x, y, t in spots:
212 | self.send("%.2f %.2f %d\n" % (x, y, t))
213 | 
214 | self.send("end_of_pack\n")
215 | 
-------------------------------------------------------------------------------- /diffractem/compute.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import dask.array as da
3 | 
4 | 
5 | def map_reduction_func(imgs, fun, *args, output_len=1, dtype=float, **kwargs):
6 | """
7 | Use dask array map blocks for functions that return a numpy vector of values (e.g. fit functions or 1D profiles)
8 | :param imgs: image stack as dask array, stacked along dimension 0
9 | :param fun: function to apply, needs to be able to process image stacks
10 | :param args: positional arguments to be supplied to the function.
Note that these have to have three dimensions 11 | :param output_len: length of output numpy vector 12 | :param dtype: data type of output numpy vector 13 | :param kwargs: keyword arguments to be supplied to the function 14 | :return: 15 | """ 16 | 17 | assert isinstance(imgs, da.core.Array) 18 | 19 | args_new = [] 20 | for arg in args: 21 | # broadcasting on arrays works on the last dimension, whereas the stack is in the first. This may cause trouble 22 | # if a parameter array is 1D or 2D 23 | if isinstance(arg, da.core.Array) or isinstance(arg, np.ndarray): 24 | if arg.ndim == 1: 25 | #print('upcasting 1D') 26 | arg = arg[:, np.newaxis, np.newaxis] 27 | elif arg.ndim == 2: 28 | #print('upcasting 2D') 29 | arg = arg[:, :, np.newaxis] 30 | args_new.append(arg) 31 | # print(fun) 32 | # print([type(a) for a in args_new]) 33 | # print({kw: type(v) for kw, v in kwargs.items()}) 34 | out = imgs.map_blocks(fun, *args_new, chunks=(imgs.chunks[0], output_len), 35 | drop_axis=(1, 2), new_axis=1, dtype=dtype, **kwargs) 36 | return out 37 | 38 | 39 | -------------------------------------------------------------------------------- /diffractem/io.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import pandas as pd 4 | from dask import array as da 5 | from collections import defaultdict 6 | import dask.diagnostics 7 | import os.path 8 | from diffractem import normalize_names 9 | import warnings 10 | from typing import Union 11 | from glob import glob 12 | 13 | 14 | def expand_files(file_list: Union[str, list], scan_shots=False, validate=False): 15 | 16 | def remove_bs(fns): 17 | return [fn.replace('\\', '/') for fn in fns] 18 | 19 | if isinstance(file_list, list) or isinstance(file_list, tuple): 20 | fl = remove_bs(file_list) 21 | if scan_shots: 22 | fl = pd.DataFrame(fl, columns=['file']) 23 | 24 | elif isinstance(file_list, str) and file_list.endswith('.lst'): 25 | if scan_shots: 26 | fl = pd.read_csv(file_list, sep=' ', header=None, engine='python', 27 | names=['file', 'Event']) 28 | fl['file'] = remove_bs(fl['file']) 29 | if fl.Event.isna().all(): 30 | fl.drop('Event', axis=1, inplace=True) 31 | else: 32 | fl = [] 33 | for s in open(file_list, 'r').readlines(): 34 | if '//' in s: 35 | raise RuntimeError('Shot identifier found in list file. 
You may want to set scan_shots=True') 36 | fl.append(s.split(' ', 1)[0].strip()) 37 | fl = remove_bs(fl) 38 | 39 | elif isinstance(file_list, str) and (file_list.endswith('.h5') or file_list.endswith('.nxs')): 40 | fl = remove_bs(sorted(glob(file_list))) 41 | if scan_shots: 42 | fl = pd.DataFrame(fl, columns=['file']) 43 | 44 | else: 45 | raise TypeError('file_list must be a list file, single or glob pattern of h5/nxs files, or a list of filenames') 46 | 47 | if (not scan_shots) and (not len(fl) == len(set(fl))): 48 | raise ValueError('File identifiers are not unique, most likely because the file names are not.') 49 | 50 | if validate: 51 | if scan_shots: 52 | raise ValueError('Validation is only allowed if scan_shot=False.') 53 | valid_files = [] 54 | for r in fl: 55 | try: 56 | with h5py.File(r, 'r') as fh: 57 | 58 | for k in fh.keys(): 59 | 60 | if (f'/{k}/shots' in fh) and (f'/{k}/map/features' in fh) and (f'/{k}/data' in fh): 61 | # print(r,': file validated!') 62 | valid_files.append(r) 63 | else: 64 | print(r, k, ': invalid file/subset!') 65 | except (OSError, IOError) as err: 66 | print('Could not open file', r, 'for validation because:') 67 | print(err) 68 | 69 | return valid_files 70 | 71 | else: 72 | return fl 73 | 74 | 75 | def dict_to_h5(grp, data, exclude=()): 76 | """ 77 | Write dictionary into HDF group (or file) object 78 | :param grp: HDF group or file object 79 | :param data: dictionary to be written into HDF5 80 | :param exclude: dataset or group names to be excluded 81 | :return: 82 | """ 83 | for k, v in data.items(): 84 | nk = normalize_names(k) 85 | if k in exclude: 86 | continue 87 | elif isinstance(v, dict): 88 | dict_to_h5(grp.require_group(nk), v, exclude=exclude) 89 | else: 90 | if nk in grp.keys(): 91 | grp[nk][...] = v 92 | else: 93 | grp.create_dataset(nk, data=v) 94 | 95 | 96 | def h5_to_dict(grp, exclude=('data', 'image'), max_len=100): 97 | """ 98 | Get dictionary from HDF group (or file) object 99 | :param grp: HDF group or file 100 | :param exclude: (sub-)group or dataset names to be excluded; by default 'data' and 'image 101 | :param max_len: maximum length of data field to be included (along first direction) 102 | :return: dictionary corresponding to HDF group 103 | """ 104 | d = {} 105 | for k, v in grp.items(): 106 | if k in exclude: 107 | continue 108 | if isinstance(v, h5py.Group): 109 | d[k] = h5_to_dict(v, exclude=exclude, max_len=max_len) 110 | elif isinstance(v, h5py.Dataset): 111 | if (len(v.shape) > 0) and (len(v) > max_len): 112 | print('Skipping', v.shape, len(v), max_len, v) 113 | continue 114 | d[k] = v.value 115 | return d 116 | 117 | def make_master_h5(file_list, file_name=None, abs_path=False, local_group='/', 118 | remote_group='/entry', verbose=False): 119 | fns, ids = expand_files(file_list, True) 120 | 121 | if isinstance(file_list, str) and file_list.endswith('.lst'): 122 | if file_name is None: 123 | file_name = file_list.rsplit('.', 1)[0] + '.h5' 124 | else: 125 | if file_name is None: 126 | raise ValueError('Please provide output file name explicitly, if input is not a file list.') 127 | 128 | f = h5py.File(file_name, 'w') 129 | 130 | try: 131 | 132 | subsets = [] 133 | 134 | for fn, id in zip(fns, ids): 135 | 136 | subset = id 137 | 138 | if subset in subsets: 139 | raise KeyError('File names are not unique!') 140 | else: 141 | subsets.append(subset) 142 | 143 | if abs_path: 144 | fn2 = os.getcwd() + '/' + fn 145 | else: 146 | fn2 = fn 147 | 148 | if not os.path.isfile(fn2): 149 | raise FileNotFoundError(f'File {fn2} present 
in {file_list} not found!') 150 | 151 | if verbose: 152 | print(f'Referencing file {fn2} as {subset}') 153 | if local_group != '/': 154 | f.require_group(local_group) 155 | 156 | f[local_group + '/' + subset] = h5py.ExternalLink(fn2, remote_group) 157 | 158 | except Exception as err: 159 | f.close() 160 | os.remove(file_name) 161 | raise err 162 | 163 | f.close() 164 | 165 | return file_name 166 | 167 | -------------------------------------------------------------------------------- /diffractem/nexus.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from collections import defaultdict 4 | from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, wait, FIRST_EXCEPTION 5 | from itertools import repeat 6 | from typing import Union, List, Tuple, Optional 7 | import os 8 | import h5py 9 | import numpy as np 10 | import pandas as pd 11 | from warnings import warn 12 | from .io import expand_files, dict_to_h5 13 | from distributed.lock import Lock 14 | 15 | 16 | def _get_table_from_single_file(fn: str, path: str) -> pd.DataFrame: 17 | identifiers = path.rsplit('%', 1) 18 | lists = [] 19 | with h5py.File(fn, 'r') as fh: 20 | 21 | try: 22 | if len(identifiers) == 1: 23 | subsets = [''] 24 | else: 25 | subsets = fh[identifiers[0]].keys() 26 | 27 | for subset in subsets: 28 | tbl_path = path.replace('%', subset) 29 | if tbl_path not in fh: 30 | # warn(f'Group {tbl_path} not found in {fn}.') 31 | raise KeyError(f'Group {tbl_path} not found in {fn}.') 32 | # newlist = None 33 | 34 | if 'pandas_type' in fh[tbl_path].attrs: 35 | warn(f'{fn}:{tbl_path} in Pandas/PyTables format. Please consider converting.', DeprecationWarning) 36 | try: 37 | newlist = pd.read_hdf(fn, tbl_path) 38 | except Exception as err: 39 | print('Tried to load a table in old-style diffractem format (pytables-style):\n' 40 | f'{fn}:{tbl_path} in Pandas/PyTables format.\n' 41 | 'For this to work, you need to install the pytables package.\n' 42 | 'Also, please consider converting the files to new form.') 43 | raise err 44 | else: 45 | dt = {} 46 | for key, val in fh[tbl_path].items(): 47 | if val.ndim != 1: 48 | warn('Data fields in list group must be 1-D, {} is {}-D. 
Skipping.'.format(key, val.ndim)) 49 | continue 50 | dt_field = val.dtype 51 | if 'label' in val.attrs: 52 | k = val.attrs['label'] 53 | else: 54 | k = key 55 | if dt_field.type == np.string_: 56 | try: 57 | dt[k] = val[:].astype(np.str) 58 | except UnicodeDecodeError as err: 59 | print(f'Field {key} of type {dt_field} gave decoding trouble:') 60 | raise err 61 | else: 62 | dt[k] = val[:] 63 | newlist = pd.DataFrame().from_dict(dt) 64 | 65 | newlist['subset'] = subset 66 | newlist['file'] = fn 67 | lists.append(newlist) 68 | 69 | except KeyError as kerr: 70 | raise KeyError(f'{path} not found in {fn}.') 71 | 72 | return pd.concat(lists, axis=0, ignore_index=True) 73 | 74 | 75 | def get_table(files: Union[list, str], path='/%/shots', parallel=True) -> pd.DataFrame: 76 | 77 | files = expand_files(files) 78 | 79 | if parallel: 80 | with ProcessPoolExecutor() as p: 81 | out = p.map(_get_table_from_single_file, files, repeat(path)) 82 | # ftrs = [] 83 | # for fn in files: 84 | # ftrs.append(p.submit(_get_table_from_single_file, fn, path)) 85 | # TODO make this more robust against errors by changing to submit instead of map and handling single-file errors 86 | 87 | else: 88 | out = map(_get_table_from_single_file, files, repeat(path)) 89 | 90 | out = pd.concat(out, ignore_index=True, sort=False) 91 | 92 | return out 93 | 94 | 95 | def _store_table_to_single_subset(tbl: pd.DataFrame, fn: str, path: str, subset: str, format: str = 'nexus'): 96 | """ 97 | Helper function. Internal use only. 98 | """ 99 | 100 | tbl_path = path.replace('%', subset) 101 | if format == 'table': 102 | try: 103 | tbl.to_hdf(fn, tbl_path, format='table', data_columns=True) 104 | except ValueError: 105 | tbl.to_hdf(fn, tbl_path, format='table') 106 | 107 | elif format == 'nexus': 108 | with h5py.File(fn, 'a') as fh: 109 | for key, val in tbl.iteritems(): 110 | #print(f'Storing {key} ({val.shape}, {val.dtype}) to {fn}: {path}') 111 | grp = fh.require_group(tbl_path) 112 | grp.attrs['NX_class'] = 'NXcollection' 113 | k = key.replace('/', '_').replace('.', ' ') 114 | try: 115 | if k not in grp: 116 | ds = grp.require_dataset(k, shape=val.shape, dtype=val.dtype, maxshape=(None,)) 117 | else: 118 | ds = grp[k] 119 | if ds.shape[0] != val.shape[0]: 120 | ds.resize(val.shape[0], axis=0) 121 | #print('resizing', k) 122 | ds[:] = val 123 | except (TypeError, OSError) as err: 124 | if val.dtype == 'O': 125 | val2 = val.astype('S') 126 | if k in grp: 127 | del grp[k] 128 | ds = grp.require_dataset(k, shape=val.shape, dtype=val2.dtype, maxshape=(None,)) 129 | ds[:] = val2 130 | else: 131 | raise err 132 | 133 | ds.attrs['label'] = key 134 | else: 135 | raise ValueError('Storage format must be "table" or "nexus".') 136 | 137 | 138 | def store_table(table: pd.DataFrame, path: str, 139 | parallel: bool = True, format: str = 'nexus', 140 | file: Optional[str] = None, subset: Optional[str] = None): 141 | """ 142 | Stores a pandas DataFrame containing 'file' and 'subset' columns to multiple HDF5 files. Essentially a 143 | multi-file, multi-processed wrapper to pd.to_hdf 144 | :param table: DataFrame to be stored 145 | :param path: path in HDF5 files. % will be substituted by the respective subset name 146 | :param parallel: if True (default), writes files in parallel 147 | :param format: can be 'nexus' to write columns of table in separate arrays, or 'tables' to use PyTables to write 148 | a HDF5 table object. 149 | :return: list of futures (see documentation of concurrent.futures). 
[None] if parallel=False 150 | """ 151 | 152 | # TODO: could be that parallel execution with multiple subsets/table/types will not work 153 | 154 | if (file is None) and parallel: 155 | 156 | with ProcessPoolExecutor() as exec: 157 | futures = [] 158 | try: 159 | for (fn, ssn), ssdat in table.groupby(['file', 'subset']): 160 | futures.append(exec.submit(_store_table_to_single_subset, ssdat, fn, path, ssn, format)) 161 | except Exception as err: 162 | print('Error during storing table in', path) 163 | print('Table columns are:', ', '.join(table.columns)) 164 | # print(table) 165 | raise err 166 | 167 | wait(futures, return_when=FIRST_EXCEPTION) 168 | 169 | for f in futures: 170 | if f.exception(): 171 | raise f.exception() 172 | 173 | return futures 174 | 175 | else: 176 | #print(path) 177 | #print(table.columns) 178 | 179 | if file is not None: 180 | _store_table_to_single_subset(table, file, path, subset, format) 181 | 182 | else: 183 | for (fn, ssn), ssdat in table.groupby(['file', 'subset']): 184 | _store_table_to_single_subset(ssdat, fn, path, ssn, format) 185 | 186 | return [None] 187 | 188 | def _save_single_chunk(dat: np.ndarray, file: str, subset: str, label: str, 189 | idcs: Union[list, np.ndarray], data_pattern: str, lock): 190 | lock.acquire() 191 | with h5py.File(file, 'a') as fh: 192 | path = f'{data_pattern}/{label}'.replace('%', subset) 193 | fh[path][idcs,:,:] = dat 194 | lock.release() 195 | return file, subset, path, idcs 196 | 197 | def _save_single_chunk_multi(chks: dict, file: str, subset: str, 198 | idcs: Union[list, np.ndarray], lock: Lock): 199 | lock.acquire() 200 | # print('Have lock: ', lock) 201 | with h5py.File(file, 'a') as fh: 202 | for p, d in chks.items(): 203 | fh[p.replace('%', subset)][idcs,...] = d 204 | lock.release() 205 | return file, subset, list(chks.keys()), idcs 206 | 207 | def meta_to_nxs(filename, meta=None, exclude=('Detector',), meta_grp='/entry/instrument', 208 | data_grp='/entry/data', data_field='raw_counts', data_location='/entry/instrument/detector/data'): 209 | """ 210 | Merges a dict containing metadata information for a serial data acquisition into an existing detector nxs file. 211 | Additionally, it adds a soft link to the actual data for easier retrieval later (typically into /entry/data) 212 | :param filename: NeXus file or lists 213 | :param meta: can be set to {} -> no meta action performed. Or a JSON file name. If None, a JSON file name will be 214 | derived from nxs_file by replacing .nxs by .json (useful in loops) 215 | :param exclude: names of meta groups or fields to exclude 216 | :param meta_grp: location in the NeXus, where the metadata should go to 217 | :param data_grp: location of softlink to the data stack. No softlink action if None. 
218 | :param data_field: name of the softlink to the data stack 219 | :param data_location: location of the data stack 220 | :return: 221 | """ 222 | 223 | # TODO: add functions to include flat field and pixel mask 224 | 225 | if (not isinstance(filename, str)) or filename.endswith('.lst'): 226 | fns = expand_files(filename) 227 | for fn in fns: 228 | meta_to_nxs(fn, meta=meta, exclude=exclude, meta_grp=meta_grp, 229 | data_grp=data_grp, data_field=data_field, data_location=data_location) 230 | return 231 | 232 | with h5py.File(filename, 'r+') as f: 233 | 234 | if meta is None: 235 | meta = filename.rsplit('.', 1)[0] + '.json' 236 | 237 | if isinstance(meta, str): 238 | try: 239 | meta = json.load(open(meta)) 240 | except FileNotFoundError: 241 | print('No metafile found.') 242 | meta = {} 243 | 244 | elif isinstance(meta, dict): 245 | pass 246 | 247 | elif isinstance(meta, pd.DataFrame): 248 | meta = next(iter(meta.to_dict('index').values())) 249 | 250 | dict_to_h5(f.require_group(meta_grp), meta, exclude=exclude) 251 | 252 | if data_grp is not None: 253 | dgrp = f.require_group(data_grp) 254 | dgrp.attrs['NX_class'] = np.string_('NXdata') 255 | dgrp.attrs['signal'] = np.string_(data_field) 256 | 257 | if data_field in dgrp.keys(): 258 | del dgrp[data_field] 259 | dgrp[data_field] = h5py.SoftLink(data_location) 260 | 261 | 262 | def get_meta_fields(files: Union[str, list], dataset_paths: Union[list, str, tuple, dict], shorten_labels=True): 263 | """ 264 | Get arbitrary meta data from files. 265 | :param files: 266 | :param dataset_paths: list of dataset paths, or dict of structure {dataset: default value} 267 | :param shorten_labels: only use final section of labels for columns of returned DataFrame 268 | :return: pandas DataFrame of metadata 269 | """ 270 | 271 | if isinstance(dataset_paths, str): 272 | dataset_paths = [dataset_paths] 273 | 274 | if isinstance(dataset_paths, list) or isinstance(dataset_paths, tuple): 275 | dataset_paths = {f: None for f in dataset_paths} 276 | 277 | values = defaultdict(dict) 278 | dtypes = {} 279 | fns = expand_files(files) 280 | 281 | for fn in fns: 282 | with h5py.File(fn, mode='r') as fh: 283 | for field, default in dataset_paths.items(): 284 | 285 | identifiers = field.rsplit('%', 1) 286 | 287 | if len(identifiers) == 1: 288 | subsets = [''] 289 | else: 290 | subsets = fh[identifiers[0]].keys() 291 | 292 | for subset in subsets: 293 | try: 294 | # print(f[field]) 295 | values[field][(fn, subset)] = fh[field.replace('%', subset)][...] 296 | dtypes[field] = fh[field.replace('%', subset)].dtype 297 | if dtypes[field] == 'O': 298 | dtypes[field] = str 299 | # print(field, fh[field.replace('%', subset)].dtype) 300 | except KeyError: 301 | values[field][(fn, subset)] = default 302 | 303 | newcols = {'level_0': 'file', 'level_1': 'subset'} 304 | if shorten_labels: 305 | newcols.update({k: k.rsplit('/', 1)[-1] for k in dataset_paths}) 306 | return pd.DataFrame(values).astype(dtypes).reset_index().rename(columns=newcols) 307 | 308 | 309 | def copy_h5(fn_from, fn_to, exclude=('%/detector/data', '/%/data/%', '/%/results/%'), mode='w-', 310 | print_skipped=False, h5_folder=None, h5_suffix='.h5'): 311 | """ 312 | Copies datasets h5/nxs files or lists of them to new ones, with exclusion of datasets. 313 | :param fn_from: single h5/nxs file or list file 314 | :param fn_to: new file name, or new list file. 
If the latter, specify with h5_folder and h5_suffix how the new names 315 | are supposed to be constructed 316 | :param exclude: patterns for data sets to be excluded. All regular expressions are allowed, % is mapped to .* 317 | (i.e., any string of any length), for compatibility with CrystFEL 318 | :param mode: mode in which new files are opened. By default w-, i.e., files are created, but never overwritten 319 | :param print_skipped: print the skipped data sets, for debugging 320 | :param h5_folder: if operating on a list: folder where new h5 files should go 321 | :param h5_suffix: if operating on a list: suffix appended to old files (after stripping their extension) 322 | :return: 323 | """ 324 | 325 | # multi-file copy, using recursive call. 326 | if (isinstance(fn_from, str) and fn_from.endswith('.lst')) or isinstance(fn_from, list): 327 | warn('Calling copy_h5 on a file list is not recommended anymore', DeprecationWarning) 328 | old_files = expand_files(fn_from) 329 | new_files = [] 330 | 331 | for ofn in old_files: 332 | # print(ofn) 333 | # this loop could beautifully be parallelized. For later... 334 | if h5_folder is None: 335 | h5_folder = ofn.rsplit('/', 1)[0] 336 | if h5_suffix is None: 337 | h5_suffix = ofn.rsplit('.', 1)[-1] 338 | nfn = h5_folder + '/' + ofn.rsplit('.', 1)[0].rsplit('/', 1)[-1] + h5_suffix 339 | new_files.append(nfn) 340 | # exclude detector data and shot list 341 | copy_h5(ofn, nfn, exclude, mode, print_skipped) 342 | 343 | with open(fn_to, 'w') as f: 344 | f.write('\n'.join(new_files)) 345 | 346 | return 347 | 348 | # single-file copy 349 | try: 350 | 351 | # no exclusion... simply copy file 352 | if len(exclude) == 0: 353 | from shutil import copyfile 354 | copyfile(fn_from, fn_to) 355 | return 356 | 357 | exclude_regex = [re.compile(ex.replace('%', '.*')) for ex in exclude] 358 | 359 | def copy_exclude(key, ds, to): 360 | # function to copy a single entry within a HDF hierarchy, and do recursive calls 361 | # if required. If it finds its key in the exclusion patterns, just skips that entry. 362 | 363 | for ek in exclude_regex: 364 | if ek.fullmatch(ds.name) is not None: 365 | if print_skipped: 366 | print(f'Skipping key {key} due to {ek}') 367 | return 368 | 369 | if isinstance(ds, h5py.Dataset): 370 | to.copy(ds, key) 371 | 372 | elif isinstance(ds, h5py.Group) and 'table_type' in ds.attrs.keys(): 373 | # pandas table is a group. Do NOT traverse into it (or experience infinite pain) 374 | # print(f'Copying table {key}') 375 | to.copy(ds, key) 376 | 377 | elif isinstance(ds, h5py.Group): 378 | # print(f'Creating group {key}') 379 | new_grp = to.require_group(key) 380 | 381 | # attribute copying. Lots of error catching required. 
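# The TypeError fallback below handles attribute values (e.g. unicode arrays)
# that h5py cannot write verbatim; they are coerced to fixed-length byte
# strings via np.string_ before giving up on the attribute.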
382 | try: 383 | for k, v in ds.attrs.items(): 384 | try: 385 | new_grp.attrs.create(k, v) 386 | except TypeError as err: 387 | new_grp.attrs.create(k, np.string_(v)) 388 | except OSError: 389 | # some newer HDF5 attribute types (used by pytables) will crash h5py even just listing them 390 | # print(f'Could not copy attributes of group {ds.name}') 391 | pass 392 | 393 | for k, v in ds.items(): 394 | lnk = ds.get(k, getlink=True) 395 | if isinstance(lnk, h5py.SoftLink): 396 | for ek in exclude_regex: 397 | if ek.fullmatch(lnk.path) is not None: 398 | if print_skipped: 399 | print(f'Skipping soft link to {ek}') 400 | break 401 | else: 402 | new_grp[k] = h5py.SoftLink(lnk.path) 403 | continue 404 | 405 | copy_exclude(k, v, new_grp) 406 | 407 | # for k, v in ds.items(): 408 | # lnk = ds.get(k, getlink=True) 409 | # if isinstance(lnk, h5py.SoftLink): 410 | # new_grp[k] = h5py.SoftLink(lnk.path) 411 | # continue 412 | # copy_exclude(k, v, new_grp) 413 | 414 | with h5py.File(fn_from, mode='r') as f, h5py.File(fn_to, mode=mode) as f2: 415 | copy_exclude('/', f, f2) 416 | 417 | except Exception as err: 418 | if os.path.exists(fn_to): 419 | os.remove(fn_to) 420 | print(f'Error occurred while attempting to copy data from {fn_from} to {fn_to}.') 421 | raise err 422 | -------------------------------------------------------------------------------- /diffractem/pre_proc_opts.py: -------------------------------------------------------------------------------- 1 | 2 | import yaml 3 | import pprint 4 | import json 5 | from typing import Union 6 | 7 | #TODO consider to make this a types.SimpleNamespace or use dataclasses.dataclass 8 | class PreProcOpts: 9 | def __init__(self, fn=None): 10 | 11 | self._filename = None 12 | 13 | self.reference: str = 'Ref12_reference.tif' #: Name of reference image for flat-field correction in TIF format 14 | self.pxmask: str = 'Ref12_pxmask.tif' #: Name of pixelmask TIF image 15 | self.correct_saturation: bool = True #: Correct for detector saturation using paralyzable model 16 | self.remove_background: bool = True #: Determine and subtract background during image correction 17 | self.dead_time: float = 1.9e-3 #: Dead time (in ms) for paralyzable detector model 18 | self.dead_time_gap_factor: float = 2 #: Factor for dead time in gap-pixels 19 | self.shutter_time: float = 2 #: Shutter time (in ms) for paralyzable detector model 20 | self.mask_gaps: bool = True #: Always mask detector gaps (regardless of dead pixel mask) 21 | self.interpolate_dead: bool = False #: Interpolate dead pixels instead of masking 22 | self.float: bool = False #: Process images as floating-point 23 | self.int_factor: int = 1 #: Factor to apply to corrected images if float=False 24 | self.find_peaks: bool = True #: apply peakfinder during image analysis 25 | self.compression: Union[int, str] = 32004 #: standard HDF5 compression. Suggested values: gzip, none, 32004 (lz4) 26 | self.cam_length: float = 2 #: Average camera length (in m). 27 | self.y_scale: float = 1 #: Scaling of camera length along y. DEPRECATED! 28 | self.ellipse_ratio: float = 1 #: ellipticity of camera lentgh along arbitrary axis 29 | self.ellipse_angle: float = 0 #: cam length ellipticity angle. (e.g. 0 is x, pi/2 is y,...) 
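        # Illustrative note: at least as applied in proc_peaks.get_pk_data, the two
        # parameters above act as a rotation by ellipse_angle (interpreted in degrees
        # there), a scaling of x by 1/sqrt(ellipse_ratio) and of y by
        # sqrt(ellipse_ratio), and a back-rotation; i.e. ellipse_ratio is the axis
        # ratio of the elliptical distortion and ellipse_angle its orientation.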
30 | self.pixel_size: float = 55e-6 #: Pixel size (in m) 31 | self.wavelength: float = 0.0251 #: Radiation wave length (in A) 32 | self.com_threshold:float = 0.9 #: minimum counts in a pixel to be considered for the center-of-mass calculation 33 | self.com_xrng: int = 800 #: x range (px) around geometric pattern center in which to look for center of mass 34 | self.com_yrng: int = 800 #: y range (px) around geometric pattern center in which to look for center of mass 35 | self.lorentz_radius: int= 30 #: radius (px) around center of mass for Lorentz fit of zero order 36 | self.lorentz_maxshift: float = 36 #: maximum shift (px) of Lorentz fit center from center of mass 37 | self.xsize: int = 1556 #: x image size (px) 38 | self.ysize: int = 516 #: y image size (px) 39 | self.r_adf1: tuple = (50, 100) #: inner/outer radii for virtual ADF 1 (px) 40 | self.r_adf2: tuple = (100, 200) #: inner/outer radii for virtual ADF 2 (px) 41 | self.select_query: str = 'frame >= 0' #: query string for selection of shots from raw data 42 | self.agg_query: str = 'frame >= 0 and frame <= 5' #: query string for aggregation of patterns 43 | self.agg_file_suffix: str = '_agg.h5' #: file suffix for aggregated patterns 44 | self.aggregate: bool = True #: calculate aggregated patterns (only for real-time analysis) 45 | self.scratch_dir: str = '/scratch/diffractem' #: scratch directory for temporary data 46 | self.proc_dir: str = 'proc_data' #: directory for pre-processed data 47 | self.peak_data_path: str = '/%/data' #: path in HDF5 files to peak data in CXI format 48 | self.det_shift_x_path: str = 'det_shift_x_mm' #: path in HDF5 files to lab frame detector shift (x) 49 | self.det_shift_y_path: str = 'det_shift_y_mm' #: path in HDF5 files to lab frame detector shift (y) 50 | self.rechunk: bool = None 51 | self.peak_search_params: dict = \ 52 | {'min-res': 5, 'max-res': 600, 53 | 'local-bg-radius': 5, 'threshold': 8, 54 | 'min-pix-count': 3, 'max-pix-count': 10000, 55 | 'min-snr': 3.5, 56 | 'peaks': 'peakfinder8'} #: parameters for peak finding using peakfinder8 57 | self.indexing_params: dict = \ 58 | {'indexing': 'pinkIndexer', 59 | 'integration': 'rings-nograd-nocen', 60 | 'int-radius': '3,4,6', 61 | 'peaks': 'cxi', 62 | 'max-indexer-threads': 2, 63 | 'min-peaks': 15, 64 | 'no-refine': True, 65 | 'no-retry': True, 66 | 'no-check-peaks': True, 67 | 'camera-length-estimate': 1, 68 | 'pinkIndexer-considered-peaks-count': 4, 69 | 'pinkIndexer-angle-resolution': 4, 70 | 'pinkIndexer-refinement-type': 5, 71 | 'pinkIndexer-tolerance': 0.1, 72 | 'pinkIndexer-reflection-radius': 0.001, 73 | 'pinkIndexer-max-resolution-for-indexing': 2, 74 | 'pinkIndexer-no-check-indexed': False 75 | } #: indexamajig parameters for indexing 76 | self.integration_params: dict = \ 77 | {'indexing': 'file', 78 | 'integration': 'rings-nograd-nocen', 79 | 'int-radius': '3,4,6', 80 | 'peaks': 'cxi', 81 | 'max-indexer-threads': 2, 82 | 'min-peaks': 15, 83 | 'no-refine': True, 84 | 'no-retry': True, 85 | 'no-check-peaks': True, 86 | 'overpredict': False 87 | } #: indexamajig parameters for integration-only (without indexing) 88 | self.peak_search_params.update({'temp-dir': self.scratch_dir}) 89 | self.indexing_params.update({'temp-dir': self.scratch_dir}) 90 | self.indexing_params.update({'camera-length-estimate': self.cam_length}) 91 | self.max_peaks: int = 500 #: maximum number of peaks for peak finding 92 | self.im_exc = 'indexamajig' #: default executable for indexamajig, can point to non-standard path 93 | self.friedel_refine = True #: perform 
Friedel-pair center refinement in get_pattern_info 94 | self.min_peaks = 10 #: minimum peaks for Friedel refinement (and auto-preproc) 95 | self.peak_sigma = 2 #: peak blurring for Friedel refinement (increase for sloppy initial center) 96 | self.friedel_max_radius = None #: maximum radius in pixels of peaks to be considered for Friedel refinement 97 | 98 | if fn is not None: 99 | self.load(fn) 100 | 101 | def __str__(self): 102 | return pprint.pformat(self.__dict__) 103 | 104 | def __repr__(self): 105 | return pprint.pformat(self.__dict__) 106 | 107 | def load(self, fn=None): 108 | 109 | fn = self._filename if fn is None else fn 110 | if fn is None: 111 | raise ValueError('Please set the option file name first') 112 | 113 | if fn.endswith('json'): 114 | config = json.load(open(fn, 'r')) 115 | elif fn.endswith('yaml'): 116 | config = yaml.safe_load(open(fn, 'r')) 117 | else: 118 | raise ValueError('File extension must be .yaml or .json.') 119 | 120 | for k, v in config.items(): 121 | if k in self.__dict__: 122 | setattr(self, k, v) 123 | else: 124 | print('Option', k, 'in', fn, 'unknown.') 125 | 126 | self._filename = fn 127 | 128 | def save(self, fn: str): 129 | if fn.endswith('json'): 130 | json.dump(self.__dict__, open(fn, 'w'), skipkeys=True, indent=4) 131 | elif fn.endswith('yaml'): 132 | yaml.dump(self.__dict__, open(fn, 'w'), sort_keys=False) -------------------------------------------------------------------------------- /diffractem/proc_peaks.py: -------------------------------------------------------------------------------- 1 | # Friedel-pair refinement 2 | from scipy.optimize import least_squares 3 | import numpy as np 4 | import pandas as pd 5 | from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, wait, ALL_COMPLETED 6 | from multiprocessing import current_process 7 | from typing import Optional 8 | from .pre_proc_opts import PreProcOpts 9 | from . 
import tools, proc2d 10 | from warnings import warn 11 | 12 | 13 | def _ctr_from_pks(pkl: np.ndarray, p0: np.ndarray, 14 | int_weight: bool = False, sigma: float = 2.0, bound: float = 5.0, label: str = None): 15 | """Gets the refined peak center position from a list of peaks containing Friedel mates 16 | 17 | Arguments: 18 | pkl {np.ndarray} -- [List of peaks, with x and y values in 0th and 1st column, optionally intensity on 2nd] 19 | p0 {np.ndarray} -- [Initial position] 20 | 21 | Keyword Arguments: 22 | int_weight {bool} -- [weight peaks by their intensity] (default: {False}) 23 | sigma {float} -- [assumed peak rms radius for matching] (default: {2.0}) 24 | bound {float} -- [maximum shift] (default: {5.0}) 25 | label {str} -- [label to be returned in output] (default: {None}) 26 | 27 | Returns: 28 | [tuple] -- [refined position, inverse cost function, label] 29 | """ 30 | if int_weight: 31 | corr = lambda p: np.sum(np.matmul(pkl[:, 2:3], pkl[:, 2:3].T) 32 | * np.exp(-((pkl[:, 0:1] + pkl[:, 0:1].T - 2 * p[0]) ** 2 33 | + (pkl[:, 1:2] + pkl[:, 1:2].T - 2 * p[1]) ** 2) / (2 * sigma ** 2))) \ 34 | / np.sum(np.matmul(pkl[:, 2:3], pkl[:, 2:3].T)) 35 | else: 36 | corr = lambda p: np.sum(np.exp(-((pkl[:, 0:1] + pkl[:, 0:1].T - 2 * p[0]) ** 2 37 | + (pkl[:, 1:2] + pkl[:, 1:2].T - 2 * p[1]) ** 2) / (2 * sigma ** 2))) \ 38 | / (2*pkl.shape[0]) 39 | 40 | fun = lambda p: 1 / max(corr(p), 1e-10) # prevent infs 41 | if np.isnan(fun(p0)): 42 | return p0, np.nan, label 43 | else: 44 | lsq = least_squares(fun, p0, bounds=(p0 - bound, p0 + bound)) 45 | return lsq.x - 0.5, 1 / lsq.cost, label # -0.5 changes from CrystFEL-like to pixel-center convention 46 | 47 | 48 | def center_friedel(peaks: pd.DataFrame, shots: Optional[pd.DataFrame] = None, 49 | p0=(778, 308), colnames=('fs/px', 'ss/px'), sigma=2, 50 | minpeaks=4, maxres: Optional[float] = None): 51 | """[Center refinement of diffraction patterns from a list of peaks, assuming the presence 52 | of a significant number of Friedel mates.] 53 | 54 | Arguments: 55 | peaks {[pd.DataFrame]} -- [peaks list for entire data set, as returned by StreamParser. CrystFEL convention!] 
56 | 57 | Keyword Arguments: 58 | shots {[pd.DataFrame]} -- [shot list of data set, optional] (default: {None}) 59 | p0 {tuple} -- [starting position for center search] (default: {(778, 308)}) 60 | colnames {tuple} -- [column names for x and y coordinate] (default: {('fs/px', 'ss/px')}) 61 | sigma {int} -- [peak rms radius (determines 'sharpness' of matching)] (default: {2}) 62 | minpeaks {int} -- [minimum peak number to try matching] (default: {4}) 63 | maxres {int} -- [maximum radius of peaks to still be considered] (default: {None}) 64 | """ 65 | colnames = list(colnames) 66 | p0 = np.array(p0) 67 | 68 | if current_process().daemon: 69 | print('Danger, its a Daemon.') 70 | 71 | with ProcessPoolExecutor() as p: 72 | futures = [] 73 | for grp, pks in peaks.groupby(['file', 'Event']): 74 | pkl = pks.loc[:, colnames].values 75 | rsq = (pkl[:, 0] - p0[0]) ** 2 + (pkl[:, 1] - p0[1]) ** 2 76 | if maxres is not None: 77 | pkl = pkl[rsq < maxres ** 2, :] 78 | if (minpeaks is None) or pkl.shape[0] > minpeaks: 79 | futures.append(p.submit(_ctr_from_pks, pkl, p0, sigma=sigma, label=grp)) 80 | 81 | wait(futures, return_when=ALL_COMPLETED) 82 | if len(futures) == 0: 83 | cpos = shots[['file', 'Event']].copy() 84 | cpos['beam_x'] = p0[0] 85 | cpos['beam_y'] = p0[1] 86 | cpos['friedel_cost'] = np.nan 87 | 88 | return cpos 89 | 90 | # reformat result into a dataframe 91 | cpos = pd.concat([pd.DataFrame(data=np.array([t.result()[2] for t in futures if t.exception() is None]), 92 | columns=['file', 'Event']), 93 | pd.DataFrame(data=np.array([t.result()[0] for t in futures if t.exception() is None]), 94 | columns=['beam_x', 'beam_y']), 95 | pd.DataFrame(data=np.array([t.result()[1] for t in futures if t.exception() is None]), 96 | columns=['friedel_cost'])], 97 | axis=1) 98 | 99 | if shots is not None: 100 | # include shots that were not present in the peaks table 101 | cpos = shots[['file', 'Event']].merge(cpos, on=['file', 'Event'], how='left'). \ 102 | fillna({'beam_x': p0[0], 'beam_y': p0[1]}) 103 | 104 | return cpos 105 | 106 | 107 | def get_acf(npk, x, y, I=None, roi_length=512, output_radius=256, 108 | oversample=4, radial=True, px_ang=None, execution='processes'): 109 | """Gets the autocorrelation/pair correlation function of Bragg peak positions, 110 | optionally with intensity weighting. 111 | 112 | It is important to set the computation region properly (i.e., the 113 | maximum peak positions from the center to take into account), as this affects 114 | computation speed and impact of non-paraxiality at larger angles. It can 115 | be defined using the `roi_length` argument. 116 | 117 | Peaks must be given in CXI format. 118 | 119 | Args: 120 | npk (np.ndarray, int): number of peaks 121 | x (np.ndarray): x-coordinates of peaks, *relative to pattern center* 122 | y (np.ndarray): y-coordinates of peaks, *relative to pattern center* 123 | I ([type], optional): peak intensities. Set to 1 if None. Defaults to None. 124 | roi_length (int, optional): edge length of the region around the image 125 | center that is used for the computation. Defaults to 512. 126 | output_radius (int, optional): maximum included radius of the output ACF. 127 | The size of the 2D output will be 2*output_radius*oversample, 128 | the size of the radial average will be output_radius*oversample. Defaults to 600. 129 | oversample (int, optional): oversampling, that is, by how much smaller the bin 130 | sizes of the output are than that of the input (usually the pixels). Defaults to 4. 
131 | radial (bool, optional): compute the radial average of the ACF. Defaults to True.
132 | px_ang (double, optional): diffraction angle corresponding to a distance of 1 pixel
133 | from the center, given in rad (practically: detector pixel size/cam length). If
134 | given, non-paraxiality of the geometry is corrected (not tested well yet).
135 | Defaults to None.
136 | execution (str, optional): way of parallelism if a stack of pattern peak data
137 | is supplied. Can be 'single-threaded', 'threads', 'processes'.
138 | 
139 | Returns:
140 | np.ndarray: 2D autocorrelation function.
141 | Length will be 2 * oversample * output_range
142 | np.ndarray: 1D radial sum (None for radial=False).
143 | Length will be oversample * output_range
144 | """
145 | 
146 | from numpy import fft
147 | from itertools import repeat
148 | 
149 | # if a stack of pattern data is supplied, call recursively on single shots
150 | if isinstance(npk, np.ndarray) and len(npk) > 1:
151 | _all_args = zip(npk, x, y, repeat(None) if I is None else I)
152 | _kwargs = {'roi_length': roi_length,
153 | 'output_radius': output_radius,
154 | 'oversample': oversample,
155 | 'radial': radial,
156 | 'px_ang': px_ang}
157 | if execution == 'single-threaded':
158 | res = [get_acf(*_args, **_kwargs) for _args in _all_args]
159 | else:
160 | with (ProcessPoolExecutor() if execution=='processes'
161 | else ThreadPoolExecutor()) as exc:
162 | ftrs = [exc.submit(get_acf, *_args, **_kwargs) for _args in _all_args]
163 | wait(ftrs, return_when='FIRST_EXCEPTION')
164 | # for ftr in ftrs:
165 | # if ftr.exception() is not None:
166 | # raise ftr.exception()
167 | res = [f.result() for f in ftrs]
168 | return (np.stack(stk) for stk in zip(*res))
169 | 
170 | sz = roi_length * oversample
171 | rng = output_radius * oversample
172 | if rng > sz//2-1:
173 | raise ValueError(f'Maximum output range is {roi_length//2-1}.')
174 | 
175 | if px_ang is not None:
176 | t_par = (x[:npk]**2 + y[:npk]**2)**.5 * px_ang
177 | acorr = 2*np.sin(np.arctan(t_par)/2) / t_par
178 | else:
179 | acorr = 1
180 | 
181 | pkx = (oversample * acorr * x[:npk]).round().astype(int) + sz//2
182 | pky = (oversample * acorr * y[:npk]).round().astype(int) + sz//2
183 | pkI = None if I is None else I[:npk]
184 | 
185 | valid = (pkx >= 0) & (pkx < sz) & (pky >= 0) & (pky < sz)
186 | pkx, pky, pkI = pkx[valid], pky[valid], 1 if I is None else pkI[valid]
187 | dense = np.zeros((sz, sz), dtype=np.uint8 if I is None else float)  # uint8 suffices for binary peaks; float is needed to hold intensities
188 | dense[pky, pkx] = pkI if I is not None else 1
189 | # print(f'{dense.shape}, {rng}, {sz}')
190 | acf = fft.ifft2(np.abs(fft.fft2(dense))**2)
191 | acf = fft.ifftshift(acf).real
192 | if I is None:
193 | # if no intensities were given, the result is (should be)
194 | # integer, up to numerical noise
195 | acf = acf.round().astype(np.uint8)
196 | if acf[sz//2, sz//2] != sum(valid):
197 | warn(f'Autocorrelation center pixel ({acf[sz//2, sz//2]}) does not equal the peak number ({sum(valid)})!')
198 | acf[sz//2, sz//2] = 0 # remove self-correlation (which will be equal to the peak number)
199 | if radial:
200 | rad = proc2d.radial_proj(acf, min_size=rng, max_size=rng,
201 | my_func=np.sum, x0=sz//2, y0=sz//2)
202 | else:
203 | rad = None
204 | 
205 | return acf[sz//2-rng:sz//2+rng, sz//2-rng:sz//2+rng], rad
206 | 
207 | 
208 | def get_pk_data(n_pk: np.ndarray, pk_x: np.ndarray, pk_y: np.ndarray,
209 | ctr_x: np.ndarray, ctr_y: np.ndarray, pk_I: Optional[np.ndarray] = None,
210 | opts: Optional[PreProcOpts] = None,
211 | peakmask=None, return_vec=True, pxs=None,
212 | clen=None, wl=None, el_rat=None, el_ang=None): 213 | 214 | if peakmask is None: 215 | peakmask = np.ones_like(pk_x, dtype=np.float) 216 | for N, row in zip(n_pk, peakmask): 217 | row[N:] = np.nan 218 | 219 | if opts is not None: 220 | pxs = opts.pixel_size if pxs is None else pxs 221 | clen = opts.cam_length if clen is None else clen 222 | wl = opts.wavelength if wl is None else wl 223 | el_rat = opts.ellipse_ratio if el_rat is None else el_rat 224 | el_ang = opts.ellipse_angle if el_ang is None else el_ang 225 | 226 | # assert (np.nansum(peakmask, axis=1) == n_pk).all() 227 | pk_xr, pk_yr = pk_x - ctr_x.reshape(-1,1), pk_y - ctr_y.reshape(-1,1) 228 | pk_xr, pk_yr = pk_xr * peakmask, pk_yr * peakmask 229 | 230 | # ellipticity correction 231 | if el_rat is not None and (el_rat != 1): 232 | c, s = np.cos(np.pi/180*el_ang), np.sin(np.pi/180*el_ang) 233 | pk_xrc, pk_yrc = 1/el_rat**.5*(c*pk_xr - s*pk_yr), el_rat**.5*(s*pk_xr + c*pk_yr) 234 | pk_xrc, pk_yrc = c*pk_xrc + s*pk_yrc, - s*pk_xrc + c*pk_yrc 235 | else: 236 | pk_xrc, pk_yrc = pk_xr, pk_yr 237 | 238 | res = {'peakXPosRaw': pk_x, 'peakYPosRaw': pk_y, 239 | 'peakXPosRel': pk_xr, 'peakYPosRel': pk_yr, 240 | 'peakXPosCor': pk_xrc, 'peakYPosCor': pk_yrc, 241 | 'nPeaks': n_pk} 242 | 243 | if pk_I is not None: 244 | res['peakTotalIntensity'] = pk_I 245 | 246 | if return_vec: 247 | if (pxs is None) or (clen is None) or (wl is None): 248 | raise ValueError('Cannot return angle parameters without pxs, clen, wl.') 249 | pk_r = (pk_xrc**2 + pk_yrc**2)**.5 250 | pk_tt = np.arctan(pxs * pk_r / clen) 251 | pk_az = np.arctan2(pk_yrc, pk_xrc) 252 | pk_d = wl/(2*np.sin(pk_tt/2)) 253 | res.update({'peakTwoTheta': pk_tt, 'peakAzimuth': pk_az, 'peakD': pk_d}) 254 | 255 | return res 256 | 257 | class Cell(object): 258 | """ 259 | Partially taken from the PyFAI package, with some simplifications 260 | and speed enhancements for d-spacing calculation, as well as a 261 | new refinement function. 
262 | 263 | Calculates d-spacings and cell volume as described in: 264 | http://geoweb3.princeton.edu/research/MineralPhy/xtalgeometry.pdf 265 | """ 266 | lattices = ["cubic", "tetragonal", "hexagonal", "rhombohedral", 267 | "orthorhombic", "monoclinic", "triclinic"] 268 | ctr_types = {"P": "Primitive", 269 | "I": "Body centered", 270 | "F": "Face centered", 271 | "C": "Side centered", 272 | "R": "Rhombohedral"} 273 | 274 | def __init__(self, a=1, b=1, c=1, alpha=90, beta=90, gamma=90, 275 | lattice_type="triclinic", centering="P", 276 | unique_axis="c", d_min=2): 277 | """Constructor of the Cell class: 278 | 279 | Crystallographic units are Angstrom for distances and degrees for angles 280 | 281 | :param a,b,c: unit cell length in Angstrom 282 | :param alpha, beta, gamma: unit cell angle in degrees 283 | :param lattice: "cubic", "tetragonal", "hexagonal", "rhombohedral", "orthorhombic", "monoclinic", "triclinic" 284 | :param lattice_type: P, I, F, C or R 285 | """ 286 | self.a = a 287 | self.b = b 288 | self.c = c 289 | self.alpha = alpha 290 | self.beta = beta 291 | self.gamma = gamma 292 | self.lattice_type = lattice_type if lattice_type in self.lattices else "triclinic" 293 | self.unique_axis = unique_axis 294 | self._volume = None 295 | self.selection_rules = [] 296 | "contains a list of functions returning True(allowed)/False(forbidden)/None(unknown)" 297 | self.centering = centering 298 | self.hkl = None 299 | self._d_min = d_min 300 | self.init_hkl(d_min) 301 | 302 | def __repr__(self, *args, **kwargs): 303 | return "%s %s cell (unique %s) a=%.4f b=%.4f c=%.4f alpha=%.3f beta=%.3f gamma=%.3f" % \ 304 | (self.ctr_types[self.centering], self.lattice_type, self.unique_axis, 305 | self.a, self.b, self.c, self.alpha, self.beta, self.gamma) 306 | 307 | @classmethod 308 | def cubic(cls, a, centering="P"): 309 | """Factory for cubic lattice_types 310 | 311 | :param a: unit cell length 312 | """ 313 | a = float(a) 314 | self = cls(a, a, a, 90, 90, 90, 315 | lattice_type="cubic", centering=centering) 316 | return self 317 | 318 | @classmethod 319 | def tetragonal(cls, a, c, centering="P"): 320 | """Factory for tetragonal lattice_types 321 | 322 | :param a: unit cell length 323 | :param c: unit cell length 324 | """ 325 | a = float(a) 326 | self = cls(a, a, float(c), 90, 90, 90, 327 | lattice_type="tetragonal", centering=centering) 328 | return self 329 | 330 | @classmethod 331 | def orthorhombic(cls, a, b, c, centering="P"): 332 | """Factory for orthorhombic lattice_types 333 | 334 | :param a: unit cell length 335 | :param b: unit cell length 336 | :param c: unit cell length 337 | """ 338 | self = cls(float(a), float(b), float(c), 90, 90, 90, 339 | lattice_type="orthorhombic", centering=centering) 340 | return self 341 | 342 | @classmethod 343 | def hexagonal(cls, a, c, centering="P"): 344 | """Factory for hexagonal lattice_types 345 | 346 | :param a: unit cell length 347 | :param c: unit cell length 348 | """ 349 | a = float(a) 350 | self = cls(a, a, float(c), 90, 90, 120, 351 | lattice_type="hexagonal", centering=centering) 352 | return self 353 | 354 | @classmethod 355 | def monoclinic(cls, a, b, c, beta, centering="P"): 356 | """Factory for hexagonal lattice_types 357 | 358 | :param a: unit cell length 359 | :param b: unit cell length 360 | :param c: unit cell length 361 | :param beta: unit cell angle 362 | """ 363 | self = cls(float(a), float(b), float(c), 90, float(beta), 90, 364 | centering=centering, lattice_type="monoclinic", 365 | unique_axis='b') 366 | return self 367 | 368 | 
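    # Illustrative usage sketch (comment only; the numbers are hypothetical):
    #   cell = Cell.monoclinic(12.4, 30.8, 11.3, 90.5, centering='C')
    #   cell.init_hkl(d_min=2.0)     # pre-compute valid Miller indices
    #   d = cell.d(unique=True)      # unique d-spacings in Angstrom
    #   cell_ref, info = cell.refine_powder(svec, pattern, method='distance')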
@classmethod
369 | def rhombohedral(cls, a, alpha, centering="P"):
370 | """Factory for rhombohedral lattice_types
371 | 
372 | :param a: unit cell length
373 | :param alpha: unit cell angle
374 | """
375 | a = float(a)
376 | alpha = float(alpha)
377 | self = cls(a, a, a, alpha, alpha, alpha,
378 | lattice_type="rhombohedral", centering=centering)
379 | return self
380 | 
381 | @classmethod
382 | def diamond(cls, a):
383 | """Factory for Diamond type FCC like Si and Ge
384 | 
385 | :param a: unit cell length
386 | """
387 | self = cls.cubic(a, centering="F")
388 | self.selection_rules.append(lambda h, k, l: ~((h % 2 == 0) & (k % 2 == 0) & (l % 2 == 0) & ((h + k + l) % 4 != 0)))  # element-wise operators, so the rule also works on index arrays
389 | return self
390 | 
391 | @classmethod
392 | def triclinic(cls, a, b, c, alpha, beta, gamma, centering="P"):
393 | a, b, c, alpha, beta, gamma = (float(p) for p in [a, b, c, alpha, beta, gamma])
394 | self = cls(a, b, c, alpha, beta, gamma, lattice_type='triclinic', centering=centering)
395 | return self
396 | 
397 | @property
398 | def volume(self):
399 | if self._volume is None:
400 | self._volume = self.a * self.b * self.c
401 | if self.lattice_type not in ["cubic", "tetragonal", "orthorhombic"]:
402 | cosa = np.cos(self.alpha * np.pi / 180.)
403 | cosb = np.cos(self.beta * np.pi / 180.)
404 | cosg = np.cos(self.gamma * np.pi / 180.)
405 | self._volume *= np.sqrt(1 - cosa ** 2 - cosb ** 2 - cosg ** 2
406 | + 2 * cosa * cosb * cosg)
407 | return self._volume
408 | 
409 | @property
410 | def centering(self):
411 | return self._centering
412 | 
413 | @centering.setter
414 | def centering(self, centering):
415 | self._centering = centering if centering in self.ctr_types else "P"
416 | self.selection_rules = [lambda h, k, l: ~((h == 0) & (k == 0) & (l == 0))]
417 | if self._centering == "I":
418 | self.selection_rules.append(lambda h, k, l: (h + k + l) % 2 == 0)
419 | if self._centering == "F":
420 | self.selection_rules.append(lambda h, k, l: np.isin(h % 2 + k % 2 + l % 2, (0, 3)))
421 | if self._centering == "R":
422 | self.selection_rules.append(lambda h, k, l: ((h - k + l) % 3 == 0))
423 | if self._centering == "C":
424 | self.selection_rules.append(lambda h, k, l: ((h + k) % 2 == 0))
425 | 
426 | def init_hkl(self, d_min: float = 5.):
427 | """Sets up a grid with valid Miller indices for this lattice.
428 | Useful to pre-compute the indices before running any optimization,
429 | which speeds up the computation.
430 | 
431 | Args:
432 | d_min (float, optional): Minimum d-spacing, in A. Defaults to 5.
433 | """
434 | hmax = int(np.ceil(self.a / d_min))
435 | kmax = int(np.ceil(self.b / d_min))
436 | lmax = int(np.ceil(self.c / d_min))
437 | hkl = np.mgrid[-hmax:hmax+1, -kmax:kmax+1, -lmax:lmax+1]
438 | valid = np.stack([r(*hkl) for r in self.selection_rules], axis=0).all(axis=0)
439 | self.hkl = tuple(H[valid].ravel() for H in hkl)
440 | d = self.d(d_min=None)
441 | self.hkl = tuple(H[d >= d_min] for H in self.hkl)
442 | self._d_min = d_min
443 | 
444 | def d(self, d_min=None, unique=False, a=None, b=None, c=None,
445 | alpha=None, beta=None, gamma=None):
446 | """Calculates d-spacings for the cell. Cell parameters can
447 | transiently be changed, which does *not* affect the values
448 | stored with the cell object. This is useful in the context
449 | of optimization.
450 | 
451 | Args:
452 | d_min (float, optional): Minimum d-spacing. If None, uses
453 | the stored value of the object which can be set using
454 | init_hkl.
Leaving it at None significantly speeds 455 | up the computation, which is recommended for 456 | refinements. Defaults to None. 457 | unique (bool, optional): if True, only unique d-spacings 458 | are returned. Otherwise, all spacings are returned which 459 | are ordered the same way as in the object's hkl attribute. 460 | Defaults to False. 461 | a (float, optional): Temporary cell length. Defaults to None. 462 | b (float, optional): Temporary cell length. Defaults to None. 463 | c (float, optional): Temporary cell length. Defaults to None. 464 | alpha (float, optional): Temporary cell angle. Defaults to None. 465 | beta (float, optional): Temporary cell angle. Defaults to None. 466 | gamma (float, optional): Temporary cell angle. Defaults to None. 467 | 468 | Returns: 469 | np.array: Array of d-spacings 470 | """ 471 | 472 | 473 | a = self.a if a is None else a 474 | b = self.b if b is None else b 475 | c = self.c if c is None else c 476 | alpha = self.alpha if alpha is None else alpha 477 | beta = self.beta if beta is None else beta 478 | gamma = self.gamma if gamma is None else gamma 479 | 480 | if (d_min is not None) and (d_min != self._d_min): 481 | self.init_hkl(d_min) 482 | 483 | h, k, l = self.hkl 484 | 485 | if self.lattice_type in ["cubic", "tetragonal", "orthorhombic"]: 486 | invd2 = (h / a) ** 2 + (k / b) ** 2 + (l / c) ** 2 487 | else: 488 | cosa, sina = np.cos(alpha * np.pi / 180), np.sin(alpha * np.pi / 180) 489 | cosb, sinb = np.cos(beta * np.pi / 180), np.sin(beta * np.pi / 180) 490 | cosg, sing = np.cos(gamma * np.pi / 180), np.sin(gamma * np.pi / 180) 491 | S11 = (b * c * sina) ** 2 492 | S22 = (a * c * sinb) ** 2 493 | S33 = (a * b * sing) ** 2 494 | S12 = a * b * c * c * (cosa * cosb - cosg) 495 | S23 = a * a * b * c * (cosb * cosg - cosa) 496 | S13 = a * b * b * c * (cosg * cosa - cosb) 497 | 498 | invd2 = (S11 * h * h + 499 | S22 * k * k + 500 | S33 * l * l + 501 | 2 * S12 * h * k + 502 | 2 * S23 * k * l + 503 | 2 * S13 * h * l) 504 | invd2 /= (self.volume) ** 2 505 | 506 | return np.sqrt(1 / (np.unique(invd2) if unique else invd2)) 507 | 508 | d_spacing = d 509 | 510 | def export(self, filename='refined.cell'): 511 | from textwrap import dedent 512 | """Exports the cell to a CrystFEL cell file. 513 | 514 | Args: 515 | filename (str, optional): Cell file name. Defaults to 'refined.cell'. 516 | """ 517 | 518 | cellfile = dedent(f''' 519 | CrystFEL unit cell file version 1.0 520 | 521 | lattice_type = {self.lattice_type} 522 | centering = {self.centering} 523 | unique_axis = {self.unique_axis} 524 | 525 | a = {self.a:.3f} A 526 | b = {self.b:.3f} A 527 | c = {self.c:.3f} A 528 | 529 | al = {self.alpha:.2f} deg 530 | be = {self.beta:.2f} deg 531 | ga = {self.gamma:.2f} deg 532 | ''').strip() 533 | 534 | with open(filename, 'w') as fh: 535 | fh.write(cellfile) 536 | 537 | def refine_powder(self, svec, pattern, method='distance', 538 | fill=0.1, min_prom=0., min_height=0., 539 | weights='prom', length_bound=2., angle_bound=3., 540 | **kwargs): 541 | """Refine unit cell parameters against a powder pattern. 542 | The refinement is done using a least-squares fit, where you can 543 | pick three different cost functions: 544 | 545 | * 'distance': the positions of the peaks in the powder pattern 546 | are detected. For each peak, the distance to the closest 547 | d-spacing is computed. 548 | * 'xcorr': the inverse values of the powder pattern at the 549 | d-spacings are computed. 
537 |     def refine_powder(self, svec, pattern, method='distance',
538 |                       fill=0.1, min_prom=0., min_height=0.,
539 |                       weights='prom', length_bound=2., angle_bound=3.,
540 |                       **kwargs):
541 |         """Refine unit cell parameters against a powder pattern.
542 |         The refinement is done using a least-squares fit, where you can
543 |         pick three different cost functions:
544 | 
545 |         * 'distance': the positions of the peaks in the powder pattern
546 |           are detected. For each peak, the distance to the closest
547 |           d-spacing is computed.
548 |         * 'xcorr': the inverse values of the powder pattern at the
549 |           d-spacings are computed.
550 |         * 'derivative': the derivative of the powder pattern at the
551 |           d-spacings is computed.
552 | 
553 |         Depending on the chosen method, further parameters can be set.
554 |         The function returns a new Cell object with refined parameters, and
555 |         a structure with some useful information.
556 | 
557 |         Args:
558 |             svec (np.ndarray): scattering vector (x-axis) of the powder pattern,
559 |                 expressed in inverse nanometer (not angstrom - following
560 |                 CrystFEL convention).
561 |             pattern (np.ndarray): powder pattern at values svec (y-axis)
562 |             method (str, optional): Cost function. See description.
563 |                 Defaults to 'distance'.
564 |             fill (float, optional): Fill value for out-of-range or zero-count
565 |                 s-vectors if method is 'xcorr'. Defaults to 0.1.
566 |             min_prom (float, optional): Minimum prominence of peaks (that is,
567 |                 height relative to their vicinity) if method is 'distance'. Increase
568 |                 if too many small peaks are spuriously detected, which is usually
569 |                 a good idea. Defaults to 0.
570 |             min_height (float, optional): Minimum peak height to be detected.
571 |                 Usually min_prom is the better parameter. Defaults to 0.
572 |             weights (str, optional): Weights of the peaks for the least-squares
573 |                 optimization if method is 'distance'. Can be 'prom' or 'height'.
574 |                 Defaults to 'prom'.
575 |             length_bound (float, optional): Bound range for cell lengths, in A.
576 |                 Defaults to 2.
577 |             angle_bound (float, optional): Bound range for cell angles. Defaults to 3.
578 |             **kwargs: Further arguments will be passed on to scipy.optimize.least_squares
579 | 
580 |         Returns:
581 |             tuple: 2-Tuple of a new Cell object with the refined parameters, and
582 |                 a structure with useful information from the optimization, including
583 |                 the peak positions and heights if method was 'distance'.
584 |         """
585 | 
586 |         from scipy.interpolate import interp1d
587 |         from scipy.optimize import least_squares
588 | 
589 |         # find out which parameters should be optimized
590 |         if self.lattice_type == 'triclinic':
591 |             parameters = ['a', 'b', 'c', 'alpha', 'beta', 'gamma']
592 |         elif self.lattice_type == 'monoclinic':
593 |             parameters = ['a', 'b', 'c', 'beta']
594 |         elif self.lattice_type == 'orthorhombic':
595 |             parameters = ['a', 'b', 'c']
596 |         elif self.lattice_type == 'tetragonal':
597 |             parameters = ['a', 'c']
598 |         elif self.lattice_type == 'cubic':
599 |             parameters = ['a']
600 |         elif self.lattice_type == 'hexagonal':
601 |             parameters = ['a', 'c']
602 |         elif self.lattice_type == 'rhombohedral':
603 |             parameters = ['a', 'alpha']
604 |         else:
605 |             raise Exception(f'This should not happen (lattice type is set to {self.lattice_type}). Yell at Robert.')
606 | 
607 |         _, unique_pos = np.unique(self.d(), return_index=True)  # unique d-spacings
608 |         p0 = [getattr(self, cpar) for cpar in parameters]
609 |         dsp = lambda p: self.d(**{cpar: p[ii] for ii, cpar in enumerate(parameters)})[unique_pos]
610 |         bounds = ([getattr(self, cpar) - (length_bound if cpar in 'abc' else angle_bound)
611 |                    for cpar in parameters],
612 |                   [getattr(self, cpar) + (length_bound if cpar in 'abc' else angle_bound)
613 |                    for cpar in parameters])
614 | 
615 |         if method == 'xcorr':
616 |             cost_profile = interp1d(svec, 1/np.where(pattern != 0, pattern, fill),
617 |                                     bounds_error=False, fill_value=fill)
618 |             cost = lambda p: cost_profile(10/dsp(p))
619 |             pk_pos = pk_height = pk_prom = []
620 | 
621 |         elif method == 'derivative':
622 |             cost_profile = interp1d(svec[1:]/2+svec[:-1]/2, np.diff(pattern),
623 |                                     bounds_error=False, fill_value=0)
624 |             cost = lambda p: cost_profile(10/dsp(p))
625 |             pk_pos = pk_height = pk_prom = []
626 | 
627 |         elif method == 'distance':
628 |             from scipy.signal import find_peaks
629 |             lim = 0.95 * 10/dsp(p0).min()
630 |             pkdat = find_peaks(pattern[svec < lim], height=min_height, prominence=min_prom)
631 |             pk_pos = svec[pkdat[0]]
632 |             pk_height = pkdat[1]['peak_heights']
633 |             pk_prom = pkdat[1]['prominences']
634 |             w = (pk_prom if weights == 'prom'
635 |                  else pk_height if weights == 'height' else 1)
636 |             cost = lambda p: 100*w * (np.abs(10/dsp(p).reshape(1,-1)
637 |                                              - pk_pos.reshape(-1,1))).min(axis=1)
638 | 
639 |         else:
640 |             raise ValueError(f'Unknown refinement method {method}')
641 | 
642 |         cost_init = 0.5 * (cost(p0)**2).sum()
643 |         lsq = least_squares(cost, p0, bounds=bounds, **kwargs)
644 | 
645 |         # return a new cell with the optimized parameters
646 |         C_ref = getattr(self, self.lattice_type)(centering=self._centering,
647 |                                                  **{cpar: lsq.x[ii] for ii, cpar in enumerate(parameters)})
648 |         C_ref.selection_rules = self.selection_rules
649 |         C_ref.init_hkl(self._d_min)
650 | 
651 |         info = {'lsq_result': lsq,
652 |                 'initial_cost': cost_init}
653 |         if method == 'distance':
654 |             info.update({'peak_position': pk_pos,
655 |                          'peak_height': pk_height,
656 |                          'peak_prominence': pk_prom})
657 | 
658 |         if not lsq.success:
659 |             warn('Powder refinement did not converge!')
660 | 
661 |         return C_ref, info
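    # [Editor's example] A hedged sketch of a powder refinement (not part of
    # the library; a hexagonal() factory analogous to the ones above is
    # assumed, and svec/pattern would come from an azimuthally integrated
    # powder trace with svec in 1/nm):
    #
    #     >>> C = Cell.hexagonal(90.3, 38.2)   # hypothetical cell, in A
    #     >>> C.init_hkl(d_min=3.5)
    #     >>> C_ref, info = C.refine_powder(svec, pattern, method='distance',
    #     ...                               min_prom=5., length_bound=1.5)
    #     >>> C_ref.export('refined.cell')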
662 | 
-------------------------------------------------------------------------------- /diffractem/quick_proc.py: --------------------------------------------------------------------------------
1 | import hdf5plugin # required to access LZ4-encoded HDF5 data sets
2 | from diffractem import version, proc2d, pre_proc_opts, nexus, io
3 | from diffractem.dataset import Dataset
4 | from tifffile import imread
5 | import numpy as np
6 | import os
7 | import dask.array as da
8 | import h5py
9 | from dask.distributed import Client, LocalCluster, Lock
10 | import dask
11 | import argparse
12 | import subprocess
13 | import pandas as pd
14 | import random
15 | from warnings import warn
16 | from time import sleep
17 | 
18 | def _fast_correct(*args, data_key='/%/data/corrected',
19 |                   shots_grp='/%/shots',
20 |                   peaks_grp='/%/data', **kwargs):
21 | 
22 |     imgs, info = proc2d.analyze_and_correct(*args, **kwargs)
23 |     store_dat = {shots_grp + '/' + k: v for k, v in info.items() if k != 'peak_data'}
24 |     store_dat.update({peaks_grp + '/' + k: v for k, v in info['peak_data'].items()})
25 |     store_dat[data_key] = imgs
26 | 
27 |     return store_dat
28 | 
29 | def quick_proc(ds, opts, label_raw, label, client, reference=None, pxmask=None):
30 | 
31 |     reference = imread(opts.reference) if reference is None else reference
32 |     pxmask = imread(opts.pxmask) if pxmask is None else pxmask
33 | 
34 |     stack = ds.stacks[label_raw]
35 |     # stk_del = ds.stacks['label_raw'].to_delayed().ravel()
36 | 
37 |     # get array names and shapes by correcting a single image (the last one)
38 |     sample_res = _fast_correct(stack[-1:,...].compute(scheduler='threading'),
39 |                                opts=opts,
40 |                                data_key=ds.data_pattern + '/' + label,
41 |                                shots_grp=ds.shots_pattern,
42 |                                peaks_grp=ds.data_pattern)
43 | 
44 |     # print({k: v.dtype for k, v in sample_res.items()})
45 | 
46 |     # initialize file structure
47 |     for (file, subset), grp in ds.shots.groupby(['file', 'subset']):
48 |         with h5py.File(file, 'a') as fh:
49 |             for pattern, data in sample_res.items():
50 |                 path = pattern.replace('%', subset)
51 |                 # print('Initializing', file, path)
52 |                 fh.require_dataset(path,
53 |                                    shape=(len(grp),) + data.shape[1:],
54 |                                    dtype=data.dtype,
55 |                                    chunks=(1,) + data.shape[1:],
56 |                                    compression=opts.compression)
57 |             fh[ds.data_pattern.replace('%', subset)].attrs['signal'] = label
58 | 
59 |     # array of integers corresponding to the chunk number
60 |     chunk_label = np.concatenate([np.repeat(ii, cs)
61 |                                   for ii, cs in enumerate(stack.chunks[0])])
62 | 
63 |     # delay objects returning the image and info dictionary (same stack and target keys as for the sample above)
64 |     cmp_del = [dask.delayed(_fast_correct)(raw_chk, opts, data_key=ds.data_pattern + '/' + label,
65 |                                            shots_grp=ds.shots_pattern, peaks_grp=ds.data_pattern) for raw_chk in stack.to_delayed().ravel()]
66 | 
67 |     # file lock objects
68 |     locks = {fn: Lock() for fn in ds.files}
69 | 
70 |     # make delay objects for writing results to file (= maximum side effects!)
71 |     dels = []
72 |     for chks, (cl, sht) in zip(cmp_del, ds.shots.groupby(chunk_label)):
73 |         assert len(sht.drop_duplicates(['file','subset'])) == 1
74 |         ii_to = sht.shot_in_subset.values
75 |         dels.append(dask.delayed(nexus._save_single_chunk_multi)(chks,
76 |                                                                  file=sht.file.values[0],
77 |                                                                  subset=sht.subset.values[0],
78 |                                                                  idcs=ii_to,
79 |                                                                  lock=locks[sht.file.values[0]]
80 |                                                                  ))
81 | 
82 |     # random.shuffle(dels) # shuffling tasks to minimize concurrent file access
83 |     chunk_info = client.compute(dels, sync=True)
84 |     return pd.DataFrame(chunk_info, columns=['file', 'subset', 'path', 'shot_in_subset'])
85 | 
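# [Editor's note] The chunk_label construction in quick_proc maps dask chunk
# sizes to a per-shot chunk index, which the groupby above uses to align
# shots with computed chunks. A self-contained sketch (not part of the module):
#
#     >>> import numpy as np
#     >>> chunks = (3, 2)   # e.g. stack.chunks[0]
#     >>> np.concatenate([np.repeat(ii, cs) for ii, cs in enumerate(chunks)])
#     array([0, 0, 0, 1, 1])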
86 | def main():
87 | 
88 |     parser = argparse.ArgumentParser(description='Quick and dirty pre-processing for Serial Electron Diffraction data',
89 |                                      allow_abbrev=False, epilog='Any other options are passed on as modification to the option file')
90 |     parser.add_argument('filename', type=str, nargs='*', help='List file, HDF5 file(s), or glob pattern. Glob patterns must be given in SINGLE quotes.')
91 |     parser.add_argument('-s', '--settings', type=str, help='Option YAML file. Defaults to \'preproc.yaml\'.', default='preproc.yaml')
92 |     parser.add_argument('-A', '--address', type=str, help='Address of an existing dask.distributed cluster to use instead of making a new one. Defaults to making a new one.', default=None)
93 |     parser.add_argument('-N', '--nprocs', type=int, help='Number of processes of a new dask.distributed cluster. Defaults to letting dask decide.', default=None)
94 |     parser.add_argument('-L', '--local-directory', type=str, help='Fast (scratch) directory for computations. Defaults to the current directory.', default=None)
95 |     parser.add_argument('-c', '--chunksize', type=int, help='Chunk size of raw data stack. Should be an integer multiple of the number of movie stack frames! Defaults to 100.', default=100)
96 |     parser.add_argument('-l', '--list-file', type=str, help='Name of output list file', default='processed.lst')
97 |     parser.add_argument('-w', '--wait-for-files', help='Wait for files matching wildcard pattern', action='store_true')
98 |     parser.add_argument('--include-existing', help='When using -w/--wait-for-files, also include existing files', action='store_true')
99 |     parser.add_argument('--append', help='Append to list instead of overwrite', action='store_true')
100 |     parser.add_argument('-d', '--data-path-old', type=str, help='Raw data field in HDF5 file(s). Defaults to /%%/data/raw_counts', default='/%/data/raw_counts')
101 |     parser.add_argument('-n', '--data-path-new', type=str, help='Corrected data field in HDF5 file(s). Defaults to /%%/data/corrected', default='/%/data/corrected')
102 |     parser.add_argument('--no-bgcorr', help='Skip background correction', action='store_true')
103 |     parser.add_argument('--no-validate', help='Do not validate files before attempting to process', action='store_true')
104 |     # parser.add_argument('ppopt', nargs=argparse.REMAINDER, help='Preprocessing options to be overridden')
105 | 
106 |     args, extra = parser.parse_known_args()
107 |     # print(args, extra)
108 |     # raise RuntimeError('thus far!')
109 |     opts = pre_proc_opts.PreProcOpts(args.settings)
110 | 
111 |     label_raw = args.data_path_old.rsplit('/', 1)[-1]
112 |     label = args.data_path_new.rsplit('/', 1)[-1]
113 | 
114 |     if extra:
115 |         # If extra arguments have been supplied, overwrite existing values
116 |         opt_parser = argparse.ArgumentParser()
117 |         for k, v in opts.__dict__.items():
118 |             opt_parser.add_argument('--' + k, type=type(v), default=None)
119 |         opts2 = opt_parser.parse_args(extra)
120 | 
121 |         for k, v in vars(opts2).items():
122 |             if v is not None:
123 |                 if type(v) != type(opts.__dict__[k]):
124 |                     warn('Mismatch of data types in overridden argument!', RuntimeWarning)
125 |                 print(f'Overriding option file setting {k} = {opts.__dict__[k]} ({type(opts.__dict__[k])}). ',
126 |                       f'New value is {v} ({type(v)})')
127 |                 opts.__dict__[k] = v
128 | 
129 |     # raise RuntimeError('thus far!')
130 |     print('Running on diffractem:', version())
131 |     print('Current path is:', os.getcwd())
132 | 
133 |     # client = Client()
134 |     if args.address is not None:
135 |         print('Connecting to cluster scheduler at', args.address)
136 | 
137 |         try:
138 |             client = Client(address=args.address, timeout=3)
139 |         except Exception:
140 |             print(f'\n----\nThere seems to be no dask.distributed scheduler running at {args.address}.\n'
141 |                   f'Please double-check, or start a new cluster by omitting the --address option.')
142 |             return
143 |     else:
144 |         print('Creating a dask.distributed cluster...')
145 |         client = Client(n_workers=args.nprocs, local_directory=args.local_directory, processes=True)
146 |         print('\n\n---\nStarted dask.distributed cluster:')
147 |         print(client)
148 |         print('You can access the dashboard for monitoring at: ', client.dashboard_link)
149 | 
150 | 
151 |     client.run(os.chdir, os.getcwd())
152 | 
153 |     if len(args.filename) == 1:
154 |         args.filename = args.filename[0]
155 | 
156 |     # print(args.filename)
157 |     seen_raw_files = [] if args.include_existing else io.expand_files(args.filename)
158 | 
159 |     while True:
160 | 
161 |         if args.wait_for_files:
162 | 
163 |             # slightly awkward sequence to only open finished files... (but believe me - it works!)
164 | 
165 |             fns = io.expand_files(args.filename)
166 |             # print(fns)
167 |             fns = [fn for fn in fns if fn not in seen_raw_files]
168 |             # validation...
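            # [Editor's note] expand_files is deliberately called twice here:
            # once above without validation to spot new files quickly, and
            # once below with validate=..., so files that are still being
            # written to are caught and retried instead of crashing the loop.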
169 |             try:
170 |                 fns = io.expand_files(fns, validate=not args.no_validate)
171 |             except (OSError, IOError, RuntimeError) as err:
172 |                 print(f'Could not open file(s) {" ".join(fns)} because of', err)
173 |                 print('Possibly, it is still being written to. Waiting a bit...')
174 |                 sleep(5)
175 |                 continue
176 | 
177 |             if not fns:
178 |                 print('No new files, waiting a bit...')
179 |                 sleep(5)
180 |                 continue
181 |             else:
182 |                 print('Found new file(s):\n', '\n'.join(fns))
183 |                 try:
184 |                     ds_raw = Dataset.from_files(fns, chunking=args.chunksize)
185 |                 except Exception as err:
186 |                     print(f'Could not open file(s) {" ".join(fns)} because of', err)
187 |                     print('Possibly, it is still being written to. Waiting a bit...')
188 |                     sleep(5)
189 |                     continue
190 | 
191 |         else:
192 |             fns = io.expand_files(args.filename, validate=not args.no_validate)
193 |             if fns:
194 |                 ds_raw = Dataset.from_files(fns, chunking=args.chunksize)
195 |             else:
196 |                 print(f'\n---\n\nFile(s) {args.filename} not found or (all of them) invalid.')
197 |                 return
198 | 
199 |         seen_raw_files.extend(ds_raw.files)
200 | 
201 |         print('---- Have dataset ----')
202 |         print(ds_raw)
203 | 
204 |         # delete undesired stacks
205 |         delstacks = [sn for sn in ds_raw.stacks.keys() if sn != args.data_path_old.rsplit('/', 1)[-1]]
206 |         for sn in delstacks:
207 |             ds_raw.delete_stack(sn)
208 | 
209 |         if opts.aggregate:
210 |             print('---- Aggregating raw data ----')
211 |             ds_compute = ds_raw.aggregate(query=opts.agg_query,
212 |                                           by=['sample', 'region', 'run', 'crystal_id'],
213 |                                           how='sum', new_folder=opts.proc_dir,
214 |                                           file_suffix=opts.agg_file_suffix)
215 |         else:
216 |             ds_compute = ds_raw.get_selection(query=opts.select_query,
217 |                                               file_suffix=opts.agg_file_suffix)
218 | 
219 |         print('Initializing data files...')
220 |         os.makedirs(opts.proc_dir, exist_ok=True)
221 |         ds_compute.init_files(overwrite=True)
222 | 
223 |         print('Storing meta tables...')
224 |         ds_compute.store_tables(shots=True, features=True)
225 | 
226 |         print(f'Processing diffraction data... monitor progress at {client.dashboard_link} (or forward port if remote)')
227 |         chunk_info = quick_proc(ds_compute, opts, label_raw, label, client)
228 | 
229 |         # make sure that the calculation is consistent with the data set
230 |         for (sh, sh_grp), (ch, ch_grp) in zip(ds_compute.shots.groupby(['file', 'subset']), chunk_info.groupby(['file', 'subset'])):
231 |             if any(sh_grp.shot_in_subset.values != np.sort(np.concatenate(ch_grp.shot_in_subset.values))):
232 |                 raise ValueError(f'Inconsistency between calculated data and shot list in {sh[0]}: {sh[1]} found. Please investigate.')
233 | 
234 |         ds_compute.write_list(args.list_file, append=args.append)
235 | 
236 |         print(f'Computation done. Processed files are in {args.list_file}')
237 | 
238 |         if not args.wait_for_files:
239 |             break
240 | 
241 | if __name__ == '__main__':
242 |     main()
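# [Editor's example] A hypothetical invocation of this script (option names
# taken from the argparse definitions above; file names are made up):
#
#     quick_proc.py 'raw/*.nxs' -s preproc.yaml -N 8 -c 100 -w -l processed.lst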
-------------------------------------------------------------------------------- /diffractem/stream2sol.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Create solution file for '--indexing=file' from a stream
4 | #
5 | # Copyright © 2020-2021 Max-Planck-Gesellschaft
6 | #   zur Förderung der Wissenschaften e.V.
7 | # Copyright © 2021 Deutsches Elektronen-Synchrotron DESY,
8 | #   a research centre of the Helmholtz Association.
9 | #
10 | # Authors:
11 | #   2020 Robert Bücker 
12 | #   2021 Thomas White 
13 | 
14 | from io import StringIO
15 | import re
16 | from warnings import warn
17 | from copy import deepcopy
18 | 
19 | BEGIN_GEOM = '----- Begin geometry file -----'
20 | END_GEOM = '----- End geometry file -----'
21 | BEGIN_CELL = '----- Begin unit cell -----'
22 | END_CELL = '----- End unit cell -----'
23 | BEGIN_CHUNK = '----- Begin chunk -----'
24 | END_CHUNK = '----- End chunk -----'
25 | BEGIN_CRYSTAL = '--- Begin crystal'
26 | END_CRYSTAL = '--- End crystal'
27 | BEGIN_PEAKS = 'Peaks from peak search'
28 | END_PEAKS = 'End of peak list'
29 | BEGIN_REFLECTIONS = 'Reflections measured after indexing'
30 | END_REFLECTIONS = 'End of reflections'
31 | HEAD = 'CrystFEL stream format {}.{}'.format(2, 3)
32 | GENERATOR = 'Generated by diffractem StreamParser'
33 | PEAK_COLUMNS = ['fs/px', 'ss/px', '(1/d)/nm^-1', 'Intensity', 'Panel']
34 | REFLECTION_COLUMNS = ['h', 'k', 'l', 'I', 'Sigma(I)', 'Peak', 'Background', 'fs/px', 'ss/px', 'Panel']
35 | ID_FIELDS = ['file', 'Event', 'serial']
36 | CRYSTAL_DATA_FIELDS = ['astar', 'bstar', 'cstar', 'predict_refine/det_shift',
37 |                        'profile_radius', 'diffraction_resolution_limit']
38 | 
39 | args = None
40 | class Crystal:
41 | 
42 |     def __init__(self, line):
43 |         self.astar = (None, None, None)
44 |         self.bstar = (None, None, None)
45 |         self.cstar = (None, None, None)
46 |         self.lattice_type = None
47 |         self.centering = None
48 |         self.unique_axis = None
49 |         self.det_shift = (None, None)
50 |         self.start_line = line
51 | 
52 |     @property
53 |     def initialized(self):
54 |         global legacy
55 |         required_fields = [*self.astar, *self.bstar, *self.cstar,
56 |                            *self.det_shift]
57 |         if not legacy:
58 |             required_fields += [self.lattice_type, self.centering]
59 |         return all([x is not None for x in required_fields])
60 | 
61 |     @property
62 |     def lattice_type_sym(self):
63 |         if self.lattice_type == 'triclinic':
64 |             return 'a' + self.centering
65 |         elif self.lattice_type == 'monoclinic':
66 |             return 'm' + self.centering + self.unique_axis
67 |         elif self.lattice_type == 'orthorhombic':
68 |             return 'o' + self.centering
69 |         elif self.lattice_type == 'tetragonal':
70 |             return 't' + self.centering + self.unique_axis
71 |         elif self.lattice_type == 'cubic':
72 |             return 'c' + self.centering
73 |         elif self.lattice_type == 'hexagonal':
74 |             return 'h' + self.centering + self.unique_axis
75 |         elif self.lattice_type == 'rhombohedral':
76 |             return 'r' + self.centering
77 |         else:
78 |             warn('Invalid lattice type {}'.format(self.lattice_type))
79 |             return 'invalid'
80 | 
81 |     def __str__(self):
82 |         global legacy
83 |         if not self.initialized:
84 |             warn('Trying to get string from non-initialized crystal from line {}.'.format(self.start_line))
85 |             return ''  # __str__ must return a string, not None
86 |         else:
87 |             cs = ' '.join(['{0[0]} {0[1]} {0[2]}'.format(vec)
88 |                            for vec in [self.astar, self.bstar, self.cstar]])
89 |             cs += ' {0[0]} {0[1]}'.format(self.det_shift)
90 |             if not legacy:
91 |                 cs += ' ' + self.lattice_type_sym
92 |             return cs
93 | 
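# [Editor's note] A sketch of the solution-file line a Crystal contributes
# (all numbers hypothetical); Chunk.__str__ below prepends file and event:
#
#     data/run1.h5 entry//5 0.09 -0.01 0.11 0.02 -0.13 0.05 0.10 0.04 -0.07 0.01 -0.02 oI
#
# i.e. the nine reciprocal-basis components, the two detector-shift values,
# and (unless omit_cell/--legacy is used) the condensed symbol from
# lattice_type_sym, e.g. 'oI' for an I-centered orthorhombic lattice.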
94 | class Chunk:
95 | 
96 |     def __init__(self, line):
97 |         self.file = None
98 |         self.Event = None
99 |         self.crystals = []
100 |         self.start_line = line
101 |         self.x_shift = 0
102 |         self.y_shift = 0
103 | 
104 |     @property
105 |     def n_cryst(self):
106 |         return len(self.crystals)
107 | 
108 |     @property
109 |     def initialized(self):
110 |         return (self.file is not None) and (self.Event is not None)
111 | 
112 |     def add_crystal(self, crystal):
113 |         if (crystal is None) or (not crystal.initialized):  # None check must come first
114 |             raise RuntimeError('Trying to add non-initialized crystal to chunk from line {}.'.format(self.start_line))
115 |         self.crystals.append(deepcopy(crystal))
116 |         # print(crystal)
117 | 
118 |     def __str__(self):
119 |         if not self.initialized:
120 |             warn('Trying to get string from non-initialized chunk from line {}.'.format(self.start_line))
121 |             return ''  # __str__ must return a string, not None
122 |         else:
123 |             # return '\n'.join([' '.join([self.file, *self.Event.split('//'), str(cryst)])
124 |             #                   for ii, cryst in enumerate(self.crystals)])
125 |             # new-style (not working yet)
126 |             return '\n'.join([' '.join([self.file, self.Event, str(cryst)])
127 |                               for ii, cryst in enumerate(self.crystals)])
128 | 
129 | 
130 | def parse_stream(stream, sol=None, return_meta=True,
131 |                  file_label='Image filename', event_label='Event',
132 |                  x_shift_label=None, y_shift_label=None, omit_cell=False):
133 | 
134 |     global legacy
135 |     legacy = omit_cell
136 |     if legacy:
137 |         print('Generating legacy solution file (omitting cell type). Do you really want that?')
138 | 
139 |     curr_chunk = None
140 |     curr_cryst = None
141 |     geom = ''
142 |     cell = ''
143 |     command = ''
144 |     parsing_geom = False
145 |     parsing_cell = False
146 |     parsing_peaks = False
147 |     have_cell = False
148 |     have_geom = False
149 |     have_command = False
150 |     parsing_reflections = False
151 |     parse_vec = lambda l: tuple(float(k) for k in re.findall(r'[+-]?\d*\.\d*', l))
152 | 
153 |     with open(stream, 'r') as fh_in, (StringIO() if sol is None else open(sol,'w')) as fh_out:
154 | 
155 |         for ln, l in enumerate(fh_in):
156 | 
157 |             if parsing_reflections:
158 |                 if l.startswith(END_REFLECTIONS):
159 |                     parsing_reflections = False
160 |                 else:
161 |                     # here, any reflection parsing would go
162 |                     pass
163 | 
164 |             elif parsing_peaks:
165 |                 if l.startswith(END_PEAKS):
166 |                     parsing_peaks = False
167 |                 else:
168 |                     # here, any peak parsing would go
169 |                     pass
170 | 
171 |             elif l.startswith(BEGIN_CHUNK):
172 |                 curr_chunk = Chunk(ln)
173 | 
174 |             elif (curr_chunk is not None) and (curr_cryst is None):
175 |                 # parsing chunks (= events = shots) _outside_ crystals
176 | 
177 |                 if l.startswith(END_CHUNK):
178 |                     if not curr_chunk.initialized:
179 |                         raise RuntimeError('Incomplete chunk found before line ' + str(ln))
180 |                     if curr_chunk.n_cryst:
181 |                         fh_out.write(str(curr_chunk) + '\n')
182 |                     # print(str(curr_chunk))
183 |                     curr_chunk = None
184 | 
185 |                 elif l.startswith(file_label):
186 |                     curr_chunk.file = l.split(' ', 2)[-1].strip()
187 | 
188 |                 elif l.startswith(event_label):
189 |                     curr_chunk.Event = l.split(' ')[-1].strip()
190 | 
191 |                 elif x_shift_label and l.startswith(x_shift_label):
192 |                     curr_chunk.x_shift = float(l.split(' ')[-1].strip())
193 | 
194 |                 elif y_shift_label and l.startswith(y_shift_label):
195 |                     curr_chunk.y_shift = float(l.split(' ')[-1].strip())
196 | 
197 |                 elif l.startswith(BEGIN_CRYSTAL):
198 |                     if not curr_chunk.initialized:
199 |                         raise RuntimeError('Crystal for incomplete chunk in ' + str(ln))
200 |                     curr_cryst = Crystal(ln)
201 | 
202 |             elif curr_cryst is not None:
203 |                 # parsing a (single) crystal
204 | 
205 |                 if l.startswith(END_CRYSTAL):
206 |                     curr_chunk.add_crystal(curr_cryst)
207 |                     curr_cryst = None
208 | 
209 |                 elif l.startswith('astar'):
210 |                     curr_cryst.astar = parse_vec(l)
211 | 
212 |                 elif l.startswith('bstar'):
213 |                     curr_cryst.bstar = parse_vec(l)
214 | 
215 |                 elif l.startswith('cstar'):
216 |                     curr_cryst.cstar = parse_vec(l)
217 | 
218 |                 elif l.startswith('lattice_type'):
219 |                     curr_cryst.lattice_type = l.split(' ')[2].strip()
220 | 
221 |                 elif l.startswith('centering'):
222 |                     curr_cryst.centering = l.split(' ')[2].strip()
223 | 
224 |                 elif l.startswith('unique_axis'):
225 |                     curr_cryst.unique_axis = l.split(' ')[2].strip()
226 | 
227 |                 elif l.startswith('predict_refine/det_shift'):
228 |                     curr_cryst.det_shift = parse_vec(l)
229 |                     curr_cryst.det_shift = (curr_cryst.det_shift[0] + curr_chunk.x_shift,
230 |                                             curr_cryst.det_shift[1] + curr_chunk.y_shift)
231 | 
232 |             elif l.startswith(BEGIN_GEOM) and not have_geom:
233 |                 parsing_geom = True
234 | 
235 |             elif parsing_geom:
236 |                 if not l.startswith(END_GEOM):
237 |                     geom += l
238 |                 else:
239 |                     parsing_geom = False
240 |                     have_geom = True
241 | 
242 |             elif l.startswith(BEGIN_CELL) and not have_cell:
243 |                 parsing_cell = True
244 | 
245 |             elif parsing_cell:
246 |                 if not l.startswith(END_CELL):
247 |                     cell += l
248 |                 else:
249 |                     parsing_cell = False
250 |                     have_cell = True
251 | 
252 |             elif ('indexamajig' in l) and not have_command:
253 |                 command = l
254 |                 have_command = True
255 | 
256 |             elif l.startswith(BEGIN_PEAKS):
257 |                 parsing_peaks = True
258 | 
259 |             elif l.startswith(BEGIN_REFLECTIONS):
260 |                 parsing_reflections = True
261 | 
262 |     if sol is None:
263 |         out = fh_out.getvalue()
264 |         if return_meta:
265 |             return out, (command, geom, cell)
266 |         else:
267 |             return out
268 | 
269 |     else:
270 |         if return_meta:
271 |             return command, geom, cell
272 | 
273 | def main():
274 |     global args
275 | 
276 |     from argparse import ArgumentParser
277 |     parser = ArgumentParser(description='Conversion tool from stream to solution file(s) for re-integration/-refinement.')
278 | 
279 |     parser.add_argument('-i', '--input', type=str, help='Input stream file', required=True)
280 |     parser.add_argument('-o', '--output', type=str, help='Output solution file', required=True)
281 |     parser.add_argument('-g', '--geometry-out', type=str, help='Output geometry file (optional)')
282 |     parser.add_argument('-p', '--cell-out', type=str, help='Output cell file (optional)')
283 |     parser.add_argument('-L', '--legacy', help='Legacy file format: omit cell info', action='store_true')
284 |     parser.add_argument('--file-field', type=str, help='Field in chunks for image filename', default='Image filename')
285 |     parser.add_argument('--event-field', type=str, help='Field in chunk for event identifier', default='Event')
286 |     parser.add_argument('--x-shift-field', type=str, help='Field in chunk for x-shift identifier', default='')
287 |     parser.add_argument('--y-shift-field', type=str, help='Field in chunk for y-shift identifier', default='')
288 | 
289 |     args = parser.parse_args()
290 | 
291 |     meta = parse_stream(args.input, args.output, return_meta=True,
292 |                         file_label=args.file_field, event_label=args.event_field,
293 |                         x_shift_label=args.x_shift_field, y_shift_label=args.y_shift_field,
294 |                         omit_cell=args.legacy)
295 |     # print('Original indexamajig call was: \n' + meta[0])
296 |     if args.geometry_out:
297 |         with open(args.geometry_out, 'w') as fh:
298 |             fh.write(meta[1])
299 | 
300 |     if args.cell_out:
301 |         if not meta[2]:  # meta = (command, geom, cell) - check the cell entry
302 |             print('No cell found in stream file. Not writing cell file.')
303 |         else:
304 |             with open(args.cell_out, 'w') as fh:
305 |                 fh.write(meta[2])
306 | 
307 | if __name__ == '__main__':
308 |     main()
309 | 
-------------------------------------------------------------------------------- /diffractem/stream_parser.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | from io import StringIO
3 | import numpy as np
4 | import re
5 | from typing import Union, Optional
6 | 
7 | BEGIN_GEOM = '----- Begin geometry file -----'
8 | END_GEOM = '----- End geometry file -----'
9 | BEGIN_CELL = '----- Begin unit cell -----'
10 | END_CELL = '----- End unit cell -----'
11 | BEGIN_CHUNK = '----- Begin chunk -----'
12 | END_CHUNK = '----- End chunk -----'
13 | BEGIN_CRYSTAL = '--- Begin crystal'
14 | END_CRYSTAL = '--- End crystal'
15 | BEGIN_PEAKS = 'Peaks from peak search'
16 | END_PEAKS = 'End of peak list'
17 | BEGIN_REFLECTIONS = 'Reflections measured after indexing'
18 | END_REFLECTIONS = 'End of reflections'
19 | HEAD = 'CrystFEL stream format {}.{}'.format(2, 3)
20 | GENERATOR = 'Generated by diffractem StreamParser'
21 | PEAK_COLUMNS = ['fs/px', 'ss/px', '(1/d)/nm^-1', 'Intensity', 'Panel']
22 | REFLECTION_COLUMNS = ['h', 'k', 'l', 'I', 'Sigma(I)', 'Peak', 'Background', 'fs/px', 'ss/px', 'Panel']
23 | ID_FIELDS = ['file', 'Event', 'serial']
24 | 
25 | 
26 | def make_substream(stream: 'StreamParser', Ncryst: int, seed: Optional[int] = None,
27 |                    filename: Optional[str] = None, query: Optional[str] = None):
28 |     """Write a stream file containing a sub-set of events to a new stream file.
29 | 
30 |     Args:
31 |         stream (StreamParser): StreamParser object holding the original stream
32 |         Ncryst (int): Number of events to sample
33 |         seed (Optional[int], optional): Seed of the random generator. Defaults to None.
34 |         filename (Optional[str], optional): Output stream filename. Defaults to the filename of the original
35 |             stream inside a 'subsets/' folder, with '-N_{Ncryst}' appended, where {Ncryst} is the number of sampled crystals.
36 |         query (str, optional): query to pre-select events. A sensible choice might be to
37 |             only pick indexed events by setting query='indexed_by != "none"'. Defaults to None.
38 | 
39 |     Returns:
40 |         str: Name of the stream file that was written.
41 |     """
42 | 
43 |     fn2 = 'subsets/' + stream.filename.rsplit('.',1)[0] + f'-N_{Ncryst}.stream' \
44 |         if filename is None else filename
45 | 
46 |     sel = stream.shots if query is None else stream.shots.query(query)
47 |     sel = sel.sample(n=Ncryst, random_state=seed)
48 |     sel.sort_values(by='first_line', ascending=False, inplace=True)
49 | 
50 |     first = list(sel.first_line)
51 |     last = list(sel.last_line)
52 |     first.append(0)
53 |     last.append(stream.shots.first_line.min() - 1)
54 | 
55 |     copying = False
56 |     section = (first.pop(), last.pop())
57 | 
58 |     with open(stream.filename,'r') as fh_from, open(fn2,'w') as fh_to:
59 |         for ln, l in enumerate(fh_from):
60 |             if not copying:
61 |                 if ln == section[0]:
62 |                     copying = True
63 |                     #print(section[0], ln)
64 |             if copying:
65 |                 fh_to.write(l)
66 |                 if ln == section[1]:
67 |                     copying = False
68 |                     try:
69 |                         section = (first.pop(), last.pop())
70 |                     except IndexError:
71 |                         break
72 | 
73 |     print('Wrote subset with', len(sel), 'events to', fn2)
74 | 
75 |     return fn2
76 | 
77 | 
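# [Editor's example] A hedged usage sketch for make_substream (not part of
# the library; the stream file name is made up, and the default output path
# lands in a 'subsets/' folder, which must exist):
#
#     >>> sp = StreamParser('hits.stream')
#     >>> fn = make_substream(sp, Ncryst=500, seed=42,
#     ...                     query='indexed_by != "none"')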
78 | def augment_stream(streamname: str, outfile: str, new_fields: Union[pd.DataFrame, dict], where: str = 'chunk'):
79 |     """Add new fields to chunk (or crystal) headers in the stream file, which can then be used for chopping or filtering.
80 |     Somewhat similar to indexamajig's "include-hdf5-field" option, just *after* the fact. The result is
81 |     written to outfile; where selects insertion at chunk ('chunk') or crystal ('crystal') level.
82 | 
83 |     Args:
84 |         streamname (str): Name of the input stream file
85 |         new_fields (pd.DataFrame): pandas DataFrame with index matching the file and Event of the stream file,
86 |             and columns matching the additional fields to be added
87 |     """
88 | 
89 |     chunk_init = False
90 |     found_fn = ''
91 |     found_event = ''
92 |     with open(streamname, 'r') as fh_in, open(outfile, 'w') as fh:
93 |         for ln, l in enumerate(fh_in):
94 | 
95 |             if not chunk_init and l.startswith(BEGIN_CHUNK):
96 |                 # print('new chunk')
97 |                 chunk_init = True
98 |                 file_init = False
99 |                 event_init = False
100 |                 found_event = ''
101 |                 cols = list(new_fields.keys())
102 | 
103 |             elif chunk_init and l.startswith('Image filename:'):
104 |                 found_fn = l.split(': ')[-1].strip()
105 |                 # print(found_fn)
106 |                 file_init = True
107 | 
108 |             elif chunk_init and l.startswith('Event:'):
109 |                 found_event = l.split(': ')[-1].strip()
110 |                 # print(found_event)
111 |                 event_init = True
112 | 
113 |             elif chunk_init and event_init and file_init and \
114 |                 l.startswith(BEGIN_REFLECTIONS if where=='crystal' else BEGIN_PEAKS):
115 |                 # now is the time to insert the new stuff
116 |                 # print(found_fn, found_event)
117 |                 # print(chunk_init, event_init)
118 |                 for k, v in new_fields.loc[(found_fn, found_event),:].items():
119 |                     # print(v)
120 |                     fh.write(f'{k} = {v}\n')
121 | 
122 |             elif chunk_init and l.startswith(END_CHUNK):
123 |                 chunk_init = False
124 | 
125 |             fh.write(l)
126 | 
127 | def chop_stream(streamname: str, id_list: list, id_field: str = 'hdf5/%/shots/frame',
128 |                 id_suffix: str = 'frame', fn_contains: str = None):
129 |     """Chops a stream file into sub-streams containing only shots with a specific value of
130 |     a defined field, which must be in the chunk header. Useful e.g. for chopping into aggregation
131 |     frames, different sample grids, runs with different rotation angles etc.
132 | 
133 |     If you just want to *select* a sub-set of a stream file instead of chopping it up into many parts,
134 |     consider using the stream_grep script included with CrystFEL, which is way faster and more flexible.
135 | 
136 |     Args:
137 |         streamname (str): Stream file name
138 |         id_list (list): Values of the ID field which you want to keep in the final files.
139 |         id_field (str): Field in chunk data to select by. Defaults to 'hdf5/%/shots/frame'.
140 |         id_suffix (str): Suffix applied to the output stream file names ('frame' by default). Chunks are only considered if their image filename contains fn_contains (if given).
141 | 142 | """ 143 | 144 | outfiles = {} 145 | for fnum in id_list: 146 | outfiles[fnum] = open(streamname.rsplit('.', 1)[0] + f'-{id_suffix}{fnum}.stream', 'w') 147 | 148 | chunk_init = False 149 | chunk_string = '' 150 | value = -1 151 | 152 | with open(streamname, 'r') as fh_in: 153 | for ln, l in enumerate(fh_in): 154 | 155 | if not chunk_init and l.startswith(BEGIN_CHUNK): 156 | chunk_init = True 157 | chunk_string += l 158 | value = None 159 | include_file = True 160 | 161 | elif chunk_init and (fn_contains is not None) and l.startswith('Image filename'): 162 | found_fn = l.split(': ')[-1].strip() 163 | include_file = fn_contains in found_fn 164 | chunk_string += l 165 | 166 | elif chunk_init and l.startswith(id_field): 167 | found_value = l.rsplit('=',1)[-1].strip() 168 | found_value = parse_str_val(found_value) 169 | chunk_string += l 170 | value = found_value if found_value in id_list else None 171 | 172 | elif chunk_init and l.startswith(END_CHUNK): 173 | chunk_init = False 174 | chunk_string += l 175 | #print(frame) 176 | if (value is not None) and include_file: 177 | #print(chunk_string) 178 | outfiles[value].write(chunk_string) 179 | chunk_string = '' 180 | 181 | elif chunk_init: 182 | chunk_string += l 183 | 184 | elif not chunk_init: 185 | # no chunk initialized, write to all files 186 | for _, fh in outfiles.items(): 187 | fh.write(l) 188 | 189 | else: 190 | raise RuntimeError('This should not happen?! Please debug me.') 191 | 192 | def parse_str_val(input: str): 193 | try: 194 | return int(input.strip()) 195 | except ValueError: 196 | try: 197 | return float(input.strip()) 198 | except: 199 | return input.strip() 200 | 201 | class StreamParser: 202 | 203 | def __init__(self, filename, parse_now=True, serial_offset=-1, new_folder=None): 204 | 205 | self.merge_shot = False 206 | self.command = '' 207 | self._cell_string = [] 208 | self._geometry_string = [] 209 | self._peaks = pd.DataFrame() 210 | self._indexed = pd.DataFrame() 211 | self._shots = pd.DataFrame() 212 | self._crystals = pd.DataFrame() 213 | self._parsed_lines = 0 214 | self._total_lines = 0 215 | self.filename = filename 216 | self.serial_offset = serial_offset 217 | 218 | if parse_now: 219 | self.parse(new_folder) 220 | 221 | @property 222 | def geometry(self): 223 | """ 224 | 225 | :return: geometry section as dictionary 226 | """ 227 | 228 | g = {} 229 | for l in self._geometry_string: 230 | if l.startswith(';'): 231 | continue 232 | if '=' not in l: 233 | continue 234 | k, v = l.split(';')[0].split('=', 1) 235 | g[k.strip()] = parse_str_val(v) 236 | 237 | return g 238 | 239 | @property 240 | def cell(self): 241 | """ 242 | 243 | :return: cell section as dictionary 244 | """ 245 | 246 | c = {} 247 | for l in self._cell_string: 248 | if '=' not in l: 249 | continue 250 | k, v = l.split('=', 1) 251 | try: 252 | c[k.strip()] = float(v) 253 | except ValueError: 254 | c[k.strip()] = v.strip() 255 | 256 | return c 257 | 258 | @property 259 | def options(self): 260 | """ 261 | 262 | :return: crystfel call options (ONLY -- ones) as dict 263 | """ 264 | o = {} 265 | for opt in re.findall('--\S+', self.command): 266 | if '=' in opt: 267 | k, v = opt[2:].split('=', 1) 268 | try: 269 | o[k.strip()] = int(v) 270 | except ValueError: 271 | try: 272 | o[k.strip()] = float(v) 273 | except ValueError: 274 | o[k.strip()] = v.strip() 275 | else: 276 | o[opt[2:].strip()] = None 277 | return o 278 | 279 | @property 280 | def indexed(self): 281 | return self._indexed 282 | 283 | @property 284 | def peaks(self): 285 | return 
self._peaks 286 | 287 | @property 288 | def shots(self): 289 | return self._shots.merge(self._crystals, on=ID_FIELDS, how='left') 290 | 291 | @property 292 | def input_file(self): 293 | return self.command.split('-i ')[1].split(' -')[0].strip() 294 | 295 | @property 296 | def files(self): 297 | return list(self.shots.file.unique()) 298 | 299 | @property 300 | def num_crystals(self): 301 | return len(self._crystals) 302 | 303 | @property 304 | def num_shots(self): 305 | return len(self._shots) 306 | 307 | def parse(self, new_folder): 308 | 309 | linedat_peak = StringIO() 310 | linedat_index = StringIO() 311 | shotlist = [] 312 | crystallist = [] 313 | init_peak = False 314 | init_index = False 315 | init_geom = False 316 | init_cell = False 317 | init_crystal_info = False 318 | init_chunk = False 319 | shotdat = {'Event': None, 'shot_in_subset': None, 'subset': None, 320 | 'file': None, 'serial': None} 321 | crystal_info = {} 322 | idstr = None 323 | self._parsed_lines = 0 324 | self._total_lines = 0 325 | skip = False 326 | 327 | # lines are queried for their meaning. Lines belonging to tables are appended to StringIO virtual files, 328 | # which are then read into pandas data frames at the very end. The order of Queries is chosen to optimize 329 | # performance, that is, the table lines (most frequent) come first. 330 | with open(self.filename) as fh: 331 | 332 | for ln, l in enumerate(fh): 333 | 334 | self._parsed_lines += 1 335 | self._total_lines += 1 336 | if skip: 337 | skip = False 338 | continue 339 | 340 | # EVENT CHUNKS 341 | 342 | # Actual parsing (indexed peaks) 343 | if init_index and END_REFLECTIONS in l: 344 | init_index = False 345 | elif init_index: 346 | linedat_index.write( 347 | ' '.join([l.strip(), str(ln), idstr, '\n'])) 348 | 349 | # Actual parsing (found peaks) 350 | elif init_peak and END_PEAKS in l: 351 | init_peak = False 352 | elif init_peak: 353 | linedat_peak.write( 354 | ' '.join([l.strip(), str(ln), idstr, '\n'])) 355 | 356 | # Required info at chunk head 357 | elif BEGIN_CHUNK in l: 358 | shotdat = {'Event': '_', 'shot_in_subset': -1, 'subset': '_', 359 | 'file': '', 'serial': -1, 'first_line': ln, 'last_line': -1} 360 | init_chunk = True 361 | elif END_CHUNK in l: 362 | shotdat['last_line'] = ln 363 | shotlist.append(shotdat) 364 | shotdat = {'Event': None, 'shot_in_subset': None, 'subset': None, 365 | 'file': None, 'serial': None, 'first_line': None, 'last_line': None} 366 | init_chunk = False 367 | elif 'Event:' in l: 368 | shotdat['Event'] = l.split(': ')[-1].strip() 369 | dummy_shot = shotdat['Event'].split('//')[-1] 370 | if dummy_shot in ['_', '']: 371 | shotdat['shot_in_subset'] = 0 372 | else: 373 | shotdat['shot_in_subset'] = int(shotdat['Event'].split('//')[-1]) 374 | shotdat['subset'] = shotdat['Event'].split('//')[0].strip() 375 | elif 'Image filename:' in l: 376 | shotdat['file'] = l.split(':')[-1].strip() 377 | if new_folder is not None: 378 | shotdat['file'] = new_folder + '/' + shotdat['file'].rsplit('/', 1)[-1] 379 | elif 'Image serial number:' in l: 380 | shotdat['serial'] = int(l.split(': ')[1]) + self.serial_offset 381 | elif (' = ' in l) and (not init_crystal_info) and init_chunk: # optional shot info 382 | k, v = l.split(' = ', 1) 383 | shotdat[k.strip()] = parse_str_val(v) 384 | 385 | # Table parsing activation for found peaks 386 | elif (None not in shotdat.values()) and (BEGIN_PEAKS in l): 387 | skip = True # skip the column header line 388 | init_peak = True 389 | idstr = ' '.join([shotdat['file'], shotdat['Event'], 
str(shotdat['serial'])]) 390 | 391 | # Table parsing activation for indexing 392 | elif (None not in shotdat.values()) and (BEGIN_REFLECTIONS in l): 393 | skip = True 394 | init_index = True 395 | idstr = ' '.join([shotdat['file'], shotdat['Event'], str(shotdat['serial'])]) 396 | 397 | # Additional information from indexing 398 | elif BEGIN_CRYSTAL in l: 399 | crystal_info = {k: shotdat[k] for k in ID_FIELDS} 400 | init_crystal_info = True 401 | elif END_CRYSTAL in l: 402 | crystallist.append(crystal_info) 403 | crystal_info = {} 404 | init_crystal_info = False 405 | elif 'Cell parameters' in l: 406 | for k, v in zip(['a', 'b', 'c', 'dummy', 'al', 'be', 'ga'], l.split(' ')[2:9]): 407 | if k == 'dummy': 408 | continue 409 | crystal_info[k] = float(v) 410 | elif 'astar' in l: 411 | crystal_info.update( 412 | {k: float(v) for k, v in zip(['astar_x', 'astar_y', 'astar_z'], l.split(' ')[2:5])}) 413 | elif 'bstar' in l: 414 | crystal_info.update( 415 | {k: float(v) for k, v in zip(['bstar_x', 'bstar_y', 'bstar_z'], l.split(' ')[2:5])}) 416 | elif 'cstar' in l: 417 | crystal_info.update( 418 | {k: float(v) for k, v in zip(['cstar_x', 'cstar_y', 'cstar_z'], l.split(' ')[2:5])}) 419 | elif 'diffraction_resolution_limit' in l: 420 | crystal_info['diff_limit'] = float(l.rsplit(' nm', 1)[0].rsplit('= ', 1)[-1]) 421 | elif 'predict_refine/det_shift' in l: 422 | crystal_info['xshift'] = float(l.split(' ')[3]) 423 | crystal_info['yshift'] = float(l.split(' ')[6]) 424 | continue 425 | elif (' = ' in l) and init_crystal_info and init_chunk: # optional shot info 426 | k, v = l.split(' = ', 1) 427 | crystal_info[k.strip()] = parse_str_val(v) 428 | 429 | # CALL STRING 430 | 431 | elif 'indexamajig' in l: 432 | self.command = l 433 | 434 | # GEOMETRY FILE 435 | 436 | elif init_geom and (END_GEOM in l): 437 | init_geom = False 438 | elif init_geom: 439 | self._geometry_string.append(l.strip()) 440 | elif BEGIN_GEOM in l: 441 | init_geom = True 442 | 443 | # CELL FILE 444 | 445 | elif init_cell and (END_CELL in l): 446 | init_cell = False 447 | elif init_cell: 448 | self._cell_string.append(l.strip()) 449 | elif BEGIN_CELL in l: 450 | init_cell = True 451 | 452 | else: 453 | self._parsed_lines -= 1 454 | 455 | # Now convert to pandas data frames 456 | 457 | linedat_index.seek(0) 458 | linedat_peak.seek(0) 459 | self._peaks = pd.read_csv(linedat_peak, delim_whitespace=True, header=None, 460 | names=PEAK_COLUMNS + ['stream_line', 'file', 'Event', 'serial'] 461 | ).sort_values('serial').reset_index().sort_values(['serial', 'index']).reset_index( 462 | drop=True).drop('index', axis=1) 463 | 464 | self._indexed = pd.read_csv(linedat_index, delim_whitespace=True, header=None, 465 | names=REFLECTION_COLUMNS + ['stream_line', 'file', 'Event', 'serial'] 466 | ).sort_values('serial').reset_index().sort_values(['serial', 'index']).reset_index( 467 | drop=True).drop('index', axis=1) 468 | 469 | self._shots = pd.DataFrame(shotlist).sort_values('serial').reset_index(drop=True) 470 | if crystallist: 471 | self._crystals = pd.DataFrame(crystallist).sort_values('serial').reset_index(drop=True) 472 | else: 473 | self._crystals = pd.DataFrame(columns=ID_FIELDS) 474 | 475 | def write(self, filename, include_peaks=True, include_indexed=True, include_geom=True, include_cell=True): 476 | 477 | from tqdm import tqdm 478 | 479 | with open(filename, 'w') as fh: 480 | fh.write(HEAD+'\n'+GENERATOR+'\n'+self.command+'\n') 481 | if include_geom: 482 | fh.write(BEGIN_GEOM+'\n'+'\n'.join(self._geometry_string)+'\n'+END_GEOM + '\n') 483 | if 
include_cell: 484 | fh.write(BEGIN_CELL + '\n' + '\n'.join(self._cell_string) + '\n' + END_CELL + '\n') 485 | 486 | for ii, shot in tqdm(self._shots.iterrows(), total=len(self._shots)): 487 | fh.write(BEGIN_CHUNK + '\n') 488 | fh.write(f'Image filename: {shot.file}\n') 489 | fh.write(f'Event: {shot.Event}\n') 490 | fh.write(f'Image serial number: {shot.serial - self.serial_offset}\n') 491 | keys = set(shot.keys()).difference( 492 | {'Event', 'file', 'serial', 'shot_in_subset', 'subset'}) 493 | for k in keys: 494 | fh.write(f'{k} = {shot[k]}\n') 495 | if include_peaks: 496 | fh.write(BEGIN_PEAKS + '\n') 497 | self._peaks.loc[self._peaks.serial == shot.serial, PEAK_COLUMNS].to_csv( 498 | fh, sep=' ', index=False, na_rep='-nan') 499 | fh.write(END_PEAKS + '\n') 500 | 501 | crystals = self._crystals.loc[self._crystals.serial == shot.serial, :] 502 | 503 | for cid, crs in crystals.iterrows(): 504 | fh.write(BEGIN_CRYSTAL + '\n') 505 | fh.write(f'Cell parameters {crs.a} {crs.b} {crs.c} nm, {crs.al} {crs.be} {crs.ga} deg\n') 506 | fh.write(f'astar = {crs.astar_x} {crs.astar_y} {crs.astar_z} nm^-1\n') 507 | fh.write(f'bstar = {crs.bstar_x} {crs.bstar_y} {crs.bstar_z} nm^-1\n') 508 | fh.write(f'cstar = {crs.cstar_x} {crs.cstar_y} {crs.cstar_z} nm^-1\n') 509 | fh.write(f'diffraction_resolution_limit = {crs.diff_limit} nm^-1 or {10 / crs.diff_limit} A\n') 510 | fh.write(f'predict_refine/det_shift x = {crs.xshift} y = {crs.yshift} mm\n') 511 | keys = set(crs.keys()).difference( 512 | {'Event', 'file', 'serial', 'shot_in_subset', 'subset', 513 | 'a', 'b', 'c', 'al', 'be', 'ga', 514 | 'astar_x', 'astar_y', 'astar_z', 515 | 'bstar_x', 'bstar_y', 'bstar_z', 516 | 'cstar_x', 'cstar_y', 'cstar_z', 517 | 'diff_limit', 'xshift', 'yshift'}) 518 | for k in keys: 519 | fh.write(f'{k} = {crs[k]}\n') 520 | if include_indexed: 521 | fh.write(BEGIN_REFLECTIONS + '\n') 522 | self._indexed.loc[self._indexed.serial == shot.serial, REFLECTION_COLUMNS].to_csv( 523 | fh, sep=' ', index=False, na_rep='-nan') 524 | fh.write(END_REFLECTIONS + '\n') 525 | fh.write(END_CRYSTAL + '\n') 526 | fh.write(END_CHUNK + '\n') 527 | 528 | def change_path(self, new_folder=None, old_pattern=None, new_pattern=None): 529 | 530 | for df in [self._crystals, self._shots, self._indexed, self._peaks]: 531 | if (new_folder is not None) and (old_pattern is not None): 532 | df.file = new_folder + '/' + \ 533 | df.file.str.rsplit('/', 1, True).iloc[:, -1].str.replace(old_pattern, new_pattern) 534 | elif old_pattern is not None: 535 | df.file = df.file.str.replace(old_pattern, new_pattern) 536 | elif new_folder is not None: 537 | df.file = new_folder + '/' + df.file.str.rsplit('/', 1, True).iloc[:, -1] 538 | 539 | def get_cxi_format(self, what='peaks', shots=None, half_pixel_shift=True): 540 | 541 | if shots is None: 542 | shots = self.shots 543 | 544 | if half_pixel_shift: 545 | off = -.5 546 | else: 547 | off = 0 548 | 549 | if what == 'peaks': 550 | ifield = 'Intensity' 551 | indexed = False 552 | elif what in ['indexed', 'predict', 'prediction']: 553 | ifield = 'I' 554 | indexed = True 555 | else: 556 | raise ValueError('what must be peaks or indexed') 557 | 558 | # some majig to get CXI arrays 559 | if indexed: 560 | self._indexed['pk_id'] = self._indexed.groupby(['file', 'Event']).cumcount() 561 | pk2 = self._indexed.set_index(['file', 'Event', 'pk_id']) 562 | else: 563 | self._peaks['pk_id'] = self._peaks.groupby(['file', 'Event']).cumcount() 564 | pk2 = self._peaks.set_index(['file', 'Event', 'pk_id']) 565 | # joining step with shot list is 
required to make sure that shots without peaks/indexing stay in the output arrays
566 |         s2 = shots[['file', 'Event']].set_index(['file', 'Event'])
567 |         s2.columns = pd.MultiIndex.from_arrays([[], []], names=('field', 'pk_id'))
568 |         pk2 = s2.join(pk2.unstack(-1), how='left')
569 |         if indexed:
570 |             self._indexed.drop('pk_id', axis=1, inplace=True)  # remove the temporary column again
571 |         else:
572 |             self._peaks.drop('pk_id', axis=1, inplace=True)  # remove the temporary column again
573 | 
574 |         cxidat = {
575 |             'peakXPosRaw': (pk2['fs/px'] + off).fillna(0).values,
576 |             'peakYPosRaw': (pk2['ss/px'] + off).fillna(0).values,
577 |             'peakTotalIntensity': pk2[ifield].fillna(0).values,
578 |             'nPeaks': pk2['fs/px'].notna().sum(axis=1).values}
579 | 
580 |         if indexed:
581 |             cxidat.update({'peakSNR': (pk2[ifield]/pk2['Sigma(I)']).fillna(0).values,
582 |                            'indexH': pk2['h'].fillna(0).values.astype(int),
583 |                            'indexK': pk2['k'].fillna(0).values.astype(int),
584 |                            'indexL': pk2['l'].fillna(0).values.astype(int)})
585 | 
586 |         return cxidat
587 | 
-------------------------------------------------------------------------------- /docs/Makefile: --------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 
-------------------------------------------------------------------------------- /docs/conf.py: --------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 | 
7 | # -- Path setup --------------------------------------------------------------
8 | 
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | import recommonmark
16 | from recommonmark.transform import AutoStructify
17 | sys.path.insert(0, os.path.abspath('.'))
18 | sys.path.insert(0, os.path.abspath('..'))
19 | 
20 | 
21 | # -- Project information -----------------------------------------------------
22 | 
23 | project = 'diffractem'
24 | copyright = '2020, Robert Bücker'
25 | author = 'Robert Bücker'
26 | 
27 | 
28 | # -- General configuration ---------------------------------------------------
29 | 
30 | master_doc = 'index'
31 | 
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = ['sphinx.ext.autodoc', 36 | 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', 37 | 'sphinx.ext.autosectionlabel', 38 | # 'jupyter_sphinx.execute', 39 | # 'nbsphinx', 40 | 'm2r2', 41 | 'sphinx.ext.mathjax', 42 | 'nbsphinx_link', 'sphinx_autodoc_typehints' 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | source_suffix = ['.rst', '.md'] 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 51 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 52 | autosectionlabel_maxdepth=2 53 | autosectionlabel_prefix_document=True 54 | autodoc_mock_imports = ['PyQt5'] 55 | 56 | nbsphinx_execute = 'never' 57 | 58 | # -- Options for HTML output ------------------------------------------------- 59 | 60 | # The theme to use for HTML and HTML Help pages. See the documentation for 61 | # a list of builtin themes. 62 | # 63 | html_theme = 'sphinx_rtd_theme' 64 | 65 | # Add any paths that contain custom static files (such as style sheets) here, 66 | # relative to this directory. They are copied after the builtin static files, 67 | # so a file named "default.css" will overwrite the builtin "default.css". 68 | html_static_path = ['_static'] 69 | -------------------------------------------------------------------------------- /docs/crystfel.rst: -------------------------------------------------------------------------------- 1 | CrystFEL integration 2 | ==================== 3 | 4 | Stream parsing 5 | -------------- 6 | 7 | Peak finders 8 | ------------ 9 | 10 | Calling indexamajig 11 | ------------------- -------------------------------------------------------------------------------- /docs/dataset.rst: -------------------------------------------------------------------------------- 1 | The Dataset object 2 | ================== 3 | 4 | A diffractem data set is represented by a :class:`Dataset ` object, which manages all diffraction and meta data 5 | from an electron diffraction set, and provides a plethora of features to work with them. This comprises: 6 | 7 | * Automatic management of the HDF5 files containing the diffraction and meta data (see also 8 | :ref:`file_format`). 9 | * A framework to apply massively parallel computations on larger-than-memory diffraction data stacks 10 | using `dask `_, on a local machine or even remote clusters. 11 | * Handling of meta data for each single recorded diffraction pattern using an embedded `pandas.DataFrame` 12 | as a "shot list". 13 | * Methods for quick and transparent creation of sub-sets through complex queries on metadata. 14 | 15 | To learn how to handle `Dataset` objects, we'd recommend the `tutorials `_. 16 | 17 | Shot list 18 | --------- 19 | 20 | Data stacks 21 | ----------- 22 | 23 | Chunking 24 | ^^^^^^^^ 25 | 26 | Lazy evaluation and persisting 27 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 28 | -------------------------------------------------------------------------------- /docs/diffractem.adxv.rst: -------------------------------------------------------------------------------- 1 | diffractem.adxv module 2 | ====================== 3 | 4 | .. 
automodule:: diffractem.adxv 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.compute.rst: -------------------------------------------------------------------------------- 1 | diffractem.compute module 2 | ========================= 3 | 4 | .. automodule:: diffractem.compute 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.dataset.rst: -------------------------------------------------------------------------------- 1 | diffractem.dataset module 2 | ========================= 3 | 4 | .. automodule:: diffractem.dataset 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.io.rst: -------------------------------------------------------------------------------- 1 | diffractem.io module 2 | ==================== 3 | 4 | .. automodule:: diffractem.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.map_image.rst: -------------------------------------------------------------------------------- 1 | diffractem.map\_image module 2 | ============================ 3 | 4 | .. automodule:: diffractem.map_image 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.nexus.rst: -------------------------------------------------------------------------------- 1 | diffractem.nexus module 2 | ======================= 3 | 4 | .. automodule:: diffractem.nexus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.pre_proc_opts.rst: -------------------------------------------------------------------------------- 1 | diffractem.pre\_proc\_opts module 2 | ================================= 3 | 4 | .. automodule:: diffractem.pre_proc_opts 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.proc2d.rst: -------------------------------------------------------------------------------- 1 | diffractem.proc2d module 2 | ======================== 3 | 4 | .. automodule:: diffractem.proc2d 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.proc_peaks.rst: -------------------------------------------------------------------------------- 1 | diffractem.proc\_peaks module 2 | ============================= 3 | 4 | .. automodule:: diffractem.proc_peaks 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.rst: -------------------------------------------------------------------------------- 1 | 2 | .. mdinclude:: ../README.md 3 | 4 | Submodules 5 | ---------- 6 | 7 | diffractem's functionality is contained in various submodules, click below for their API documentation. 8 | The ones you'll likely deal with are :mod:`diffractem.dataset`, :mod:`diffractem.proc2d`, and :mod:`diffractem.tools`. 9 | 10 | .. 
toctree:: 11 | :maxdepth: 4 12 | 13 | diffractem.adxv 14 | diffractem.compute 15 | diffractem.dataset 16 | diffractem.io 17 | diffractem.map_image 18 | diffractem.nexus 19 | diffractem.pre_proc_opts 20 | diffractem.proc2d 21 | diffractem.proc_peaks 22 | diffractem.stream_parser 23 | diffractem.tools 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: diffractem 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /docs/diffractem.stream_parser.rst: -------------------------------------------------------------------------------- 1 | diffractem.stream\_parser module 2 | ================================ 3 | 4 | .. automodule:: diffractem.stream_parser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/diffractem.tools.rst: -------------------------------------------------------------------------------- 1 | diffractem.tools module 2 | ======================= 3 | 4 | .. automodule:: diffractem.tools 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/edview.rst: -------------------------------------------------------------------------------- 1 | EDview 2 | ====== 3 | 4 | Viewer for diffractem-format files (see :doc:`file_format`) and/or CrystFEL stream files. -------------------------------------------------------------------------------- /docs/file_format.rst: -------------------------------------------------------------------------------- 1 | Diffractem NeXus files 2 | ====================== -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. diffractem documentation master file, created by 2 | sphinx-quickstart on Fri Apr 24 17:13:42 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to diffractem's documentation! 7 | ====================================== 8 | 9 | diffractem is a package for processing Serial Electron Diffraction data, following the protocols as outlined in `Bücker et al., Front. Mol. Biosci., 2021 `_. 10 | See this paper for a general introduction and documentation 11 | diffractem is mostly intended to be used from within Jupyter notebooks, such as those available from `here `_. 12 | 13 | Please see :ref:`diffractem:Installation` for how to install diffractem and CrystFEL such that you can get started. 14 | 15 | Of particular interest might be the documentation of :class:`PreProcOpts `, which explains the various options you can define for preprocessing. 16 | 17 | For the full API documentation, see :ref:`here ` 18 | 19 | Table of contents 20 | ----------------- 21 | 22 | .. toctree:: 23 | :maxdepth: 4 24 | 25 | Overview 26 | dataset 27 | file_format 28 | edview 29 | CrystFEL integration 30 | 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` 38 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | .. 
5 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/map_image.rst: -------------------------------------------------------------------------------- 1 | Crystal-map images 2 | ================== 3 | 4 | * Messy relation to :ref:`dataset` -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | diffractem 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | diffractem 8 | -------------------------------------------------------------------------------- /docs/pre_processing.rst: -------------------------------------------------------------------------------- 1 | Pre-processing with diffractem 2 | ============================== 3 | 4 | diffractem provides several helpers that simplify the pre-processing workflow; the pages below cover the pre-processing options and the (older) macro system. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | :caption: Preprocessing 9 | 10 | Pre-processing options 11 | Pre-processing macros (old) -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | h5py 2 | pandas 3 | tables 4 | hdf5plugin 5 | dask 6 | distributed 7 | tifffile 8 | scipy 9 | astropy 10 | matplotlib 11 | numba 12 | pyqtgraph 13 | pyyaml 14 | scikit-learn 15 | scikit-image 16 | opencv-python-headless 17 | PyQt5 18 | ipykernel 19 | nbsphinx 20 | jupyter-sphinx 21 | nbsphinx-link 22 | sphinx-autodoc-typehints 23 | m2r2 -------------------------------------------------------------------------------- /ideas.md: -------------------------------------------------------------------------------- 1 | # Ideas for diffractem features... 2 | 3 | ## Plotting 4 | 5 | * high-level plot functions (in general) from older notebooks 6 | * distribute in a clever way between overview and dataset 7 | 8 | ## Maps 9 | 10 | * map plot with unique axis projected length and orientation -> overview of e.g. preferred orientation. Could be represented as a complex image. Fill areas e.g. using watershed in intensity space. 11 | * lattice orientation clustering in maps 12 | 13 | ## Viewer 14 | 15 | * jump to arbitrary ID/serial 16 | * real-/reciprocal-space calibration, with diffraction rings and scale bar 17 | * rudimentary keyboard operation 18 | * line profiles,... check out glueviz?
20 | * direct transfer to Fiji (check how scipion does it) 21 | 22 | ## Pre-processing 23 | 24 | * function/script to automatically run the full pre-proc pipeline, including some heuristics 25 | * connect to a running experiment, e.g. using a socket interface or ZeroMQ 26 | 27 | ## Dataset 28 | 29 | * direct CXI format deposition 30 | * export to other formats (TIF, MRC, CBF, cctbx Pickle,...) 31 | 32 | ## Stream parser -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | astropy 2 | Cython 3 | dask 4 | distributed 5 | h5py 6 | hdf5plugin 7 | ipython 8 | ipywidgets 9 | matplotlib 10 | numba 11 | numpy 12 | pandas 13 | PyQt5 14 | pyqtgraph 15 | PyYAML 16 | recommonmark 17 | scikit_image 18 | scikit_learn 19 | scipy 20 | setuptools 21 | tifffile 22 | tqdm 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # This file is adapted from OnDA. 2 | # 3 | # OnDA is free software: you can redistribute it and/or modify it under the terms of 4 | # the GNU General Public License as published by the Free Software Foundation, either 5 | # version 3 of the License, or (at your option) any later version. 6 | # 7 | # OnDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 8 | # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 9 | # PURPOSE. See the GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License along with OnDA. 12 | # If not, see . 13 | # 14 | # Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY, 15 | # a research centre of the Helmholtz Association. 16 | [build-system] 17 | requires = ["setuptools", "wheel", "Cython", "numpy"] 18 | 19 | [build_ext] 20 | inplace=1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import os 3 | import platform 4 | 5 | # DIFFRACTEM - tools for processing Serial Electron Diffraction Data 6 | # Copyright (C) 2020 Robert Bücker 7 | 8 | # This library is free software; you can redistribute it and/or 9 | # modify it under the terms of the GNU Lesser General Public 10 | # License as published by the Free Software Foundation; either 11 | # version 2.1 of the License, or (at your option) any later version. 12 | 13 | # This library is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 | # Lesser General Public License for more details. 17 | 18 | # You should have received a copy of the GNU Lesser General Public 19 | # License along with this library; if not, write to the Free Software 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 | 22 | # This library uses peakfinder8 for peak finding, written by Anton Barty, 23 | # Valerio Mariani, and Oleksandr Yefanov; 24 | # Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY 25 | 26 | 27 | ### --- 28 | # peakfinder8 Cython version adapted from OnDA: https://github.com/ondateam/onda 29 | 30 | try: 31 | import numpy 32 | except (ModuleNotFoundError, NameError): 33 | print('NumPy is not installed. Please install it before diffractem via:\n'
34 | 'pip install numpy') 35 | raise SystemExit(1) # abort the build: numpy.get_include() is needed below 36 | 37 | DIFFRACTEM_USE_CYTHON = os.getenv("DIFFRACTEM_USE_CYTHON") 38 | 39 | ext = ".pyx" if DIFFRACTEM_USE_CYTHON else ".c" # pylint: disable=invalid-name 40 | 41 | 42 | if platform.uname().system == 'Windows': 43 | libraries = [] 44 | # MSVC links the C++ runtime automatically 45 | else: 46 | libraries = ["stdc++"] 47 | # gcc/clang need libstdc++ linked explicitly 48 | 49 | peakfinder8_ext = Extension( # pylint: disable=invalid-name 50 | name="diffractem.peakfinder8_extension", 51 | include_dirs=[numpy.get_include()], 52 | libraries=libraries, 53 | sources=[ 54 | "src/peakfinder8_extension/peakfinder8.cpp", 55 | "src/peakfinder8_extension/peakfinder8_extension.pyx", 56 | ] 57 | if DIFFRACTEM_USE_CYTHON 58 | else [ 59 | "src/peakfinder8_extension/peakfinder8_extension.cpp", 60 | "src/peakfinder8_extension/peakfinder8.cpp", 61 | ], 62 | language="c++", 63 | ) 64 | 65 | if DIFFRACTEM_USE_CYTHON: 66 | from Cython.Build import cythonize 67 | print('USING CYTHON') 68 | extensions = cythonize(peakfinder8_ext) # pylint: disable=invalid-name 69 | else: 70 | extensions = [peakfinder8_ext] # pylint: disable=invalid-name 71 | 72 | ### --- 73 | 74 | setup( 75 | name='diffractem', 76 | version='0.4.1', 77 | packages=['diffractem'], 78 | url='https://github.com/robertbuecker/diffractem', 79 | license='', 80 | scripts=['bin/nxs2tif.py', 'bin/edview.py'], 81 | # scripts=['bin/nxs2tif.py', 'bin/edview.py', 'bin/quick_proc.py'], 82 | entry_points={ 83 | 'console_scripts': [ 84 | 'quick_proc = diffractem.quick_proc:main', 85 | 'stream2sol = diffractem.stream2sol:main' 86 | ], 87 | }, 88 | author='Robert Buecker', 89 | author_email='robert.buecker@cssb-hamburg.de', 90 | description='Tools for processing serial electron diffraction data.', 91 | install_requires=['h5py', 'numpy', 'pandas', 'hdf5plugin', 92 | 'dask[complete]', 'tifffile', 'scipy', 'astropy', 93 | 'matplotlib', 'numba', 'pyqtgraph', 'pyyaml', 'scikit-learn', 94 | 'scikit-image', 'PyQt5'], 95 | classifiers=[ 96 | "Programming Language :: Python :: 3", 97 | "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)", 98 | ], 99 | ext_modules = extensions, 100 | include_package_data = True 101 | ) 102 | -------------------------------------------------------------------------------- /src/peakfinder8_extension/peakfinder8.hh: -------------------------------------------------------------------------------- 1 | // This file is originally part of OnDA, available at 2 | // , which is released under the terms 3 | // of the GNU General Public License. It has been adapted for 4 | // use in diffractem. 5 | // 6 | // In agreement with the authors, you can redistribute it and/or modify 7 | // this file under the terms of the GNU Lesser General Public License 8 | // as published by the Free Software Foundation, either version 3 9 | // of the License, or (at your option) any later version. 10 | // 11 | // It is distributed in the hope that it will be useful, 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | // GNU Lesser General Public License for more details. 15 | // 16 | // You should have received a copy of the GNU Lesser General Public License 17 | // along with diffractem. If not, see . 18 | // 19 | // Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY, 20 | // a research centre of the Helmholtz Association.
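//
// Interface summary: tPeakList is a container of per-peak result arrays.
// Size it with allocatePeakList() before use and release it with
// freePeakList() afterwards. peakfinder8() fills the list from a detector
// frame `data`, tiled into nasics_x * nasics_y panels of asic_nx * asic_ny
// pixels each; pixels where `mask` is zero are skipped, and the per-pixel
// radius map `pix_r` defines the radial bins used for local background
// statistics. Accepted peaks must exceed ADCthresh and hitfinderMinSNR,
// and span between hitfinderMinPixCount and hitfinderMaxPixCount pixels.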
21 | 22 | #ifndef PEAKFINDER8_H 23 | #define PEAKFINDER8_H 24 | 25 | typedef struct { 26 | public: 27 | long nPeaks; 28 | long nHot; 29 | float peakResolution; // Radius of 80% of peaks (detector pixels) 30 | float peakResolutionA; // Radius of 80% of peaks (resolution, Angstrom) 31 | float peakDensity; // Density of peaks within this 80% figure 32 | float peakNpix; // Number of pixels in peaks 33 | float peakTotal; // Total integrated intensity in peaks 34 | int memoryAllocated; 35 | long nPeaks_max; 36 | 37 | float *peak_maxintensity; // Maximum intensity in peak 38 | float *peak_totalintensity; // Integrated intensity in peak 39 | float *peak_sigma; // Standard deviation of the local background 40 | float *peak_snr; // Signal-to-noise ratio of peak 41 | float *peak_npix; // Number of pixels in peak 42 | float *peak_com_x; // peak center of mass x (in raw layout) 43 | float *peak_com_y; // peak center of mass y (in raw layout) 44 | long *peak_com_index; // closest pixel corresponding to peak 45 | float *peak_com_x_assembled; // peak center of mass x (in assembled layout) 46 | float *peak_com_y_assembled; // peak center of mass y (in assembled layout) 47 | float *peak_com_r_assembled; // peak center of mass r (in assembled layout) 48 | float *peak_com_q; // Scattering vector of this peak 49 | float *peak_com_res; // Resolution of this peak 50 | } tPeakList; 51 | 52 | void allocatePeakList(tPeakList *peak, long NpeaksMax); 53 | void freePeakList(tPeakList peak); 54 | 55 | int peakfinder8(tPeakList *peaklist, float *data, char *mask, float *pix_r, 56 | long asic_nx, long asic_ny, long nasics_x, long nasics_y, 57 | float ADCthresh, float hitfinderMinSNR, 58 | long hitfinderMinPixCount, long hitfinderMaxPixCount, 59 | long hitfinderLocalBGRadius, char* outliersMask); 60 | 61 | #endif // PEAKFINDER8_H 62 | -------------------------------------------------------------------------------- /src/peakfinder8_extension/peakfinder8_extension.pyx: -------------------------------------------------------------------------------- 1 | # This file is originally part of OnDA, available at 2 | # , which is released under the terms 3 | # of the GNU General Public License. It has been adapted for 4 | # use in diffractem. 5 | # 6 | # In agreement with the authors, you can redistribute it and/or modify 7 | # this file under the terms of the GNU Lesser General Public License 8 | # as published by the Free Software Foundation, either version 3 9 | # of the License, or (at your option) any later version. 10 | # 11 | # It is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public License 17 | # along with diffractem. If not, see . 18 | # 19 | # Copyright 2014-2019 Deutsches Elektronen-Synchrotron DESY, 20 | # a research centre of the Helmholtz Association.
21 | 22 | from libcpp.vector cimport vector 23 | from libc.stdlib cimport malloc, free 24 | from libc.stdint cimport int8_t 25 | 26 | import numpy 27 | 28 | cdef extern from "peakfinder8.hh": 29 | 30 | ctypedef struct tPeakList: 31 | long nPeaks 32 | long nHot 33 | float peakResolution 34 | float peakResolutionA 35 | float peakDensity 36 | float peakNpix 37 | float peakTotal 38 | int memoryAllocated 39 | long nPeaks_max 40 | 41 | float *peak_maxintensity 42 | float *peak_totalintensity 43 | float *peak_sigma 44 | float *peak_snr 45 | float *peak_npix 46 | float *peak_com_x 47 | float *peak_com_y 48 | long *peak_com_index 49 | float *peak_com_x_assembled 50 | float *peak_com_y_assembled 51 | float *peak_com_r_assembled 52 | float *peak_com_q 53 | float *peak_com_res 54 | 55 | void allocatePeakList(tPeakList* peak_list, long max_num_peaks) 56 | void freePeakList(tPeakList peak_list) 57 | 58 | cdef extern from "peakfinder8.hh": 59 | 60 | int peakfinder8(tPeakList *peaklist, float *data, char *mask, float *pix_r, 61 | long asic_nx, long asic_ny, long nasics_x, long nasics_y, 62 | float ADCthresh, float hitfinderMinSNR, 63 | long hitfinderMinPixCount, long hitfinderMaxPixCount, 64 | long hitfinderLocalBGRadius, char *outliersMask) 65 | 66 | 67 | def peakfinder_8(int max_num_peaks, float[:,::1] data, char[:,::1] mask, 68 | float[:,::1] pix_r, long asic_nx, long asic_ny, long nasics_x, 69 | long nasics_y, float adc_thresh, float hitfinder_min_snr, 70 | long hitfinder_min_pix_count, long hitfinder_max_pix_count, 71 | long hitfinder_local_bg_radius): 72 | 73 | cdef tPeakList peak_list 74 | allocatePeakList(&peak_list, max_num_peaks) 75 | 76 | peakfinder8(&peak_list, &data[0, 0], &mask[0,0], &pix_r[0, 0], asic_nx, asic_ny, 77 | nasics_x, nasics_y, adc_thresh, hitfinder_min_snr, 78 | hitfinder_min_pix_count, hitfinder_max_pix_count, 79 | hitfinder_local_bg_radius, NULL) 80 | 81 | cdef int i 82 | cdef float peak_x, peak_y, peak_value 83 | cdef vector[double] peak_list_x 84 | cdef vector[double] peak_list_y 85 | cdef vector[long] peak_list_index 86 | cdef vector[double] peak_list_value 87 | cdef vector[double] peak_list_npix 88 | cdef vector[double] peak_list_maxi 89 | cdef vector[double] peak_list_sigma 90 | cdef vector[double] peak_list_snr 91 | 92 | num_peaks = peak_list.nPeaks 93 | 94 | if num_peaks > max_num_peaks: 95 | num_peaks = max_num_peaks 96 | 97 | for i in range(0, num_peaks): 98 | 99 | peak_x = peak_list.peak_com_x[i] 100 | peak_y = peak_list.peak_com_y[i] 101 | peak_index = peak_list.peak_com_index[i] 102 | peak_value = peak_list.peak_totalintensity[i] 103 | peak_npix = peak_list.peak_npix[i] 104 | peak_maxi = peak_list.peak_maxintensity[i] 105 | peak_sigma = peak_list.peak_sigma[i] 106 | peak_snr = peak_list.peak_snr[i] 107 | 108 | peak_list_x.push_back(peak_x) 109 | peak_list_y.push_back(peak_y) 110 | peak_list_index.push_back(peak_index) 111 | peak_list_value.push_back(peak_value) 112 | peak_list_npix.push_back(peak_npix) 113 | peak_list_maxi.push_back(peak_maxi) 114 | peak_list_sigma.push_back(peak_sigma) 115 | peak_list_snr.push_back(peak_snr) 116 | 117 | freePeakList(peak_list) 118 | 119 | return (peak_list_x, peak_list_y, peak_list_value, peak_list_index, 120 | peak_list_npix, peak_list_maxi, peak_list_sigma, peak_list_snr) 121 | 122 | 123 | def peakfinder_8_with_pixel_information(int max_num_peaks, float[:,::1] data, 124 | char[:,::1] mask, float[:,::1] pix_r, 125 | long asic_nx, long asic_ny, long nasics_x, 126 | long nasics_y, float adc_thresh, 127 | float hitfinder_min_snr, 
128 | long hitfinder_min_pix_count, 129 | long hitfinder_max_pix_count, 130 | long hitfinder_local_bg_radius, 131 | char[:,::1] outlier_mask): 132 | 133 | cdef tPeakList peak_list 134 | allocatePeakList(&peak_list, max_num_peaks) 135 | 136 | peakfinder8(&peak_list, &data[0, 0], &mask[0, 0], &pix_r[0, 0], asic_nx, asic_ny, 137 | nasics_x, nasics_y, adc_thresh, hitfinder_min_snr, 138 | hitfinder_min_pix_count, hitfinder_max_pix_count, 139 | hitfinder_local_bg_radius, &outlier_mask[0, 0]) 140 | 141 | cdef int i 142 | cdef float peak_x, peak_y, peak_value 143 | cdef vector[double] peak_list_x 144 | cdef vector[double] peak_list_y 145 | cdef vector[long] peak_list_index 146 | cdef vector[double] peak_list_value 147 | cdef vector[double] peak_list_npix 148 | cdef vector[double] peak_list_maxi 149 | cdef vector[double] peak_list_sigma 150 | cdef vector[double] peak_list_snr 151 | 152 | num_peaks = peak_list.nPeaks 153 | 154 | if num_peaks > max_num_peaks: 155 | num_peaks = max_num_peaks 156 | 157 | for i in range(0, num_peaks): 158 | 159 | peak_x = peak_list.peak_com_x[i] 160 | peak_y = peak_list.peak_com_y[i] 161 | peak_index = peak_list.peak_com_index[i] 162 | peak_value = peak_list.peak_totalintensity[i] 163 | peak_npix = peak_list.peak_npix[i] 164 | peak_maxi = peak_list.peak_maxintensity[i] 165 | peak_sigma = peak_list.peak_sigma[i] 166 | peak_snr = peak_list.peak_snr[i] 167 | 168 | peak_list_x.push_back(peak_x) 169 | peak_list_y.push_back(peak_y) 170 | peak_list_index.push_back(peak_index) 171 | peak_list_value.push_back(peak_value) 172 | peak_list_npix.push_back(peak_npix) 173 | peak_list_maxi.push_back(peak_maxi) 174 | peak_list_sigma.push_back(peak_sigma) 175 | peak_list_snr.push_back(peak_snr) 176 | 177 | freePeakList(peak_list) 178 | 179 | return (peak_list_x, peak_list_y, peak_list_value, peak_list_index, 180 | peak_list_npix, peak_list_maxi, peak_list_sigma, peak_list_snr) 181 | 182 | 183 | -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | v0.4.1 2 | --------------------------------------------------------------------------------
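Usage sketch for the `peakfinder_8` wrapper above. This example is not part of the repository: the panel geometry, beam center, and threshold values are illustrative assumptions, while the dtypes and contiguity are dictated by the typed memoryviews in the function signature.

import numpy as np
from diffractem.peakfinder8_extension import peakfinder_8

# Single monolithic 512 x 512 panel; float32/int8 dtypes and C-contiguous
# layout are required by the extension's typed memoryviews.
data = np.random.poisson(1.0, size=(512, 512)).astype(np.float32)
mask = np.ones_like(data, dtype=np.int8)  # 1 = valid pixel, 0 = excluded
y, x = np.indices(data.shape)
pix_r = np.hypot(x - 256.0, y - 256.0).astype(np.float32)  # radius map for an assumed beam center

px, py, intensity, index, npix, max_i, sigma, snr = peakfinder_8(
    500,             # max_num_peaks
    data, mask, pix_r,
    512, 512, 1, 1,  # asic_nx, asic_ny, nasics_x, nasics_y: one panel
    5.0, 8.0,        # adc_thresh, hitfinder_min_snr (illustrative values)
    2, 100,          # hitfinder_min_pix_count, hitfinder_max_pix_count
    3)               # hitfinder_local_bg_radius
print(f'found {len(px)} peaks')

Cython converts the returned C++ vectors to plain Python lists, so the eight result sequences (positions, intensities, pixel indices, pixel counts, maxima, background sigmas, and SNRs) can be fed directly into numpy or pandas.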