├── .gitignore
├── LICENSE
├── README.md
├── setup.py
└── single_pulse_ml
    ├── __init__.py
    ├── classify.py
    ├── data
    │   └── data.txt
    ├── dataproc.py
    ├── frbkeras.py
    ├── model
    │   └── model.txt
    ├── plot_tools.py
    ├── plots
    │   └── Freq_train.png
    ├── reader.py
    ├── run_frb_simulation.py
    ├── run_single_pulse_DL.py
    ├── sim_parameters.py
    ├── simulate_frb.py
    ├── simulate_multibeam.py
    ├── telescope.py
    ├── tests
    │   ├── __init__.py
    │   ├── test_frbkeras.py
    │   ├── test_reader.py
    │   ├── test_run_frb_simulation.py
    │   └── test_simulate_frb.py
    └── tools.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.npy
3 | *.hdf5
4 | *.py~
5 |
6 | dist/
7 |
8 | single_pulse_ml/*pkl
9 | single_pulse_ml/*npy
10 | single_pulse_ml/*hdf5
11 | single_pulse_ml/plots/*png
12 | single_pulse_ml/model
13 | single_pulse_ml/run_frb_simulation.py
14 | single_pulse_ml/run_single_pulse_DL.py
15 |
16 | /*.egg-info
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### single_pulse_ml
2 |
3 | Build, train, and apply deep neural networks to single pulse candidates.
4 |
5 | run_frb_simulation.py constructs a training set that includes simulated FRBs.
6 |
7 | run_single_pulse_DL.py allows for training deep neural networks on several
8 | input data products, including:
9 | - dedispersed dynamic spectra (2D CNN)
10 | - DM/time intensity arrays (2D CNN)
11 | - frequency-collapsed pulse profiles (1D CNN)
12 | - multi-beam S/N information (1D feed-forward DNN)
13 |
14 | run_single_pulse_DL.py can also be used to classify candidates when a trained model already exists.
15 |
16 | This code has been used on CHIME Pathfinder incoherent data as well as commissioning data on Apertif.
17 |
18 | ### Requirements
19 |
20 | You will need the following:
21 | - numpy
22 | - scipy
23 | - h5py
24 | - matplotlib
25 | - tensorflow
26 | - keras
27 |
28 | ### Tests
29 |
30 | In the single_pulse_ml/tests/ directory,
31 | "test_run_frb_simulation.py" can be run to generate 100 simulated FRBs
32 | to ensure the simulation backend works.
33 |
34 | "test_frbkeras.py" will generate 1000 gaussian-noise
35 | dynamic spectrum candidates of dimension 32x64, then
36 | build, train, and test a CNN using the tools in frbkeras.
37 | This allows a test of the keras/tensorflow code.
38 |
--------------------------------------------------------------------------------
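
A minimal programmatic sketch of the classification workflow the README describes, for orientation before reading the code below. The filenames are placeholders, and reader.read_hdf5's return signature is inferred from its usage in classify.py; treat this as an illustration, not shipped example code.

```python
# Hypothetical usage sketch; 'candidates.hdf5' and 'freq_time_model.hdf5'
# are placeholder filenames, not files that ship with the repo.
import numpy as np
from single_pulse_ml import frbkeras, reader

# read_hdf5 returns (freq/time data, labels, dm/time data, multibeam
# data), following how classify.py unpacks it
data_freq, y, data_dm, data_mb = reader.read_hdf5('candidates.hdf5')

model = frbkeras.load_model('freq_time_model.hdf5')
prob = model.predict(data_freq[..., None])[:, 1]  # P(candidate is an FRB)
print("Events above p=0.5:", np.where(prob > 0.5)[0])
```
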
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | def readme():
4 |     with open('README.md') as f:
5 | return f.read()
6 |
7 | setup(name='single_pulse_ml',
8 | version='0.1',
9 | description='Deep learning implementation of single-pulse search',
10 | url='http://github.com/liamconnor/single_pulse_ml',
11 | author='Liam Connor',
12 | author_email='liam.dean.connor@gmail.com',
13 | license='GPL v2.0',
14 | packages=['single_pulse_ml'],
15 | install_requires=[
16 | 'numpy',
17 | 'scipy',
18 | 'h5py',
19 | 'matplotlib',
20 | 'tensorflow-gpu',
21 | 'keras',
22 | ],
23 | test_suite='nose.collector',
24 | tests_require=['nose'],
25 | zip_safe=False)
26 |
--------------------------------------------------------------------------------
/single_pulse_ml/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liamconnor/single_pulse_ml/88b6b76ebf3d3939214d9785d4e1c5076f653c38/single_pulse_ml/__init__.py
--------------------------------------------------------------------------------
/single_pulse_ml/classify.py:
--------------------------------------------------------------------------------
1 | # Liam Connor 25 July 2018
2 | # Script to classify single-pulses
3 | # using tensorflow/keras model. Output probabilities
4 | # can be saved and plotted
5 |
6 | import optparse
7 | import numpy as np
8 | import h5py
9 |
10 | import warnings
11 | warnings.simplefilter(action='ignore', category=FutureWarning)
12 |
13 | import matplotlib as mpl
14 | mpl.use('pdf')
15 |
16 | import frbkeras
17 | import reader
18 | import plot_tools
19 |
20 | def classify(data, model, save_ranked=False,
21 | plot_ranked=False, prob_threshold=0.5,
22 | fnout='ranked'):
23 |
24 | model = frbkeras.load_model(model)
25 |
26 | mshape = model.input.shape
27 | dshape = data.shape
28 |
29 | # normalize data
30 | data = data.reshape(len(data), -1)
31 | data -= np.median(data, axis=-1)[:, None]
32 | data /= np.std(data, axis=-1)[:, None]
33 |
34 | # zero out nans
35 | data[data!=data] = 0.0
36 | data = data.reshape(dshape)
37 |
38 | if dshape[-1]!=1:
39 | data = data[..., None]
40 |
41 | if len(mshape)==3:
42 | data = data.mean(1)
43 | dshape = data.shape
44 |
45 |     if mshape[1] < dshape[1]:
46 |         # model expects fewer bins along axis 1: center-crop the
47 |         # data to fit (assumed behavior)
48 |         cl = dshape[1]//2 - int(mshape[1])//2
49 |         data = data[:, cl:cl+int(mshape[1])]
50 |     elif mshape[1] > dshape[1]:
51 |         print("Model expects:", mshape)
52 |         print("Data has:", dshape)
53 |
54 |         return
55 |
56 |     if mshape[2] < dshape[2]:
57 |         # model expects fewer bins along axis 2: center-crop the
58 |         # data to fit (assumed behavior)
59 |         cl = dshape[2]//2 - int(mshape[2])//2
60 |         data = data[:, :, cl:cl+int(mshape[2])]
61 |     elif mshape[2] > dshape[2]:
62 |         print("Model expects:", mshape)
63 |         print("Data has:", dshape)
64 |
65 |         return
66 |
67 | y_pred_prob = model.predict(data)
68 | y_pred_prob = y_pred_prob[:,1]
69 |
70 | ind_frb = np.where(y_pred_prob>prob_threshold)[0]
71 |
72 | print("\n%d out of %d events with probability > %.2f:\n %s" %
73 | (len(ind_frb), len(y_pred_prob),
74 | prob_threshold, ind_frb))
75 |
76 | low_to_high_ind = np.argsort(y_pred_prob)
77 |
78 | if save_ranked is True:
79 | print("Need to fix the file naming")
80 |         fnout_ranked = fn_data.replace('.hdf5', '') + \
81 |                        'freq_time_candidates.hdf5'
82 |
83 | g = h5py.File(fnout_ranked, 'w')
84 | g.create_dataset('data_frb_candidate', data=data[ind_frb])
85 | g.create_dataset('frb_index', data=ind_frb)
86 | g.create_dataset('probability', data=y_pred_prob)
87 | g.close()
88 | print("\nSaved them and all probabilities to: \n%s" % fnout_ranked)
89 |
90 | if plot_ranked is True:
91 | if save_ranked is False:
92 | argtup = (data[ind_frb], ind_frb, y_pred_prob)
93 |
94 | plot_tools.plot_multiple_ranked(argtup, nside=10, \
95 | fnfigout=fnout, ascending=False)
96 | else:
97 | plot_tools.plot_multiple_ranked(fnout_ranked, nside=10, \
98 | fnfigout=fnout, ascending=False)
99 |
100 |
101 | if __name__=="__main__":
102 | parser = optparse.OptionParser(prog="classify.py", \
103 | version="", \
104 | usage="%prog FN_DATA FN_MODEL [OPTIONS]", \
105 | description="Apply DNN model to FRB candidates")
106 |
107 | parser.add_option('--fn_model_dm', dest='fn_model_dm', type='str', \
108 | help="Filename of dm_time model. Default None", \
109 | default=None)
110 |
111 | parser.add_option('--fn_model_time', dest='fn_model_time', type='str', \
112 | help="Filename of 1d time model. Default None", \
113 | default=None)
114 |
115 | parser.add_option('--fn_model_mb', dest='fn_model_mb', type='str', \
116 | help="Filename of multibeam model. Default None", \
117 | default=None)
118 |
119 | parser.add_option('--pthresh', dest='prob_threshold', type='float', \
120 |                   help="probability threshold", default=0.5)
121 |
122 | parser.add_option('--save_ranked', dest='save_ranked',
123 | action='store_true', \
124 | help="save FRB events + probabilities", \
125 | default=False)
126 |
127 | parser.add_option('--plot_ranked', dest='plot_ranked', \
128 | action='store_true',\
129 | help="plot triggers", default=False)
130 |
131 | parser.add_option('--twindow', dest='twindow', type='int', \
132 | help="time width, default 64", default=64)
133 |
134 | parser.add_option('--fnout', dest='fnout', type='str', \
135 | help="beginning of figure names", \
136 | default='ranked_trig')
137 |
138 | options, args = parser.parse_args()
139 |
140 | assert len(args)==2, "Arguments are FN_DATA FN_MODEL [OPTIONS]"
141 |
142 | fn_data = args[0]
143 | fn_model_freq = args[1]
144 |
145 | print("Using datafile %s" % fn_data)
146 | print("Using keras model in %s" % fn_model_freq)
147 |
148 | data_freq, y, data_dm, data_mb = reader.read_hdf5(fn_data)
149 |
150 | NFREQ = data_freq.shape[1]
151 | NTIME = data_freq.shape[2]
152 | WIDTH = options.twindow
153 |
154 | # low time index, high time index
155 | tl, th = NTIME//2-WIDTH//2, NTIME//2+WIDTH//2
156 |
157 | if data_freq.shape[-1] > (th-tl):
158 | data_freq = data_freq[..., tl:th]
159 |
160 | fn_fig_out = options.fnout + '_freq_time'
161 | print("\nCLASSIFYING FREQ/TIME DATA\n")
162 | classify(data_freq, fn_model_freq,
163 | save_ranked=options.save_ranked,
164 | plot_ranked=options.plot_ranked,
165 | prob_threshold=options.prob_threshold,
166 | fnout=fn_fig_out)
167 |
168 | if options.fn_model_dm is not None:
169 | if len(data_dm)>0:
170 |             print("\nCLASSIFYING DM/TIME DATA\n")
171 | print(data_dm.shape)
172 | fn_fig_out = options.fnout + '_dm_time'
173 | classify(data_dm, options.fn_model_dm,
174 | save_ranked=options.save_ranked,
175 | plot_ranked=options.plot_ranked,
176 | prob_threshold=options.prob_threshold,
177 | fnout=fn_fig_out)
178 | else:
179 | print("No DM/time data to classify")
180 |
181 | if options.fn_model_time is not None:
182 |         print("\nCLASSIFYING 1D TIME DATA\n")
183 | fn_fig_out = options.fnout + '_1d_time'
184 | classify(data_freq, options.fn_model_time,
185 | save_ranked=options.save_ranked,
186 | plot_ranked=options.plot_ranked,
187 | prob_threshold=options.prob_threshold,
188 | fnout=fn_fig_out)
189 |
190 | if options.fn_model_mb is not None:
191 | classify(data_mb, options.fn_model_mb,
192 | save_ranked=options.save_ranked,
193 | plot_ranked=options.plot_ranked,
194 | prob_threshold=options.prob_threshold,
195 | fnout=options.fnout)
196 |
197 | exit()
198 |
199 | dshape = data_freq.shape
200 |
201 | # normalize data
202 | data_freq = data_freq.reshape(len(data_freq), -1)
203 | data_freq -= np.median(data_freq, axis=-1)[:, None]
204 | data_freq /= np.std(data_freq, axis=-1)[:, None]
205 |
206 | # zero out nans
207 | data_freq[data_freq!=data_freq] = 0.0
208 | data_freq = data_freq.reshape(dshape)
209 |
210 | if data_freq.shape[-1]!=1:
211 | data_freq = data_freq[..., None]
212 |
213 | model = frbkeras.load_model(fn_model_freq)
214 |
215 | if len(model.input.shape)==3:
216 | data_freq = data_freq.mean(1)
217 |
218 | y_pred_prob = model.predict(data_freq)
219 | y_pred_prob = y_pred_prob[:,1]
220 |
221 | ind_frb = np.where(y_pred_prob>options.prob_threshold)[0]
222 |
223 | print("\n%d out of %d events with probability > %.2f:\n %s" %
224 | (len(ind_frb), len(y_pred_prob),
225 | options.prob_threshold, ind_frb))
226 |
227 | low_to_high_ind = np.argsort(y_pred_prob)
228 |
229 | if options.save_ranked is True:
230 |         fnout_ranked = fn_data.replace('.hdf5', '') + 'freq_time_candidates.hdf5'
231 |
232 | g = h5py.File(fnout_ranked, 'w')
233 | g.create_dataset('data_frb_candidate', data=data_freq[ind_frb])
234 | g.create_dataset('frb_index', data=ind_frb)
235 | g.create_dataset('probability', data=y_pred_prob)
236 | g.close()
237 | print("\nSaved them and all probabilities to: \n%s" % fnout_ranked)
238 |
239 | if options.plot_ranked is True:
240 | if options.save_ranked is False:
241 | argtup = (data_freq[ind_frb], ind_frb, y_pred_prob)
242 | plot_tools.plot_multiple_ranked(argtup, nside=5, \
243 | fnfigout=options.fnout)
244 | else:
245 | plot_tools.plot_multiple_ranked(fnout_ranked, nside=5, \
246 | fnfigout=options.fnout)
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
--------------------------------------------------------------------------------
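
classify() can also be driven directly from Python rather than through the command line. A minimal sketch, assuming a trained freq/time Keras model exists on disk ('freq_time_model.hdf5' is a placeholder):

```python
import numpy as np
from single_pulse_ml import classify

# 50 fake candidates, each a (nfreq, ntime) = (32, 64) dynamic spectrum
data = np.random.normal(0, 1, (50, 32, 64))

# classify() normalizes the data, crops it to the model's input shape,
# predicts, and prints the indices of events with P(FRB) > 0.7
classify.classify(data, 'freq_time_model.hdf5', prob_threshold=0.7)
```
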
/single_pulse_ml/data/data.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liamconnor/single_pulse_ml/88b6b76ebf3d3939214d9785d4e1c5076f653c38/single_pulse_ml/data/data.txt
--------------------------------------------------------------------------------
/single_pulse_ml/dataproc.py:
--------------------------------------------------------------------------------
1 | """ Tools for preprocessing data
2 | """
3 |
4 | import numpy as np
5 |
6 | def normalize_data(data):
7 | """ Normalize data to zero-median and
8 | unit standard deviation
9 |
10 | Parameters
11 | ----------
12 | data : np.array
13 | (nfreq, ntimes)
14 | """
15 | # subtract each channel's median
16 | data -= np.median(data, axis=-1)[:, None]
17 | # demand unit variance
18 | # data /= np.std(data, axis=-1)[:, None]
19 | # Try dividing by global variance.
20 | data /= np.std(data)
21 | # Replace nans with zero
22 | data[data!=data] = 0.
23 |
24 | return data
25 |
26 |
27 | def dedisp(data, dm, freq=np.linspace(800, 400, 1024), dt=512*2.56e-6):
28 | """ Dedisperse data by shifting freq bins
29 |
30 | Parameters
31 | ----------
32 | data : np.array
33 | (nfreq, ntimes)
34 | dm : np.float
35 | dispersion measure in pc cm**-3
36 | freq : np.array
37 | (nfreq) vector in MHz
38 | dt : np.float
39 | time resolution of data in seconds
40 | """
41 | dm_del = 4.148808e3 * dm * (freq**(-2) - 600.0**(-2))
42 | data_out = np.zeros_like(data)
43 |
44 | for ii, ff in enumerate(freq):
45 | dmd = int(round(dm_del[ii] / dt))
46 | data_out[ii] = np.roll(data[ii], -dmd, axis=-1)
47 |
48 | return data_out
49 |
50 | def dm_delays(dm, freq, f_ref):
51 | """ Calculate dispersion delays in seconds
52 |
53 | Parameters
54 | ----------
55 | dm : np.float
56 | dispersion measure in pc cm**-3
57 | freq : np.array
58 | (nfreq) vector in MHz
59 | f_ref: np.float
60 | reference frequency in MHz
61 | """
62 | return 4.148808e3 * dm * (freq**(-2) - f_ref**(-2))
63 |
64 |
65 | def straighten_arr(data):
66 | """ Step through each freq, find DM shift
67 | that gives largest S/N, realign bins
68 |
69 | Parameters
70 | ----------
71 | data : np.array
72 | (nfreq, ntimes)
73 | """
74 |
75 | sn = []
76 |
77 | dms = np.linspace(-5, 5, 100)
78 |
79 | for dm in dms:
80 |         d_ = dedisp(data.copy(), dm, freq=np.linspace(800,400,16))
81 | sn.append(d_.mean(0).max() / np.std(d_.mean(0)))
82 |
83 |     d_ = dedisp(data, dms[np.argmax(sn)], freq=np.linspace(800,400,16))
84 |
85 | return d_
86 |
87 | def run_straightening(fn):
88 | """ Take filename, read in data, shift
89 | to remove any excess dm-delay.
90 |
91 | Parameters
92 | ----------
93 | fn : str
94 | filename of numpy array
95 | """
96 | f = np.load(fn)
97 |
98 | y = f[:, -1]
99 |
100 | d = f[y==1, :-1].copy()
101 |
102 | for ii in range(len(d)):
103 | dd_ = d[ii].reshape(-1, 250)
104 | d[ii] = (straighten_arr(dd_)).reshape(-1)
105 |
106 | f[y==1, :-1] = d
107 |
108 | for jj in range(len(f)):
109 | dd_ = f[jj, :-1].reshape(-1, 250)
110 |         dd_ = normalize_data(dd_)
111 | f[jj, :-1] = dd_.flatten()
112 |
113 | return f
--------------------------------------------------------------------------------
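
A quick self-consistency check of dm_delays() and dedisp(): inject a pulse with exactly the dispersion sweep the module predicts, dedisperse at the same DM, and the band-averaged profile should peak back in a single bin. The numbers below are illustrative, not from the repo's tests.

```python
import numpy as np
from single_pulse_ml import dataproc

freq = np.linspace(800, 400, 1024)  # MHz; dedisp's default band
dt = 512 * 2.56e-6                  # s; dedisp's default resolution
dm = 10.0                           # pc cm**-3

# place a pulse at time bin 256 at the 600 MHz reference frequency,
# swept across the band by the predicted delays
data = np.zeros((1024, 512))
delays = dataproc.dm_delays(dm, freq, 600.0)
for ii in range(len(freq)):
    data[ii, 256 + int(round(delays[ii] / dt))] = 1.0

# after dedispersion all channels realign at bin 256
data_dd = dataproc.dedisp(data, dm)
print(np.argmax(data_dd.mean(0)))   # -> 256
```
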
/single_pulse_ml/frbkeras.py:
--------------------------------------------------------------------------------
1 | """ Tools for building and training deep neural
2 | networks in keras using the tensorflow backend.
3 | """
4 |
5 |
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import sys
11 |
12 | import numpy as np
13 | from numpy.random import seed
14 | import h5py
15 |
16 | import keras
17 | from keras.models import Sequential
18 | from keras.layers import Dense, Dropout, Flatten, Concatenate
19 | from keras.layers import Conv1D, Conv2D
20 | from keras.layers import MaxPooling2D, MaxPooling1D, GlobalAveragePooling1D, BatchNormalization
21 | from keras.optimizers import SGD
22 | from keras.models import load_model
23 |
24 |
25 | def get_predictions(model, data, true_labels=None):
26 | """ Take a keras.model object, a data array,
27 | and true_labels, and return the probability of
28 | each feature being a TP, the prediction itself,
29 | and the mistakes.
30 | """
31 |     if true_labels is not None and len(true_labels.shape)==2:
32 |         true_labels = true_labels[:,1]
33 |
34 | prob = model.predict(data)
35 | predictions = np.round(prob[:, 1])
36 |
37 | if true_labels is not None:
38 | mistakes = np.where(predictions!=true_labels)[0]
39 | else:
40 | mistakes = []
41 |
42 | return prob, predictions, mistakes
43 |
44 | def get_classification_results(y_true, y_pred):
45 | """ Take true labels (y_true) and model-predicted
46 | label (y_pred) for a binary classifier, and return
47 | true_positives, false_positives, true_negatives, false_negatives
48 | """
49 |
50 | true_positives = np.where((y_true==1) & (y_pred==1))[0]
51 | false_positives = np.where((y_true==0) & (y_pred==1))[0]
52 | true_negatives = np.where((y_true==0) & (y_pred==0))[0]
53 | false_negatives = np.where((y_true==1) & (y_pred==0))[0]
54 |
55 | return true_positives, false_positives, true_negatives, false_negatives
56 |
57 | def confusion_mat(y_true, y_pred):
58 | """ Generate a confusion matrix for a
59 | binary classifier based on true labels (
60 | y_true) and model-predicted label (y_pred)
61 |
62 | returns np.array([[TP, FP],[FN, TN]])
63 | """
64 | TP, FP, TN, FN = get_classification_results(y_true, y_pred)
65 |
66 | NTP = len(TP)
67 | NFP = len(FP)
68 | NTN = len(TN)
69 | NFN = len(FN)
70 |
71 | conf_mat = np.array([[NTP, NFP],[NFN, NTN]])
72 |
73 | return conf_mat
74 |
75 | def print_metric(y_true, y_pred):
76 | """ Take true labels (y_true) and model-predicted
77 | label (y_pred) for a binary classifier
78 | and print a confusion matrix, metrics,
79 | return accuracy, precision, recall, fscore
80 | """
81 | conf_mat = confusion_mat(y_true, y_pred)
82 |
83 | NTP, NFP, NTN, NFN = conf_mat[0,0], conf_mat[0,1], conf_mat[1,1], conf_mat[1,0]
84 |
85 | print("Confusion matrix:")
86 |
87 | print('\n'.join([''.join(['{:8}'.format(item) for item in row])
88 | for row in conf_mat]))
89 |
90 | accuracy = float(NTP + NTN)/conf_mat.sum()
91 | precision = float(NTP) / (NTP + NFP + 1e-19)
92 | recall = float(NTP) / (NTP + NFN + 1e-19)
93 | fscore = 2*precision*recall/(precision+recall)
94 |
95 | print("accuracy: %f" % accuracy)
96 | print("precision: %f" % precision)
97 | print("recall: %f" % recall)
98 | print("fscore: %f" % fscore)
99 |
100 | return accuracy, precision, recall, fscore
101 |
102 | def construct_ff1d(features_only=False, fit=False,
103 | train_data=None, train_labels=None,
104 | eval_data=None, eval_labels=None,
105 | nbeam=32, epochs=5,
106 | nlayer1=32, nlayer2=64, batch_size=32):
107 | """ Build a one-dimensional feed forward neural network
108 | with a binary classifier. Can be used for, e.g.,
109 | multi-beam detections.
110 |
111 | Parameters:
112 | ----------
113 | features_only : bool
114 | Don't construct full model, only features layers
115 | fit : bool
116 | Fit model
117 | train_data : ndarray
118 |         (ntrain, nbeam) float64 array with training data
119 | train_labels : ndarray
120 | (ntrigger, 2) binary labels of training data [0, 1] = FRB, [1, 0]=RFI
121 | eval_data : ndarray
122 |         (neval, nbeam) float64 array with evaluation data
123 | eval_labels :
124 | (neval, 2) binary labels of eval data
125 | nbeam : int
126 | Number of input beams (more generally, number of data inputs)
127 | epochs : int
128 | Number of training epochs
129 | nlayer1 : int
130 | Number of neurons in first hidden layer
131 | nlayer2 : int
132 | Number of neurons in second hidden layer
133 | batch_size : int
134 | Number of batches for training
135 |
136 | Returns
137 | -------
138 |     model : keras.models.Sequential
139 |         the constructed (and optionally trained) model
140 |     score : list
141 |         [loss, accuracy] from model.evaluate ([] unless fit is True)
142 | """
143 | model = Sequential()
144 | model.add(Dense(nlayer1, input_dim=nbeam, activation='relu'))
145 | model.add(Dropout(0.4))
146 |     model.add(Dense(nlayer2, kernel_initializer='normal', activation='relu'))
147 |
148 | if features_only is True:
149 | model.add(BatchNormalization()) # hack
150 | return model, []
151 |
152 | model.add(Dropout(0.4))
153 | model.add(Dense(2, activation='sigmoid'))
154 |
155 | model.compile(loss='binary_crossentropy',
156 | optimizer='rmsprop',
157 | metrics=['accuracy'])
158 |     score = []
159 |     if fit is True:
160 |         model.fit(train_data, train_labels, batch_size=batch_size, epochs=epochs)
161 |         score = model.evaluate(eval_data, eval_labels, batch_size=batch_size)
162 |     return model, score
163 |
164 | def construct_conv2d(features_only=False, fit=False,
165 | train_data=None, train_labels=None,
166 | eval_data=None, eval_labels=None,
167 | nfreq=16, ntime=250, epochs=5,
168 | nfilt1=32, nfilt2=64, batch_size=32):
169 | """ Build a two-dimensional convolutional neural network
170 | with a binary classifier. Can be used for, e.g.,
171 | freq-time dynamic spectra of pulsars, dm-time intensity array.
172 |
173 | Parameters:
174 | ----------
175 | features_only : bool
176 | Don't construct full model, only features layers
177 | fit : bool
178 | Fit model
179 | train_data : ndarray
180 |         (ntrain, nfreq, ntime, 1) float64 array with training data
181 | train_labels : ndarray
182 | (ntrigger, 2) binary labels of training data [0, 1] = FRB, [1, 0]=RFI
183 | eval_data : ndarray
184 |         (neval, nfreq, ntime, 1) float64 array with evaluation data
185 | eval_labels :
186 | (neval, 2) binary labels of eval data
187 | epochs : int
188 | Number of training epochs
189 | nfilt1 : int
190 | Number of neurons in first hidden layer
191 | nfilt2 : int
192 | Number of neurons in second hidden layer
193 | batch_size : int
194 | Number of batches for training
195 |
196 | Returns
197 | -------
198 | model : XX
199 |
200 | score : np.float
201 | accuracy, i.e. fraction of predictions that are correct
202 |
203 | """
204 |
205 | if train_data is not None:
206 | nfreq=train_data.shape[1]
207 | ntime=train_data.shape[2]
208 |
209 | model = Sequential()
210 | # this applies 32 convolution filters of size 5x5 each.
211 | model.add(Conv2D(nfilt1, (5, 5), activation='relu', input_shape=(nfreq, ntime, 1)))
212 |
213 | #model.add(Conv2D(32, (3, 3), activation='relu'))
214 | model.add(MaxPooling2D(pool_size=(2, 2)))
215 | # Randomly drop some fraction of nodes (set weights to 0)
216 | model.add(Dropout(0.4))
217 | model.add(Conv2D(nfilt2, (5, 5), activation='relu'))
218 | model.add(MaxPooling2D(pool_size=(2, 2)))
219 | model.add(Dropout(0.4))
220 | model.add(Flatten())
221 |
222 | if features_only is True:
223 | model.add(BatchNormalization()) # hack
224 | return model, []
225 |
226 | model.add(Dense(256, activation='relu')) # should be 1024 hack
227 |
228 | # model.add(Dense(1024, activation='relu')) # remove for now hack
229 | model.add(Dropout(0.5))
230 | model.add(Dense(2, activation='softmax'))
231 |
232 | sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
233 | model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
234 |
235 | # train_labels = keras.utils.to_categorical(train_labels)
236 | # eval_labels = keras.utils.to_categorical(eval_labels)
237 |     score = []  # returned unchanged unless fit is True
238 |     if fit is True:
239 | print("Using batch_size: %d" % batch_size)
240 | print("Using %d epochs" % epochs)
241 | cb = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0,
242 | batch_size=32, write_graph=True, write_grads=False,
243 | write_images=True, embeddings_freq=0, embeddings_layer_names=None,
244 | embeddings_metadata=None)
245 |
246 | model.fit(train_data, train_labels, batch_size=batch_size, epochs=epochs, callbacks=[cb])
247 | score = model.evaluate(eval_data, eval_labels, batch_size=batch_size)
248 | print("Conv2d only")
249 | print(score)
250 |
251 | return model, score
252 |
253 | def construct_conv1d(features_only=False, fit=False,
254 | train_data=None, train_labels=None,
255 | eval_data=None, eval_labels=None,
256 | nfilt1=64, nfilt2=128,
257 | batch_size=16, epochs=5):
258 | """ Build a one-dimensional convolutional neural network
259 | with a binary classifier. Can be used for, e.g.,
260 | pulse profiles.
261 |
262 | Parameters:
263 | ----------
264 | features_only : bool
265 | Don't construct full model, only features layers
266 | fit : bool
267 | Fit model
268 | train_data : ndarray
269 | (ntrain, ntime, 1) float64 array with training data
270 | train_labels : ndarray
271 | (ntrigger, 2) binary labels of training data [0, 1] = FRB, [1, 0]=RFI
272 | eval_data : ndarray
273 | (neval, ntime, 1) float64 array with evaluation data
274 | eval_labels :
275 | (neval, 2) binary labels of eval data
276 | epochs : int
277 | Number of training epochs
278 | nfilt1 : int
279 | Number of neurons in first hidden layer
280 | nfilt2 : int
281 | Number of neurons in second hidden layer
282 | batch_size : int
283 | Number of batches for training
284 |
285 | Returns
286 | -------
287 |     model : keras.models.Sequential
288 |         the constructed (and optionally trained) model
289 |     score : list
290 |         [loss, accuracy] from model.evaluate ([] unless fit is True)
291 | """
292 |
293 | if train_data is not None:
294 | NTIME=train_data.shape[1]
295 |
296 | model = Sequential()
297 | model.add(Conv1D(nfilt1, 3, activation='relu', input_shape=(NTIME, 1)))
298 | model.add(Conv1D(nfilt1, 3, activation='relu'))
299 | model.add(MaxPooling1D(3))
300 | model.add(Conv1D(nfilt2, 3, activation='relu'))
301 | model.add(Conv1D(nfilt2, 3, activation='relu'))
302 | model.add(GlobalAveragePooling1D())
303 |
304 | if features_only is True:
305 | return model, []
306 |
307 | model.add(Dropout(0.5))
308 | model.add(Dense(2, activation='sigmoid'))
309 |
310 | model.compile(loss='binary_crossentropy',
311 | optimizer='rmsprop',
312 | metrics=['accuracy'])
313 |     score = []  # returned unchanged unless fit is True
314 |     if fit is True:
315 | model.fit(train_data, train_labels, batch_size=batch_size, epochs=epochs)
316 | score = model.evaluate(eval_data, eval_labels, batch_size=16)
317 | print("Conv1d only")
318 |
319 | return model, score
320 |
321 |
322 | def merge_models(model_list, train_data_list,
323 |                  train_labels, eval_data_list, eval_labels,
324 |                  batch_size=32, epochs=5):
325 |     """ Take list of models, list of training data,
326 |     merge models and train as a single network.
327 |     """
328 |     from keras.models import Model
329 |
330 |     # concatenate the feature outputs of the sub-models
331 |     # (Keras 2 functional API; the old Sequential + Merge
332 |     # pattern was removed from keras)
333 |     merged = Concatenate()([m.output for m in model_list])
334 |     merged = Dense(256, activation='relu')(merged)
335 |     out = Dense(2, kernel_initializer='normal', activation='sigmoid')(merged)
336 |     model = Model(inputs=[m.input for m in model_list], outputs=out)
337 |     sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
338 |     model.compile(loss='binary_crossentropy',
339 |                   optimizer=sgd,
340 |                   metrics=['accuracy'])
341 |     seed(2017)
342 |     model.fit(train_data_list, train_labels,
343 |               batch_size=batch_size, epochs=epochs, verbose=1)
344 |     score = model.evaluate(eval_data_list, eval_labels, batch_size=batch_size)
345 |     return model, score
--------------------------------------------------------------------------------
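
The noise-only CNN test the README describes can be reproduced in a few lines. A sketch along those lines (not the repo's actual test_frbkeras.py):

```python
# Train frbkeras' 2D CNN on pure Gaussian noise with random labels;
# eval accuracy should hover near 0.5 since the labels carry no signal.
import numpy as np
import keras
from single_pulse_ml import frbkeras

ntrig, nfreq, ntime = 1000, 32, 64
data = np.random.normal(0, 1, (ntrig, nfreq, ntime, 1))
labels = keras.utils.to_categorical(np.random.randint(0, 2, ntrig), 2)

ntrain = 800  # simple 80/20 train/eval split
model, score = frbkeras.construct_conv2d(
        fit=True,
        train_data=data[:ntrain], train_labels=labels[:ntrain],
        eval_data=data[ntrain:], eval_labels=labels[ntrain:],
        epochs=1)

# confusion matrix + accuracy/precision/recall on the eval split
y_true = labels[ntrain:, 1]
y_pred = np.round(model.predict(data[ntrain:])[:, 1])
frbkeras.print_metric(y_true, y_pred)
```
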
/single_pulse_ml/model/model.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liamconnor/single_pulse_ml/88b6b76ebf3d3939214d9785d4e1c5076f653c38/single_pulse_ml/model/model.txt
--------------------------------------------------------------------------------
/single_pulse_ml/plot_tools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | try:
4 | import matplotlib
5 | matplotlib.use('Agg')
6 |
7 | import matplotlib.pyplot as plt
8 | from matplotlib import gridspec
9 | except ImportError:
10 |     print("Couldn't import matplotlib; plotting will be unavailable")
11 |     pass
12 |
13 | def plot_simulated_events(data, labels, figname,
14 | NSIDE, NFREQ, NTIME, cmap='RdBu'):
15 | """ Make series of waterfall plots of training / test
16 | set.
17 | """
18 |
19 | NFIG=NSIDE**2
20 | lab_dict = {0 : 'RFI', 1 : 'FRB'}
21 |
22 | fig = plt.figure(figsize=(15,15))
23 | for ii in range(NFIG):
24 | plt.subplot(NSIDE,NSIDE,ii+1)
25 | plt.imshow(data[ii].reshape(-1, NTIME),
26 | aspect='auto', interpolation='nearest',
27 | cmap=cmap, vmin=-3, vmax=3)
28 | plt.axis('off')
29 | plt.colorbar()
30 | plt.title(lab_dict[labels[ii]])
31 | plt.xlim(125-32,125+32)
32 |
33 | fig.savefig('%s_rfi.png' % figname)
34 |
35 | fig = plt.figure(figsize=(15,15))
36 | for ii in range(NFIG):
37 | plt.subplot(NSIDE,NSIDE,ii+1)
38 | plt.imshow(data[-ii-1].reshape(-1, NTIME),
39 | aspect='auto', interpolation='nearest',
40 | cmap=cmap, vmin=-3, vmax=3)
41 | plt.axis('off')
42 | plt.colorbar()
43 |         plt.title(lab_dict[labels[-ii-1]])
44 | plt.xlim(125-32,125+32)
45 |
46 | fig.savefig(figname)
47 |
48 | def plot_gallery(data_arr, titles, h, w, n_row=3, n_col=4,
49 | figname=None, cmap='RdBu', suptitle=''):
50 | """Helper function to plot a gallery of portraits"""
51 | plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
52 | plt.suptitle(suptitle, fontsize=35, color='blue', alpha=0.5)
53 | plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
54 | for i in range(min(n_row * n_col, len(data_arr))):
55 | d_arr = data_arr[i].reshape((h, w))
56 | d_arr -= np.median(d_arr)
57 | plt.subplot(n_row, n_col, i + 1)
58 | plt.imshow(d_arr, cmap=cmap, aspect='auto')
59 | plt.title(titles[i], size=12, color='red')
60 | plt.xticks(())
61 | plt.yticks(())
62 | if figname:
63 | plt.savefig(figname)
64 |
65 |
66 | def get_title(y, target_names):
67 | prediction_titles = y.astype(str)
68 | prediction_titles[prediction_titles=='0'] = target_names[0]
69 | prediction_titles[prediction_titles=='1'] = target_names[1]
70 |
71 | return prediction_titles
72 |
73 | def get_title2(y_pred, y_test, target_names, i):
74 | pred_name = target_names[y_pred[i]]
75 | true_name = target_names[y_test[i]]
76 | return 'predicted: %s\ntrue: %s' % (pred_name, true_name)
77 |
78 | def plot_ranked_trigger(data, prob_arr, h=6, w=6,
79 | ascending=False, outname='out',
80 | cmap='RdBu', vmax=3, vmin=-3,
81 | yaxlabel='Freq'):
82 | """ Plot single-pulse triggers ranked by the
83 | classifier's assigned probability.
84 |
85 | Parameters
86 | ----------
87 | data : np.array
88 | data array with triggers
89 | prob_arr : np.array
90 | probability of event being a true FRB
91 | h : np.int
92 | number of rows of triggers
93 | w : np.int
94 | number of columns of triggers
95 | ascending : bool / str
96 | plot in ascending order (True, False, 'mid')
97 | outname : str
98 | figure name
99 | cmap : str
100 | colormap to use in imshow
101 |
102 | Returns
103 | -------
104 | None
105 | """
106 |
107 | if len(prob_arr.shape)>1:
108 | prob_arr = prob_arr[:,1]
109 |
110 | ranking = np.argsort(prob_arr)
111 |
112 | if ascending == True:
113 | ranking = ranking[::-1]
114 | title_str = 'RFI most probable'
115 | outname = outname
116 | elif ascending == 'mid':
117 | # cp = np.argsort(abs(prob_arr[:,0]-0.5))
118 | # ranking = cp[:h*w]
119 | inflection = np.argmax(abs(np.diff(prob_arr[ranking])))
120 |         ranking = ranking[inflection-h*w//2:inflection+h*w//2]
121 | title_str = 'Marginal events'
122 | outname = outname
123 | else:
124 | title_str = 'FRB most probable'
125 | outname = outname
126 |
127 | fig = plt.figure(figsize=(15,15))
128 |
129 | for ii in range(min(h*w, len(prob_arr))):
130 | plt.subplot(h, w, ii+1)
131 | if len(data.shape)==3:
132 | plt.imshow(data[ranking[ii]],
133 | cmap=cmap, interpolation='nearest',
134 | aspect='auto', vmin=vmin, vmax=vmax,
135 | extent=[0, 1, 400, 800])
136 | elif len(data.shape)==2:
137 | plt.plot(data[ranking[ii]])
138 | else:
139 | print("Wrong data input shape")
140 | return
141 |
142 | #plt.axis('off')
143 | plt.xticks([])
144 | plt.yticks([])
145 | plt.title('p='+str(np.round(prob_arr[ranking[ii]], 5)), fontsize=12)
146 |
147 | if ii % w == 0:
148 | plt.ylabel(yaxlabel, fontsize=14)
149 | if ii >= (h*w-w):
150 | plt.xlabel("Time", fontsize=14)
151 |
152 | if outname is not None:
153 | fig.savefig(outname)
154 | else:
155 | plt.show()
156 |
157 | def plot_multiple_ranked(argin, nside=5, fnfigout='ranked_trig',
158 | ascending=True):
159 | """ Generate multiple multi-panel figures
160 | using plot_ranked_trigger
161 |
162 | Parameters
163 | ----------
164 |
165 | argin : str/tuple
166 | input arguments, either
167 | (data_frb_candidate, frb_index, probability)
168 | or a filename
169 | nside : np.int
170 | number of figures per row/col
171 | fnfigout : str
172 | fig name
173 | """
174 | import sys
175 | import h5py
176 |
177 | if type(argin)==tuple:
178 | data_frb_candidate, frb_index, probability = argin
179 | fn = './'
180 | elif type(argin)==str:
181 | fn = argin
182 | f = h5py.File(fn,'r')
183 | data_frb_candidate = f['data_frb_candidate'][:]
184 | frb_index = f['frb_index'][:]
185 | probability = f['probability'][:]
186 | f.close()
187 | else:
188 | print("Wrong input argument")
189 | return
190 |
191 | ntrig = len(frb_index)
192 | probability = probability[frb_index]
193 | ind = np.argsort(probability)[::-1]
194 | data = data_frb_candidate[ind]
195 | probability_ = probability[ind]
196 |
197 |     for ii in range((ntrig-1)//nside**2+1):
198 | data_sub = data[nside**2*ii:nside**2*(ii+1),...,0]
199 | prob_sub = probability_[nside**2*ii:nside**2*(ii+1)]
200 | pmin, pmax = prob_sub.min(), prob_sub.max()
201 |
202 |         fnfigout_ = fnfigout+'_prob%.2f-%.2f.pdf' % (pmin, pmax)
203 |         print("Saving to %s" % fnfigout_)
204 |
205 | plot_ranked_trigger(data_sub, prob_sub,
206 | h=nside, w=nside, ascending=ascending,
207 | outname=fnfigout_, cmap=None)
208 |
209 |
210 | def plot_image_probabilities(FT_arr, DT_arr, FT_prob_spec, DT_prob_spec):
211 |
212 | assert (len(FT_arr.shape)==2) and (len(DT_arr.shape)==2), \
213 | "Input data should be (nfreq, ntimes)"
214 |
215 | gs2 = gridspec.GridSpec(4, 3)
216 | ax1 = plt.subplot(gs2[:2, :2])
217 | ax1.xaxis.set_ticklabels('')
218 | ax1.yaxis.set_ticklabels('')
219 | plt.ylabel('Freq', fontsize=18)
220 | plt.xlabel('Time', fontsize=18)
221 | ax1.imshow(FT_arr, cmap='RdBu', interpolation='nearest', aspect='auto')
222 |
223 | ax2 = plt.subplot(gs2[:2, 2:])
224 | ax2.yaxis.tick_right()
225 | ax2.yaxis.set_label_position('right')
226 | plt.ylabel('probability', fontsize=18)
227 | ax2.bar([0, 1], FT_prob_spec, color='red', alpha=0.75)
228 | plt.xticks([0.5, 1.5], ['RFI', 'Pulse'])
229 | plt.ylim(0, 1)
230 | plt.xlim(-.25, 2.)
231 |
232 | ax3 = plt.subplot(gs2[2:, :2])
233 | ax3.xaxis.set_ticklabels('')
234 | ax3.yaxis.set_ticklabels('')
235 | plt.ylabel('Freq', fontsize=18)
236 | plt.xlabel('Time', fontsize=18)
237 | ax3.imshow(DT_arr, cmap='RdBu', interpolation='nearest', \
238 | aspect='auto')
239 |
240 | ax4 = plt.subplot(gs2[2:, 2:])
241 | ax4.yaxis.set_label_position('right')
242 | ax4.yaxis.tick_right()
243 | plt.ylabel('probability', fontsize=18)
244 | ax4.bar([0, 1], DT_prob_spec, color='red', alpha=0.75)
245 | plt.xticks([0.5, 1.5], ['RFI', 'Pulse'])
246 | plt.ylim(0, 1)
247 | plt.xlim(-.25, 2.)
248 |
249 | plt.suptitle('TensorFlow Deep Learn', fontsize=45, )
250 |
251 |
252 | class VisualizeLayers:
253 | """ Class to visualize the hidden
254 | layers of a deep neural network in
255 | keras.
256 | """
257 | import keras.backend as backend
258 |
259 | def __init__(self, model):
260 | self._model = model
261 | self._NFREQ = model.get_input_shape_at(0)[1]
262 | self._NTIME = model.get_input_shape_at(0)[2]
263 | self.grid_counter = 0
264 | # Create empty list for non-redundant activations
265 | self._activations_nonred = []
266 | self._NFREQ_min = min([mm.input.shape[1] for mm in model.layers])
267 |
268 | def print_layers(self):
269 | """ Print layer names and shapes of keras model
270 | """
271 | for layer in self._model.layers:
272 | print("%s: %10s" % (layer.name, layer.input.shape))
273 |
274 | def imshow_custom(self, data, **kwargs):
275 | """ matplotlib imshow with custom arguments
276 | """
277 | plt.imshow(data, aspect='auto', interpolation='nearest',
278 | **kwargs)
279 |
280 | def remove_doubles(self, activations):
281 | """ Remove layers with identical shapes, e.g.
282 | dropout layers
283 | """
284 | self._activations_nonred.append(activations[0])
285 |
286 | # Start from first element, skip input data
287 | for ii, activation in enumerate(activations[1:]):
288 | act_shape = activation.shape
289 | if act_shape != activations[ii].shape:
290 | self._activations_nonred.append(activation)
291 |
292 | def get_activations(self, model_inputs,
293 | print_shape_only=True,
294 | layer_name=None):
295 |
296 | print('----- activations -----')
297 | activations = []
298 | inp = self._model.input
299 |
300 | model_multi_inputs_cond = True
301 | if not isinstance(inp, list):
302 | # only one input! let's wrap it in a list.
303 | inp = [inp]
304 | model_multi_inputs_cond = False
305 |
306 | outputs = [layer.output for layer in self._model.layers if
307 | layer.name == layer_name or layer_name is None] # all layer outputs
308 |
309 |         funcs = [self.backend.function(inp + \
310 |                  [self.backend.learning_phase()], [out]) \
311 |                  for out in outputs] # evaluation functions
312 |
313 | if model_multi_inputs_cond:
314 | list_inputs = []
315 | list_inputs.extend(model_inputs)
316 | list_inputs.append(0.)
317 | else:
318 | list_inputs = [model_inputs, 0.]
319 |
320 | # Learning phase. 0 = Test mode (no dropout or batch normalization)
321 | # layer_outputs = [func([model_inputs, 0.])[0] for func in funcs]
322 | layer_outputs = [func(list_inputs)[0] for func in funcs]
323 |
324 | # Append input data
325 | activations.append(model_inputs)
326 |
327 | for layer_activations in layer_outputs:
328 | activations.append(layer_activations)
329 |
330 | return activations
331 |
332 | def plot_feature_layer(self, activation, NSIDE=16):
333 | N_SUBFIG = activation.shape[-1]
334 |
335 | if N_SUBFIG==1:
336 |
337 | ax = plt.subplot2grid((NSIDE,NSIDE),
338 | (self.grid_counter, 3*NSIDE//8),
339 | colspan=NSIDE//4, rowspan=NSIDE//4)
340 | plt.plot(activation[0,:,0])
341 | return
342 |
343 | for ii in range(N_SUBFIG):
344 | size=int(activation.shape[1] / self._NFREQ_min)
345 | # size=int(np.round(4*activation.shape[1]/self._NFREQ * NSIDE//32))
346 | # size=min(size, NSIDE//8)
347 | start_grid = NSIDE//2 - N_SUBFIG*size//2
348 | print(NSIDE, self.grid_counter, start_grid + ii*size, size)
349 | ax = plt.subplot2grid((NSIDE,NSIDE),
350 | (self.grid_counter, start_grid + ii*size),
351 | colspan=size, rowspan=size)
352 | plt.plot(activation[0,:,ii])
353 | plt.axis('off')
354 |
355 | def im_feature_layer(self, activation, cmap='Greys', NSIDE=16,
356 | start_grid=0, N_SUBFIG=None, skip=1):
357 | N_SUBFIG = activation.shape[-1] if N_SUBFIG is None else N_SUBFIG
358 |
359 | if N_SUBFIG==1:
360 | # cmap = 'RdBu'
361 |
362 | ax = plt.subplot2grid((NSIDE,NSIDE),
363 | (self.grid_counter, 3*NSIDE//8),
364 | colspan=NSIDE//4, rowspan=NSIDE//4)
365 |
366 | print(self.grid_counter,'0')
367 | self.grid_counter += (NSIDE//4+NSIDE//16) # Add one extra unit of space
368 | print(activation.shape)
369 | data = activation[0,:,:,0]
370 | data -= np.median(data)
371 | vmax = 6*np.std(data)
372 | vmin = -1*np.std(data)
373 | self.imshow_custom(data, cmap=cmap, extent=[0, 1, 400, 800], \
374 | vmax=vmax, vmin=vmin)
375 | print(self.grid_counter,'1')
376 |
377 | plt.xlabel('Time')
378 | plt.ylabel('Freq [MHz]')
379 |
380 | return
381 |
382 |
383 |     def im_layers(self, activations, loc_obj, cmap='Greys', NSIDE=32):
384 |
385 | sizes = loc_obj[0]
386 | loc = loc_obj[1]
387 |
388 | for jj, activation in enumerate(activations):
389 | for ii in range(activation.shape[-1]):
390 | ax = plt.subplot2grid((NSIDE,NSIDE),(self.grid_counter, loc[jj][ii]),
391 | colspan=sizes[jj], rowspan=sizes[jj])
392 |
393 | self.imshow_custom(activation[0,:,:,ii], cmap='Greys')
394 | plt.axis('off')
395 |
396 | self.grid_counter += (NSIDE//32+int(sizes[jj]))
397 |
398 | plt.show()
399 |
400 | def get_image_index(self, NSIDE=100):
401 | offset = 0
402 | sizes = np.array([8, 4, 4, 2])
403 | N_SUBFIG = np.array([8, 8, 16, 16])
404 | offset = NSIDE//2 - N_SUBFIG*sizes//2
405 | loc1 = (offset[0] + np.arange(8)*sizes[0]).astype(int)
406 | loc2 = (loc1 + (sizes[0]/2 - sizes[1]/2)).astype(int)
407 |         loc3 = (offset[2] + np.arange(16)*(1+sizes[2])).astype(int)
408 | offset3 = NSIDE//2 - (loc3[0] + (loc3[-1] - loc3[0])/2.)
409 | loc3 += int(offset3)
410 | loc4 = (loc3 + (sizes[2]/2 - sizes[3]/2)).astype(int)
411 | loc = [loc1, loc2, loc3, loc4]
412 |
413 | loc_obj = (sizes, loc)
414 |
415 | return loc_obj
416 |
417 | def im_all(self, activations, NSIDE=32, figname=None, color='linen'):
418 |         fig = plt.figure(figsize=(15,15))
419 | self.grid_counter = 0
420 | start_grid_map = np.zeros([len(activations)]).astype(int)
421 | n_neuron_map = [activation.shape[-1] for activation in activations]
422 | loc_obj = self.get_image_index()
423 |
424 | for kk, activation in enumerate(activations[:]):
425 | print(self.grid_counter, kk, activation.shape)
426 | if kk==0:
427 |                 self.im_layers([activation], loc_obj, cmap='Greys', NSIDE=NSIDE)
428 | elif activation.shape[-1]==2: # For binary classification
429 | activation = activation[0]
430 | activation[0] = 0.025 # Hack for now, visualizing.
431 | ind = np.array([0, 1])
432 | width = 0.75
433 | ax = plt.subplot2grid((NSIDE,NSIDE),
434 | (self.grid_counter, 3*NSIDE//8),
435 | colspan=NSIDE//4, rowspan=NSIDE//4)
436 |
437 | rects1 = ax.bar(ind[1], activation[1], width, color='r', alpha=0.5)
438 | rects2 = ax.bar(ind[0], activation[0], width, color='green', alpha=0.5)
439 |
440 | ax.set_xticks(ind + width / 2)
441 | ax.set_xticklabels(('Noise', 'FRB'))
442 | ax.set_ylim(0, 1.25)
443 | ax.set_xlim(-0.25, 2.0)
444 |
445 | elif kk==1:
446 |                 self.im_layers(activations[1:5], loc_obj, cmap='Greys', NSIDE=NSIDE)
447 |
448 | if figname is not None:
449 | plt.savefig(figname)#, facecolor=color)
450 |
451 | def make_figure(self, data, NSIDE=32, figname=None):
452 | dsh = data.shape
453 |
454 | if len(dsh)==2:
455 | data = data[None,:,:,None]
456 | elif len(dsh)==3:
457 | if dsh[0]==1:
458 | data = data[..., None]
459 | elif dsh[-1]==1:
460 | data = data[None]
461 |         activations = self.get_activations(data)
462 |         # Make sure there's no activation
463 |         # which has more filters than NSIDE
464 |         for activation in activations:
465 | if len(activation.shape) > 2:
466 | NSIDE = max(NSIDE, activation.shape[-1])
467 |
468 | print("Using NSIDE: %d" % NSIDE)
469 |
470 | self.remove_doubles(activations)
471 | self.im_all(self._activations_nonred, NSIDE=NSIDE, figname=figname)
472 |
473 | if __name__=='__main__':
474 | import sys
475 |
476 | import h5py
477 |
478 | try:
479 | fn = sys.argv[1]
480 |     except IndexError:
481 |         print("\nExpected input datafile as argument\n")
482 |         sys.exit(1)
483 |
484 | plot_multiple_ranked(fn, nside=5)
485 |
486 |
487 |
488 |
--------------------------------------------------------------------------------
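Note on the activation-extraction pattern used by VisualizeLayers.get_activations above: it builds one backend evaluation function per layer and runs each with the learning phase pinned to test mode. Below is a minimal standalone sketch of the same pattern, assuming the Keras 1/2 backend API this repository targets; `model` is a compiled single-input model and `x` is one batch shaped (1, NFREQ, NTIME, 1):

    import keras.backend as K

    def hidden_activations(model, x):
        # One evaluation function per layer output
        outputs = [layer.output for layer in model.layers]
        fns = [K.function([model.input, K.learning_phase()], [out])
               for out in outputs]
        # Learning phase 0 = test mode: dropout off, batch norm frozen
        return [x] + [fn([x, 0.])[0] for fn in fns]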
/single_pulse_ml/plots/Freq_train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liamconnor/single_pulse_ml/88b6b76ebf3d3939214d9785d4e1c5076f653c38/single_pulse_ml/plots/Freq_train.png
--------------------------------------------------------------------------------
/single_pulse_ml/reader.py:
--------------------------------------------------------------------------------
1 | """ Tools for io as well as creating training
2 | and test data sets.
3 | """
4 |
5 | import os
6 |
7 | import time
8 | import numpy as np
9 | import h5py
10 | import glob
 11 | import pickle
 12 | from single_pulse_ml.dataproc import normalize_data
13 | try:
14 | import matplotlib.pylab as plt
15 | except:
16 | pass
17 |
18 | try:
19 | import filterbank
20 | except:
21 | pass
22 |
23 |
24 | def read_hdf5(fn):
25 | """ Read in data from .hdf5 file
26 | containing dynamic spectra, dm-time array,
27 | and data labels
28 | """
29 |
30 | f = h5py.File(fn, 'r')
31 | data_freq = f['data_freq_time'][:]
32 |
33 | try:
34 | y = f['labels'][:]
35 | except:
36 | print("labels dataset not there")
 37 |         y = -1*np.ones([len(data_freq)])  # -1 marks unlabeled data
38 |
39 | try:
40 | data_dm = f['data_dm_time'][:]
41 | except:
42 | print("dm-time dataset not there")
43 | data_dm = None
44 |
45 | try:
46 | data_mb = f['multibeam_snr'][:]
47 | except:
48 | print("multibeam dataset not there")
49 | data_mb = None
50 |
51 | return data_freq, y, data_dm, data_mb
52 |
53 | def write_to_fil(data, header, fn):
54 | filterbank.create_filterbank_file(
55 | fn, header, spectra=data, mode='readwrite')
56 | print("Writing to %s" % fn)
57 |
58 | def read_fil_data(fn, start=0, stop=1e7):
59 | print("Reading filterbank file %s \n" % fn)
60 | fil_obj = filterbank.FilterbankFile(fn)
61 | header = fil_obj.header
 62 |     delta_t = fil_obj.header['tsamp'] # sampling time in seconds
63 | fch1 = header['fch1']
64 | nchans = header['nchans']
65 | foff = header['foff']
66 | fch_f = fch1 + nchans*foff
67 | freq = np.linspace(fch1, fch_f, nchans)
68 | data = fil_obj.get_spectra(start, stop)
69 | # turn array into time-major, for preprocess
70 | # data = data.transpose()
71 |
72 | return data, freq, delta_t, header
73 |
74 | def read_pathfinder_npy(fn):
75 | data = np.load(fn)
76 | nfreq, ntimes = data.shape[0], data.shape[1]
77 |
78 | if len(data)!=16:
79 | data = data.reshape(-1, nfreq//16, ntimes).mean(1)
80 |
81 | return data
82 |
83 | def rebin_arr(data, n0_f=1, n1_f=1):
84 | """ Rebin 2d array data to have shape
85 | (n0_f, n1_f)
86 | """
87 | assert len(data.shape)==2
88 |
89 | n0, n1 = data.shape
90 | data_rb = data[:n0//n0_f * n0_f, :n1//n1_f * n1_f]
91 | data_rb = data_rb.reshape(n0_f, n0//n0_f, n1_f, n1//n1_f)
92 | data_rb = data_rb.mean(1).mean(-1)
93 |
94 | return data_rb
95 |
 96 | def im(data, title='', figname='out.png'):
 97 |     fig = plt.figure()
 98 |     plt.imshow(data, aspect='auto', interpolation='nearest', cmap='Greys')
 99 |     plt.title(title)
100 |     plt.savefig(figname)
101 |     plt.show()
102 |
103 | def combine_data_DT(fn):
104 | """ Combine the training set data in DM / Time space,
105 | assuming text file with lines:
106 |
107 | # filepath label
108 | DM20-100_vdif_assembler+a=00+n=02_DM-T_ +11424.89s.npy 0
109 | DM20-100_vdif_assembler+a=00+n=02_DM-T_ +19422.29s.npy 1
110 | DM20-100_vdif_assembler+a=00+n=02_DM-T_ +21658.40s.npy 0
111 |
112 | e.g. usage: combine_data_DT('./single_pulse_ml/data/test/data_list_DM.txt')
113 | """
114 |
115 | f = open(fn,'r')
116 |
117 | data_full, y = [], []
118 | k=0
119 | for ff in f:
120 | fn = './single_pulse_ml/data/' + ff.strip()[:-2]
121 | try:
122 | data = np.load(fn)
123 | except ValueError:
124 | continue
125 | k+=1
126 | label = int(ff[-2])
127 | y.append(label)
128 | data = normalize_data(data)
129 | data = rebin_arr(data, 64, 250)
130 |
131 | data_full.append(data)
132 |
133 | ndm, ntimes = data.shape
134 |
135 | data_full = np.concatenate(data_full, axis=0)
136 | data_full.shape = (k, -1)
137 |
138 | return data_full, np.array(y)
139 |
140 | def combine_data_FT(fn):
141 | """ combine_data_FT('./single_pulse_ml/data/data_list')
142 | """
143 | f = open(fn,'r')
144 |
145 | # data and its label class
146 | data_full, y = [], []
147 |
148 | for ff in f:
149 | line = ff.split(' ')
150 |
151 | fn, label = line[0], int(line[1])
152 |
153 | y.append(label)
154 | print(fn)
155 | tstamp = fn.split('+')[-2]
156 |
157 | #fdm = glob.glob('./*DM-T*%s*.npy' % tstamp)
158 | fn = './single_pulse_ml/data/test/' + fn
159 | data = read_pathfinder_npy(fn)
160 | data = normalize_data(data)
161 | data_full.append(data)
162 |
163 | nfreq, ntimes = data.shape[0], data.shape[-1]
164 |
165 | data_full = np.concatenate(data_full, axis=0)
166 | data_full.shape = (-1, nfreq*ntimes)
167 |
168 | return data_full, np.array(y)
169 |
170 | def write_data(data, y, fname='out'):
171 | training_arr = np.concatenate((data, y[:, None]), axis=-1)
172 |
173 | np.save(fname, training_arr)
174 |
175 |
176 | def read_data(fn):
177 | arr = np.load(fn)
178 | data, y = arr[:, :-1], arr[:, -1]
179 |
180 | return data, y
181 |
182 | def read_pkl(fn):
183 | if fn[-4:]!='.pkl': fn+='.pkl'
184 |
185 | file = open(fn, 'rb')
186 |
187 | model = pickle.load(file)
188 |
189 | return model
190 |
191 | def write_pkl(model, fn):
192 | if fn[-4:]!='.pkl': fn+='.pkl'
193 |
194 | file = open(fn, 'wb')
195 | pickle.dump(model, file)
196 |
197 | print("Wrote to pkl file: %s" % fn)
198 |
199 | def get_labels():
200 | """ Cross reference DM-T files with Freq-T
201 | files and create a training set in DM-T space.
202 | """
203 |
204 | fin = open('./single_pulse_ml/data/data_list','r')
205 | fout = open('./single_pulse_ml/data/data_list_DM','a')
206 |
207 | for ff in fin:
208 | x = ff.split(' ')
209 | n, c = x[0], int(x[1])
210 | try:
211 | t0 = n.split('+')[-2]
212 | float(t0)
213 | except ValueError:
214 | t0 = n.split('+')[-1].split('s')[0]
215 |
216 | newlist = glob.glob('./single_pulse_ml/data/DM*DM*%s*' % t0)
217 |
218 | if len(newlist) > 0:
219 | string = "%s %s\n" % (newlist[0].split('/')[-1], c)
220 | fout.write(string)
221 |
222 | def create_training_set(freqtime=True,
223 | fout='./single_pulse_ml/data/data_freqtime_train'):
224 | if freqtime:
225 |         data, y = combine_data_FT('./single_pulse_ml/data/data_list')
226 |     else:
227 |         data, y = combine_data_DT('./single_pulse_ml/data/test/data_list_DM.txt')
228 |
229 | write_data(data, y, fname=fout)
230 |
231 | def shuffle_array(data_1, data_2=None):
232 | """ Take one or two data array(s), shuffle
233 | in place, and shuffle the second array in the same
234 | ordering, if applicable.
235 | """
236 | ntrigger = len(data_1)
237 | index = np.arange(ntrigger)
238 |
239 |     if len(data_1.shape) > 2:
240 |         data_1 = data_1.reshape(ntrigger, -1)
241 |         if data_2 is not None:
242 |             data_2 = data_2.reshape(ntrigger, -1)
243 |
244 |     data_1_ = np.concatenate((data_1, index[:, None]), axis=-1)
245 |     np.random.shuffle(data_1_)
246 |     index_shuffle = (data_1_[:, -1]).astype(int)
247 |     if data_2 is not None:
248 |         data_2 = data_2[index_shuffle]
249 |
250 |     return data_1_[:, :-1], data_2
251 |
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
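How reader.rebin_arr downsamples: it trims each axis so it divides evenly, then takes block means to reach shape (n0_f, n1_f). A usage sketch with illustrative sizes (the 64 x 250 target is what combine_data_DT uses):

    import numpy as np
    from single_pulse_ml import reader

    spec = np.random.normal(0, 1, (1024, 1000))       # (NFREQ, NTIME)
    spec_rb = reader.rebin_arr(spec, n0_f=64, n1_f=250)
    assert spec_rb.shape == (64, 250)                 # block means, not decimation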
/single_pulse_ml/run_frb_simulation.py:
--------------------------------------------------------------------------------
1 | """ Script to build a dataset out of simulated
2 | single pulses + false positive triggers
3 | """
4 |
5 | from single_pulse_ml import sim_parameters
6 | from single_pulse_ml import telescope
7 | #from single_pulse_ml import simulate_frb
8 | import simulate_frb
9 |
10 | # TELESCOPE PARAMETERS:
11 | freq = (800, 400) # (first channel, last channel) in MHz; band runs 800 -> 400
12 | FREQ_REF = 600 # reference frequency in MHz
13 | DELTA_T = 0.0016 # time res in seconds
14 | NAME = "CHIMEPathfinder"
15 |
16 | # SIMULATION PARAMETERS
17 | NFREQ = 32 # Number of frequencies. Must agree with FP data
18 | NTIME = 250 # Number of time stamps per trigger
19 | dm = (-0.05, 0.05)
20 | fluence = (1, 10)
21 | width = (0.0016, 0.75) # width lognormal dist in seconds
22 | spec_ind = (-4., 4.)
23 | disp_ind = 2.
24 | scat_factor = (-4., -1.5)
25 | NRFI = 5000
26 | SNR_MIN = 5.0
27 | SNR_MAX = 25.00
28 | out_file_name = None
29 | mk_plot = False
30 | NSIDE = 8
31 | dm_time_array = False
32 | outname_tag = 'apertif_250'
33 |
34 | #fn_rfi = './data/arts_FPs_33583.hdf5'
35 | #fn_noise = './data/apertif_background3669.npy'
36 |
37 | # If no background data available, use None option
38 | fn_rfi = None # Use Gaussian noise as false positive data
39 | fn_noise = None # Use Gaussian noise for simulated FRBs
40 |
41 | sim_obj = sim_parameters.SimParams(dm=dm, fluence=fluence,
42 | width=width, spec_ind=spec_ind,
43 | disp_ind=disp_ind, scat_factor=scat_factor,
44 | SNR_MIN=SNR_MIN, SNR_MAX=SNR_MAX,
45 | out_file_name=out_file_name, NRFI=NRFI,
46 | NTIME=NTIME, NFREQ=NFREQ,
47 | mk_plot=mk_plot, NSIDE=NSIDE, )
48 |
49 | tel_obj = telescope.Telescope(freq=freq, FREQ_REF=FREQ_REF,
50 | DELTA_T=DELTA_T, name=NAME)
51 |
52 | data, labels, params, snr = simulate_frb.run_full_simulation(
53 | sim_obj, tel_obj, fn_rfi=fn_rfi,
54 | fn_noise=fn_noise,
55 | dm_time_array=dm_time_array,
56 | outname_tag=outname_tag)
57 |
58 |
--------------------------------------------------------------------------------
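With the parameters above, run_full_simulation builds its output name from NTIME, NFREQ, round(max(dm)), the SNR window, and outname_tag, i.e. ./data/data_nt250_nf32_dm0_snr5-25_apertif_250.hdf5 (a timestamp is appended if the file already exists). A sketch of inspecting that output with the repository's own reader; the path is what the settings above imply, so check the script's printout for the actual name:

    from single_pulse_ml import reader

    fn = './data/data_nt250_nf32_dm0_snr5-25_apertif_250.hdf5'
    data_freq, y, data_dm, data_mb = reader.read_hdf5(fn)
    print(data_freq.shape)        # (ntrigger, NFREQ, NTIME)
    print(int(y.sum()), len(y))   # simulated FRBs vs. total triggers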
/single_pulse_ml/run_single_pulse_DL.py:
--------------------------------------------------------------------------------
  1 | """ Script to train and test one or multiple deep
  2 | neural networks. Input models are expected to be
  3 | sequential keras models saved as .hdf5 files.
4 | Input data are .hdf5 files with data sets
5 | 'labels', 'data_freq_time', 'data_dm_time'.
6 | They can be read by single_pulse_ml.reader.read_hdf5
7 |
8 | """
9 | import sys
10 |
11 | import numpy as np
12 | import time
13 | import h5py
14 |
15 | #from single_pulse_ml import reader
16 | #from single_pulse_ml import frbkeras
17 | #from single_pulse_ml import plot_tools
18 |
19 | import reader
20 | import frbkeras
21 | import plot_tools
22 |
23 | try:
24 | import matplotlib
25 | matplotlib.use('Agg')
26 |
27 | import matplotlib.pyplot as plt
28 | from matplotlib import gridspec
 29 |     print("matplotlib imported")
 30 | except:
 31 |     print("matplotlib import failed; plotting disabled")
 32 |     pass
33 |
34 | FREQTIME=True # train 2D frequency-time CNN
35 | TIME1D=False # train 1D pulse-profile CNN
36 | DMTIME=False # train 2D DM-time CNN
37 | MULTIBEAM=False # train feed-forward NN on simulated multibeam data
38 |
39 | # If True, the different nets will be clipped after
40 | # feature extraction layers and will not be compiled / fit
41 | MERGE=False
42 |
43 | MK_PLOT=False
44 | CLASSIFY_ONLY=False
45 | save_classification=True
46 | model_nm = "./model/model_name"
47 | prob_threshold = 0.0
48 |
49 | ## Input hdf5 file.
50 | fn = './data/input_data.hdf5'
51 |
52 | # Save tf model as .hdf5
53 | save_model = True
54 | fnout = "./model/model_out_name"
55 |
56 | NDM=300 # number of DMs in input array
57 | WIDTH=64 # width to use of arrays along time axis
58 | train_size=0.5 # fraction of dataset to train on
59 |
60 | ftype = fn.split('.')[-1]
61 |
62 | # Create empty lists for final merged model
63 | model_list = []
64 | train_data_list = []
65 | eval_data_list = []
66 |
67 | # Configure the accuracy metric for evaluation
68 | metrics = ["accuracy", "precision", "false_negatives", "recall"]
69 |
70 | if __name__=='__main__':
71 | # read in time-freq data, labels, dm-time data
72 | data_freq, y, data_dm, data_mb = reader.read_hdf5(fn)
73 | NTRIGGER = len(y)
74 |
75 | print("Using %s" % fn)
76 |
77 | NFREQ = data_freq.shape[1]
78 | NTIME = data_freq.shape[2]
79 |
80 | # low time index, high time index
81 | tl, th = NTIME//2-WIDTH//2, NTIME//2+WIDTH//2
82 |
83 | if data_freq.shape[-1] > (th-tl):
84 | data_freq = data_freq[..., tl:th]
85 |
86 | dshape = data_freq.shape
87 |
88 | # normalize data
89 | data_freq = data_freq.reshape(len(data_freq), -1)
90 | data_freq -= np.median(data_freq, axis=-1)[:, None]
91 | data_freq /= np.std(data_freq, axis=-1)[:, None]
92 |
93 | # zero out nans
94 | data_freq[data_freq!=data_freq] = 0.0
95 | data_freq = data_freq.reshape(dshape)
96 |
97 | if DMTIME is True:
98 | if data_dm.shape[-1] > (th-tl):
99 | data_dm = data_dm[:, :, tl:th]
100 |
101 | if data_dm.shape[-2] > 100:
102 | data_dm = data_dm[:, NDM//2-50:NDM//2+50]
103 |
104 | # tf/keras expects 4D tensors
105 | data_dm = data_dm[..., None]
106 |
107 | if TIME1D is True:
108 | data_1d = data_freq.mean(1)[..., None]
109 | from scipy.signal import detrend
110 | data_1d = detrend(data_1d, axis=1)
111 |
112 | if FREQTIME is True:
113 | # tf/keras expects 4D tensors
114 | data_freq = data_freq[..., None]
115 |
116 | if CLASSIFY_ONLY is False:
117 | # total number of triggers
118 | NTRIGGER = len(y)
119 |
120 | # fraction of true positives vs. total triggers
121 |         TP_FRAC = float(y.sum()) / NTRIGGER
122 |
123 | # number of events on which to train
124 | NTRAIN = int(train_size * NTRIGGER)
125 |
126 | ind = np.arange(NTRIGGER)
127 | np.random.shuffle(ind)
128 |
129 | ind_train = ind[:NTRAIN]
130 | ind_eval = ind[NTRAIN:]
131 |
132 | train_labels, eval_labels = y[ind_train], y[ind_eval]
133 |
134 | # Convert labels (integers) to binary class matrix
135 | train_labels = frbkeras.keras.utils.to_categorical(train_labels)
136 | eval_labels = frbkeras.keras.utils.to_categorical(eval_labels)
137 |
138 |
139 | if FREQTIME is True:
140 |
141 | if CLASSIFY_ONLY is True:
142 | print("Classifying freq-time data")
143 | model_freq_time_nm = model_nm + 'freq_time.hdf5'
144 | eval_data_list.append(data_freq)
145 |
146 | model_freq_time = frbkeras.load_model(model_freq_time_nm)
147 | y_pred_prob = model_freq_time.predict(data_freq)
148 | y_pred_prob = y_pred_prob[:,1]
149 | y_pred_freq_time = np.round(y_pred_prob)
150 |
151 | ind_frb = np.where(y_pred_prob>prob_threshold)[0]
152 |
153 | frbkeras.print_metric(y, y_pred_freq_time)
154 |
155 | print("\n%d out of %d events with probability > %.2f:\n %s" %
156 | (len(ind_frb), len(y_pred_prob),
157 | prob_threshold, ind_frb))
158 |
159 | low_to_high_ind = np.argsort(y_pred_prob)
160 |             fnout_ranked = fn.split('.hdf5')[0] + 'freq_time_candidates.hdf5'
161 |
162 | eval_data_freq = data_freq #hack
163 | eval_labels = y
164 |
165 | if MK_PLOT is True:
166 | plot_tools.plot_ranked_trigger(data_freq[..., 0],
167 | y_pred_prob[:, None], h=5, w=5, ascending=False,
168 | outname='out')
169 |
170 | print("\nSaved them and all probabilities to: \n%s" % fnout_ranked)
171 | else:
172 | print("Learning frequency-time array")
173 |
174 | # split up data into training and evaluation sets
175 | train_data_freq, eval_data_freq = data_freq[ind_train], data_freq[ind_eval]
176 |
177 | # Build and train 2D CNN
178 | model_freq_time, score_freq_time = frbkeras.construct_conv2d(
179 | features_only=MERGE, fit=True,
180 | train_data=train_data_freq, eval_data=eval_data_freq,
181 | train_labels=train_labels, eval_labels=eval_labels,
182 | epochs=5, nfilt1=32, nfilt2=64,
183 | nfreq=NFREQ, ntime=WIDTH)
184 |
185 | model_list.append(model_freq_time)
186 | train_data_list.append(train_data_freq)
187 | eval_data_list.append(eval_data_freq)
188 |
189 | if save_model is True:
190 | if MERGE is True:
191 | fnout_freqtime = fnout+'freq_time_features.hdf5'
192 | else:
193 | fnout_freqtime = fnout + 'freq_time.hdf5'
194 | model_freq_time.save(fnout_freqtime)
195 | print("Saving freq-time model to: %s" % fnout_freqtime)
196 |
197 |         fnout_ranked = fn.split('.hdf5')[0] + 'freq_time_candidates.hdf5'
198 | y_pred_prob = model_freq_time.predict(eval_data_freq)
199 | y_pred_prob = y_pred_prob[:,1]
200 | ind_frb = np.where(y_pred_prob>prob_threshold)[0]
201 |
202 | if save_classification is True:
203 |             fnout_ranked = fn.split('.hdf5')[0] + 'freq_time_candidates.hdf5'
204 | g = h5py.File(fnout_ranked, 'w')
205 | g.create_dataset('data_frb_candidate', data=eval_data_freq)
206 | g.create_dataset('frb_index', data=ind_frb)
207 | g.create_dataset('probability', data=y_pred_prob)
208 | g.create_dataset('labels', data=eval_labels)
209 | g.close()
210 | print("\nSaved classification results to: \n%s" % fnout_ranked)
211 |
212 | if DMTIME is True:
213 |
214 | if CLASSIFY_ONLY is True:
215 | print("Classifying dm-time data")
216 |
217 | model_dm_time_nm = model_nm + 'dm_time.hdf5'
218 | eval_data_list.append(data_dm)
219 |
220 | model_dm_time = frbkeras.load_model(model_dm_time_nm)
221 | y_pred_prob = model_dm_time.predict(data_dm)
222 | y_pred_dm_time = np.round(y_pred_prob[:,1])
223 |
224 | eval_data_dm = data_dm #hack
225 | eval_labels = y
226 | mistakes = np.where(y_pred_dm_time!=y)[0]
227 | print("\nMistakes: %s" % mistakes)
228 |
229 | frbkeras.print_metric(y, y_pred_dm_time)
230 | print("")
231 | else:
232 | print("Learning DM-time array")
233 | # split up data into training and evaluation sets
234 | train_data_dm, eval_data_dm = data_dm[ind_train], data_dm[ind_eval]
235 |
236 |
237 |
238 |
239 | # Build and train 2D CNN
240 | model_dm_time, score_dm_time = frbkeras.construct_conv2d(
241 | features_only=MERGE, fit=True,
242 | train_data=train_data_dm, eval_data=eval_data_dm,
243 | train_labels=train_labels, eval_labels=eval_labels,
244 | epochs=5, nfilt1=32, nfilt2=64,
245 | nfreq=NDM, ntime=WIDTH)
246 |
247 | model_list.append(model_dm_time)
248 | train_data_list.append(train_data_dm)
249 | eval_data_list.append(eval_data_dm)
250 |
251 | if save_model is True:
252 | if MERGE is True:
253 | fnout_dmtime = fnout+'dm_time_features.hdf5'
254 | else:
255 | fnout_dmtime = fnout+'dm_time.hdf5'
256 | model_dm_time.save(fnout_dmtime)
257 | print("Saving dm-time model to: %s" % fnout_dmtime)
258 |         y_pred_prob = model_dm_time.predict(eval_data_dm)[:, 1]
259 |         if save_classification is True:
260 |             fnout_ranked = fn.split('.hdf5')[0] + 'dm_time_candidates.hdf5'
261 |             g = h5py.File(fnout_ranked, 'w')
262 |             g.create_dataset('data_frb_candidate', data=eval_data_dm)
263 |             g.create_dataset('frb_index', data=np.where(y_pred_prob > prob_threshold)[0])
264 | g.create_dataset('probability', data=y_pred_prob)
265 | g.create_dataset('labels', data=eval_labels)
266 | g.close()
267 | print("\nSaved classification results to: \n%s" % fnout_ranked)
268 |
269 | if TIME1D is True:
270 |
271 | if CLASSIFY_ONLY is True:
272 | print("Classifying pulse profile")
273 |
274 | model_time_nm = model_nm + '1d_time.hdf5'
275 | eval_data_list.append(data_1d)
276 |
277 | model_1d_time = frbkeras.load_model(model_time_nm)
278 |             y_pred_prob = model_1d_time.predict(data_1d)[:, 1]
279 |             y_pred_time = np.round(y_pred_prob)
280 |             ind_frb = np.where(y_pred_prob > prob_threshold)[0]
281 |
282 | eval_data_1d = data_1d #hack
283 | eval_labels = y
284 |
285 | print("\nMistakes: %s" % np.where(y_pred_time!=y)[0])
286 |
287 | frbkeras.print_metric(y, y_pred_time)
288 | print("")
289 | else:
290 | print("Learning pulse profile")
291 | # split up data into training and evaluation sets
292 | train_data_1d, eval_data_1d = data_1d[ind_train], data_1d[ind_eval]
293 |
294 | # Build and train 1D CNN
295 | model_1d_time, score_1d_time = frbkeras.construct_conv1d(
296 | features_only=MERGE, fit=True,
297 | train_data=train_data_1d, eval_data=eval_data_1d,
298 | train_labels=train_labels, eval_labels=eval_labels,
299 | nfilt1=64, nfilt2=128)
300 |
301 | model_list.append(model_1d_time)
302 | train_data_list.append(train_data_1d)
303 | eval_data_list.append(eval_data_1d)
304 |
305 | if save_model is True:
306 | if MERGE is True:
307 | fnout_1dtime = fnout+'1d_time_features.hdf5'
308 | else:
309 | fnout_1dtime = fnout+'1d_time.hdf5'
310 | model_1d_time.save(fnout_1dtime)
311 | print("Saving 1d-time model to: %s" % fnout_1dtime)
312 |
313 | y_pred_prob = model_1d_time.predict(eval_data_1d)
314 | y_pred_prob = y_pred_prob[:,1]
315 | ind_frb = np.where(y_pred_prob>prob_threshold)[0]
316 |
317 | if save_classification is True:
318 |             fnout_ranked = fn.split('.hdf5')[0] + '1d_time_candidates.hdf5'
319 | g = h5py.File(fnout_ranked, 'w')
320 | g.create_dataset('data_frb_candidate', data=eval_data_1d)
321 | g.create_dataset('frb_index', data=ind_frb)
322 | g.create_dataset('probability', data=y_pred_prob)
323 | g.create_dataset('labels', data=eval_labels)
324 | g.close()
325 | print("\nSaved classification results to: \n%s" % fnout_ranked)
326 |
327 | if MULTIBEAM is True:
328 |
329 | if CLASSIFY_ONLY is True:
330 | print("Classifying multibeam SNR")
331 |
332 | model_multibeam_nm = model_nm + '_multibeam.hdf5'
333 | eval_data_list.append(data_mb)
334 |
335 | model_1d_multibeam = frbkeras.load_model(model_multibeam_nm)
336 | y_pred_prob = model_1d_multibeam.predict(data_mb)
337 | y_pred_time = np.round(y_pred_prob[:,1])
338 |
339 | print("\nMistakes: %s" % np.where(y_pred_time!=y)[0])
340 |
341 | frbkeras.print_metric(y, y_pred_time)
342 | print("")
343 | else:
344 | print("Learning multibeam data")
345 |
346 | # Right now just simulate multibeam, simulate S/N per beam.
347 | import simulate_multibeam as sm
348 |
349 | nbeam = 40
350 | # Simulate a multibeam dataset
351 | data_mb, labels_mb = sm.make_multibeam_data(ntrigger=NTRIGGER)
352 |
353 | data_mb_fp = data_mb[labels_mb[:,1]==0]
354 | data_mb_tp = data_mb[labels_mb[:,1]==1]
355 |
356 | train_data_mb = np.zeros([NTRAIN, nbeam])
357 | eval_data_mb = np.zeros([NTRIGGER-NTRAIN, nbeam])
358 |
359 | data_ = np.empty_like(data_mb)
360 | labels_ = np.empty_like(labels_mb)
361 |
362 | kk, ll = 0, 0
363 | for ii in range(NTRAIN):
364 | if train_labels[ii,1]==0:
365 | train_data_mb[ii] = data_mb_fp[kk]
366 | kk+=1
367 | elif train_labels[ii,1]==1:
368 | train_data_mb[ii] = data_mb_tp[ll]
369 | ll+=1
370 |
371 | for ii in range(NTRIGGER-NTRAIN):
372 | if eval_labels[ii,1]==0:
373 | eval_data_mb[ii] = data_mb_fp[kk]
374 | kk+=1
375 | elif eval_labels[ii,1]==1:
376 | eval_data_mb[ii] = data_mb_tp[ll]
377 | ll+=1
378 |
379 | model_mb, score_mb = frbkeras.construct_ff1d(
380 | features_only=MERGE, fit=True,
381 | train_data=train_data_mb,
382 | train_labels=train_labels,
383 | eval_data=eval_data_mb,
384 | eval_labels=eval_labels,
385 | nbeam=nbeam, epochs=5,
386 | nlayer1=32, nlayer2=32, batch_size=32)
387 |
388 | model_list.append(model_mb)
389 | train_data_list.append(train_data_mb)
390 | eval_data_list.append(eval_data_mb)
391 |
392 | if save_model is True:
393 | if MERGE is True:
394 | fnout_mb = fnout+'_multibeam_features.hdf5'
395 | else:
396 | fnout_mb = fnout+'_multibeam.hdf5'
397 | model_mb.save(fnout_mb)
398 |
399 |             fnout_ranked = fn.split('.hdf5')[0] + 'multibeam_candidates.hdf5'
400 | y_pred_prob = model_mb.predict(eval_data_mb)
401 | y_pred_prob = y_pred_prob[:,1]
402 | ind_frb = np.where(y_pred_prob>prob_threshold)[0]
403 |
404 | print(fnout_ranked)
405 | print(eval_data_mb.shape)
406 |
407 | g = h5py.File(fnout_ranked, 'w')
408 | g.create_dataset('data_frb_candidate', data=eval_data_mb)
409 | g.create_dataset('frb_index', data=ind_frb)
410 | g.create_dataset('probability', data=y_pred_prob)
411 | g.create_dataset('labels', data=eval_labels)
412 | g.close()
413 |
414 |
415 | if len(model_list)==1:
416 | score = model_list[0].evaluate(eval_data_list[0], eval_labels, batch_size=32)
417 | prob, predictions, mistakes = frbkeras.get_predictions(
418 | model_list[0], eval_data_list[0],
419 | true_labels=eval_labels)
420 | print(mistakes)
421 |         print("Evaluation score: %s" % str(score))
422 |
423 | elif MERGE is True:
424 |
425 | if CLASSIFY_ONLY is True:
426 | print("Classifying merged model")
427 | model_merged_nm = model_nm + '_merged.hdf5'
428 |
429 | model_merged = frbkeras.load_model(model_merged_nm)
430 |             y_pred_prob = model_merged.predict(eval_data_list)
431 | y_pred = np.round(y_pred_prob[:,1])
432 |
433 | print("Mistakes: %s" % np.where(y_pred!=y)[0])
434 | frbkeras.print_metric(y, y_pred)
435 | print("")
436 | else:
437 |
438 | print("\n=================================")
439 | print(" Merging & training %d models" % len(model_list))
440 | print("=================================\n")
441 |
442 | model, score = frbkeras.merge_models(
443 | model_list, train_data_list,
444 | train_labels, eval_data_list, eval_labels,
445 | epochs=5)
446 |
447 | prob, predictions, mistakes = frbkeras.get_predictions(
448 | model, eval_data_list,
449 | true_labels=eval_labels[:, 1])
450 |
451 |
452 | if save_model is True:
453 | fnout_merged = fnout+'_merged.hdf5'
454 | model.save(fnout_merged)
455 |
456 | print("\nMerged NN accuracy: %f" % score[1])
457 | print("\nIndex of mistakes: %s\n" % mistakes)
458 | frbkeras.print_metric(eval_labels[:, 1], predictions)
459 |
460 | if CLASSIFY_ONLY is False:
461 | print('\n==========Results==========')
462 | try:
463 | print("\nFreq-time accuracy:\n--------------------")
464 | y_pred_prob = model_freq_time.predict(eval_data_freq)
465 | y_pred = np.round(y_pred_prob[:,1])
466 | tfreq_acc, tfreq_prec, tfreq_rec, tfreq_f = frbkeras.print_metric(eval_labels[:,1], y_pred)
467 |
468 | mistakes_freq = np.where(y_pred!=eval_labels[:,1])[0]
469 | print("\nMistakes: %s" % mistakes_freq)
470 | except:
471 | pass
472 | try:
473 | print("\nDM-time accuracy:\n--------------------")
474 | y_pred_prob = model_dm_time.predict(eval_data_dm)
475 | y_pred = np.round(y_pred_prob[:,1])
476 | dm_acc, dm_prec, dm_rec, dm_f = frbkeras.print_metric(eval_labels[:,1], y_pred)
477 |
478 | mistakes_dm = np.where(y_pred!=eval_labels[:,1])[0]
479 | # np.save('data_dm_mistakes', eval_data_dm[mistakes])
480 | print("\nMistakes: %s" % mistakes_dm)
481 | except:
482 | pass
483 | try:
484 | print("\nPulse-profile Results:\n--------------------")
485 | y_pred_prob = model_1d_time.predict(eval_data_1d)
486 | y_pred = np.round(y_pred_prob[:,1])
487 | pp_acc, pp_prec, pp_rec, pp_f = frbkeras.print_metric(eval_labels[:,1], y_pred)
488 |
489 | mistakes_1d = np.where(y_pred!=eval_labels[:,1])[0]
490 | # np.save('data_1d_mistakes', eval_1d_dm[mistakes])
491 | print("\nMistakes: %s" % mistakes_1d)
492 | except:
493 | pass
494 | try:
495 | print("\nMultibeam Results:\n--------------------")
496 | y_pred_prob = model_mb.predict(eval_data_mb)
497 | y_pred = np.round(y_pred_prob[:,1])
498 | mb_acc, mb_prec, mb_rec, mb_f = frbkeras.print_metric(eval_labels[:,1], y_pred)
499 | print("\nMistakes: %s" % np.where(y_pred!=eval_labels[:,1])[0])
500 | except:
501 | pass
502 |
503 |
504 |
505 |
506 |
507 |
508 |
--------------------------------------------------------------------------------
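The per-trigger normalization near the top of this script is the key preprocessing step: each (NFREQ, NTIME) trigger is flattened, median-subtracted, scaled to unit standard deviation, and NaNs are zeroed. The same logic in isolation, with toy shapes for illustration:

    import numpy as np

    data = np.random.normal(0, 5, (10, 32, 64))   # (ntrigger, NFREQ, NTIME)
    dshape = data.shape
    data = data.reshape(len(data), -1)
    data -= np.median(data, axis=-1)[:, None]
    data /= np.std(data, axis=-1)[:, None]
    data[data != data] = 0.0    # NaN != NaN, so this zeroes NaNs
    data = data.reshape(dshape)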
/single_pulse_ml/sim_parameters.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import numpy as np
5 | import h5py
6 |
7 | class SimParams:
8 |
9 | def __init__(self, dm=(-0.01, 0.01), fluence=(0.1, 0.3),
10 | width=(3*0.0016, 0.75), spec_ind=(-3., 3.),
11 | disp_ind=2., scat_factor=(-4., -1.), NRFI=None, NSIM=None,
12 | SNR_MIN=10., SNR_MAX=100., out_file_name=None,
13 | NTIME=250, NFREQ=16, mk_plot=False, NSIDE=8):
14 |
15 | self._dm = dm
16 | self._fluence = fluence
17 | self._width = width
18 | self._spec_ind = spec_ind
19 | self._disp_ind = disp_ind
20 | self._scat_factor = scat_factor
21 |
22 | self._SNR_MIN = SNR_MIN
23 | self._SNR_MAX = SNR_MAX
24 | self._NTIME = NTIME
25 | self._NFREQ = NFREQ
26 | self._out_file_name = out_file_name
27 |
28 | self._NRFI = NRFI
29 | self._NSIM = NSIM
30 | self.data_rfi = None
31 | self.y = None # FP labels
32 |
33 | self._mk_plot = mk_plot
34 | self._NSIDE = NSIDE
35 |
36 | def generate_noise(self):
37 | y = np.zeros([self._NRFI])
38 | noise = np.random.normal(0, 1, self._NRFI*self._NTIME*self._NFREQ)
39 | noise = noise.reshape(-1, self._NFREQ*self._NTIME)
40 | self._NSIM = self._NRFI
41 |
42 | return noise, y
43 |
44 | def get_false_positives(self, fn):
45 |
46 | ftype = fn.split('.')[-1]
47 |
48 | if ftype in ('hdf5', 'h5'):
 49 |             f = h5py.File(fn, 'r')
50 | data_rfi = f['data_freq_time'][:]
51 | data_rfi = data_rfi.reshape(len(data_rfi), -1)
52 | y = f['labels'][:]
53 | elif ftype in ('npy',):
54 | f_rfi = np.load(fn)
55 | # Important step! Need to scramble RFI triggers.
56 | np.random.shuffle(f_rfi)
57 | # Read in data array and labels from RFI file
58 | data_rfi, y = f_rfi[:, :-1], f_rfi[:, -1]
59 | else:
60 | return
61 |
62 | if self._NRFI is not None:
63 | if self._NSIM is None:
64 | self._NSIM = self._NRFI
65 |
66 | self.data_rfi = data_rfi[:self._NRFI]
67 | self.y = y[:self._NRFI]
68 | else:
69 | self._NRFI = len(y)
70 | self._NSIM = self._NRFI
71 | self.data_rfi = data_rfi[:self._NSIM]
72 | self.y = y[:self._NSIM]
73 |
74 | return data_rfi, y
75 |
76 | def write_sim_data(self, data_freq_time, labels, fnout,
77 | data_dm_time=None, params=None, snr=None,
78 | ):
79 |
80 | ftype = fnout.split('.')[-1]
81 |
82 | if os.path.exists(fnout):
83 | t0_str = time.strftime("_%Y_%m_%d_%H:%M:%S", time.gmtime())
84 | fnout = fnout.split(ftype)[0][:-1] + t0_str + '.' + ftype
85 |
86 | if ftype in ('hdf5', 'h5'):
87 |
 88 |             f = h5py.File(fnout, 'w')
89 | f.create_dataset('data_freq_time', data=data_freq_time)
90 | f.create_dataset('labels', data=labels)
91 |
92 | if data_dm_time is not None:
93 | f.create_dataset('data_dm_time', data=data_dm_time)
94 | if params is not None:
95 | f.create_dataset('params', data=params)
96 | if snr is not None:
97 | f.create_dataset('snr', data=snr)
98 |
99 | f.close()
100 |
101 |         elif ftype in ('npy',):
102 |             np.save(fnout, data_freq_time)
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
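A minimal usage sketch for SimParams: generate Gaussian false positives (label 0) and write them out as a toy training file. The sizes and file name here are illustrative only:

    from single_pulse_ml import sim_parameters

    sim = sim_parameters.SimParams(NRFI=100, NFREQ=32, NTIME=250)
    noise, y = sim.generate_noise()   # noise: (100, 32*250); y: 100 zeros
    sim.write_sim_data(noise.reshape(-1, 32, 250), y, 'toy_train.hdf5')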
/single_pulse_ml/simulate_frb.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import numpy as np
4 | import glob
5 | from scipy import signal
6 |
7 | try:
8 | import matplotlib
9 | matplotlib.use('Agg')
10 | import matplotlib.pyplot as plt
11 | except:
12 | plt = None
13 | pass
14 |
15 | from single_pulse_ml import reader
16 | from single_pulse_ml import dataproc
17 | from single_pulse_ml import tools
18 |
19 | try:
20 | from single_pulse_ml import plot_tools
21 | except:
22 | plot_tools = None
23 |
24 |
25 | class Event(object):
26 | """ Class to generate a realistic fast radio burst and
27 | add the event to data, including scintillation, temporal
28 | scattering, spectral index variation, and DM smearing.
29 |
30 | This class was expanded from real-time FRB injection
31 | in Kiyoshi Masui's
32 | https://github.com/kiyo-masui/burst\_search
33 | """
34 | def __init__(self, t_ref, f_ref, dm, fluence, width,
35 | spec_ind, disp_ind=2, scat_factor=0):
36 | self._t_ref = t_ref
37 | self._f_ref = f_ref
38 | self._dm = dm
39 | self._fluence = fluence
40 | self._width = width
41 | self._spec_ind = spec_ind
42 | self._disp_ind = disp_ind
43 | self._scat_factor = min(1, scat_factor + 1e-18) # quick bug fix hack
44 |
 45 |     def disp_delay(self, f, _dm, _disp_ind=2.):
46 | """ Calculate dispersion delay in seconds for
47 | frequency,f, in MHz, _dm in pc cm**-3, and
48 | a dispersion index, _disp_ind.
49 | """
50 | return 4.148808e3 * _dm * (f**(-_disp_ind))
51 |
52 | def arrival_time(self, f):
53 | t = self.disp_delay(f, self._dm, self._disp_ind)
54 | t = t - self.disp_delay(self._f_ref, self._dm, self._disp_ind)
55 | return self._t_ref + t
56 |
57 | def calc_width(self, dm, freq_c, bw=400.0, NFREQ=1024,
58 | ti=0.001, tsamp=0.001, tau=0):
 59 |         """ Calculate the effective width of the pulse
60 | including DM smearing, sample time, etc.
61 | Input/output times are in seconds.
62 | """
63 |
64 | ti *= 1e3
65 | tsamp *= 1e3
66 | delta_freq = bw/NFREQ
67 |
68 | # taudm in milliseconds
69 | tdm = 8.3e-3 * dm * delta_freq / freq_c**3
70 | tI = np.sqrt(ti**2 + tsamp**2 + tdm**2 + tau**2)
71 |
72 | return 1e-3*tI
73 |
74 | def dm_smear(self, DM, freq_c, bw=400.0, NFREQ=1024,
75 | ti=1, tsamp=0.0016, tau=0):
76 | """ Calculate DM smearing SNR reduction
77 | """
78 | tau *= 1e3 # make ms
79 | ti *= 1e3
80 | tsamp *= 1e3
81 |
82 | delta_freq = bw / NFREQ
83 |
84 | tI = np.sqrt(ti**2 + tsamp**2 + (8.3 * DM * delta_freq / freq_c**3)**2)
85 |
86 | return (np.sqrt(ti**2 + tau**2) / tI)**0.5
87 |
88 | def scintillation(self, freq):
89 | """ Include spectral scintillation across
90 | the band. Approximate effect as a sinusoid,
91 | with a random phase and a random decorrelation
92 | bandwidth.
93 | """
94 | # Make location of peaks / troughs random
95 | scint_phi = np.random.rand()
96 | f = np.linspace(0, 1, len(freq))
97 |
 98 |         # Draw the number of scintles log-uniformly, between ~0 and 7
99 | nscint = np.exp(np.random.uniform(np.log(1e-3), np.log(7)))
100 |
101 | if nscint<1:
102 | nscint = 0
103 | # envelope = np.cos(nscint*(freq - self._f_ref)/self._f_ref + scint_phi)
104 | envelope = np.cos(2*np.pi*nscint*freq**-2/self._f_ref**-2 + scint_phi)
105 | envelope[envelope<0] = 0
106 | return envelope
107 |
108 | def gaussian_profile(self, nt, width, t0=0.):
109 | """ Use a normalized Gaussian window for the pulse,
110 | rather than a boxcar.
111 | """
112 | t = np.linspace(-nt//2, nt//2, nt)
113 | g = np.exp(-(t-t0)**2 / width**2)
114 |
115 | if not np.all(g > 0):
116 | g += 1e-18
117 |
118 | g /= g.max()
119 |
120 | return g
121 |
122 | def scat_profile(self, nt, f, tau=1.):
123 | """ Include exponential scattering profile.
124 | """
125 | tau_nu = tau * (f / self._f_ref)**-4.
126 | t = np.linspace(0., nt//2, nt)
127 |
128 | prof = 1 / tau_nu * np.exp(-t / tau_nu)
129 | return prof / prof.max()
130 |
131 | def pulse_profile(self, nt, width, f, tau=100., t0=0.):
132 | """ Convolve the gaussian and scattering profiles
133 | for final pulse shape at each frequency channel.
134 | """
135 | gaus_prof = self.gaussian_profile(nt, width, t0=t0)
136 | scat_prof = self.scat_profile(nt, f, tau)
137 | # pulse_prof = np.convolve(gaus_prof, scat_prof, mode='full')[:nt]
138 | pulse_prof = signal.fftconvolve(gaus_prof, scat_prof)[:nt]
139 |
140 | return pulse_prof
141 |
142 | def add_to_data(self, delta_t, freq, data, scintillate=True):
143 | """ Method to add already-dedispersed pulse
144 | to background noise data. Includes frequency-dependent
145 | width (smearing, scattering, etc.) and amplitude
146 | (scintillation, spectral index).
147 | """
148 |
149 | NFREQ = data.shape[0]
150 | NTIME = data.shape[1]
151 | tmid = NTIME//2
152 |
153 | scint_amp = self.scintillation(freq)
154 | self._fluence /= np.sqrt(NFREQ)
155 | stds = np.std(data)
156 | roll_ind = int(np.random.normal(0, 2))
157 |
158 | for ii, f in enumerate(freq):
159 | width_ = self.calc_width(self._dm, self._f_ref*1e-3,
160 | bw=400.0, NFREQ=NFREQ,
161 | ti=self._width, tsamp=delta_t, tau=0)
162 |
163 | index_width = max(1, (np.round((width_/ delta_t))).astype(int))
164 | tpix = int(self.arrival_time(f) / delta_t)
165 |
166 | if abs(tpix) >= tmid:
167 | # ensure that edges of data are not crossed
168 | continue
169 |
170 | pp = self.pulse_profile(NTIME, index_width, f,
171 | tau=self._scat_factor, t0=tpix)
172 | val = pp.copy()
173 | val /= (val.max()*stds)
174 | val *= self._fluence
175 | val /= (width_ / delta_t)
176 | val = val * (f / self._f_ref) ** self._spec_ind
177 |
178 | if scintillate is True:
179 | val = (0.1 + scint_amp[ii]) * val
180 |
181 | data[ii] += val
182 | data[ii] = np.roll(data[ii], roll_ind)
183 |
184 | def dm_transform(self, delta_t, data, freq, maxdm=5.0, NDM=50):
185 | """ Transform freq/time data to dm/time data.
186 | """
187 |
188 | if len(freq)<3:
189 | NFREQ = data.shape[0]
190 | freq = np.linspace(freq[0], freq[1], NFREQ)
191 |
192 | dm = np.linspace(-maxdm, maxdm, NDM)
193 | ndm = len(dm)
194 | ntime = data.shape[-1]
195 |
196 | data_full = np.zeros([ndm, ntime])
197 |
198 | for ii, dm in enumerate(dm):
199 | for jj, f in enumerate(freq):
200 | self._dm = dm
201 | tpix = int(self.arrival_time(f) / delta_t)
202 | data_rot = np.roll(data[jj], tpix, axis=-1)
203 | data_full[ii] += data_rot
204 |
205 | return data_full
206 |
207 | class EventSimulator():
208 | """Generates simulated fast radio bursts.
209 |     Event occurrences are drawn from a Poisson distribution.
210 |
211 |
212 | This class was expanded from real-time FRB injection
213 | in Kiyoshi Masui's
214 | https://github.com/kiyo-masui/burst\_search
215 | """
216 |
217 | def __init__(self, dm=(0.,2000.), fluence=(0.03,0.3),
218 | width=(2*0.0016, 1.), spec_ind=(-4.,4),
219 | disp_ind=2., scat_factor=(0, 0.5), freq=(800., 400.)):
220 | """
221 | Parameters
222 | ----------
223 | datasource : datasource.DataSource object
224 | Source of the data, specifying the data rate and band parameters.
225 | dm : float or pair of floats
226 |             Burst dispersion measure or dispersion measure range (pc cm^-3).
227 |         fluence : float or pair of floats
228 |             Burst fluence (at band centre) or fluence range (Jy s).
229 | width : float or pair of floats.
230 | Burst width or width range (s).
231 | spec_ind : float or pair of floats.
232 | Burst spectral index or spectral index range.
233 | disp_ind : float or pair of floats.
234 | Burst dispersion index or dispersion index range.
235 | freq : tuple
236 | Min and max of frequency range in MHz. Assumes low freq
237 | is first freq in array, not necessarily the lowest value.
238 |
239 | """
240 |
241 | self.width = width
242 | self.freq_low = freq[0]
243 | self.freq_up = freq[1]
244 |
245 | if hasattr(dm, '__iter__') and len(dm) == 2:
246 | self._dm = tuple(dm)
247 | else:
248 | self._dm = (float(dm), float(dm))
249 | if hasattr(fluence, '__iter__') and len(fluence) == 2:
250 | fluence = (fluence[1]**-1, fluence[0]**-1)
251 | self._fluence = tuple(fluence)
252 | else:
253 | self._fluence = (float(fluence)**-1, float(fluence)**-1)
254 | if hasattr(width, '__iter__') and len(width) == 2:
255 | self._width = tuple(width)
256 | else:
257 | self._width = (float(width), float(width))
258 | if hasattr(spec_ind, '__iter__') and len(spec_ind) == 2:
259 | self._spec_ind = tuple(spec_ind)
260 | else:
261 | self._spec_ind = (float(spec_ind), float(spec_ind))
262 | if hasattr(disp_ind, '__iter__') and len(disp_ind) == 2:
263 | self._disp_ind = tuple(disp_ind)
264 | else:
265 | self._disp_ind = (float(disp_ind), float(disp_ind))
266 | if hasattr(scat_factor, '__iter__') and len(scat_factor) == 2:
267 | self._scat_factor = tuple(scat_factor)
268 | else:
269 | self._scat_factor = (float(scat_factor), float(scat_factor))
270 |
271 | # self._freq = datasource.freq
272 | # self._delta_t = datasource.delta_t
273 |
274 | self._freq = np.linspace(self.freq_low, self.freq_up, 256) # tel parameter
275 |
276 | def draw_event_parameters(self):
277 | dm = uniform_range(*self._dm)
278 | fluence = uniform_range(*self._fluence)**(-2/3.)
279 | # Convert to Jy ms from Jy s
280 | fluence *= 1e3*self._fluence[0]**(-2/3.)
281 | spec_ind = uniform_range(*self._spec_ind)
282 | disp_ind = uniform_range(*self._disp_ind)
283 | # turn this into a log uniform dist. Note not *that* many
284 | # FRBs have been significantly scattered. Should maybe turn this
285 | # knob down.
286 | scat_factor = np.exp(np.random.uniform(*self._scat_factor))
287 | # change width from uniform to lognormal
288 | width = np.random.lognormal(np.log(self._width[0]), self._width[1])
289 | width = max(min(width, 100*self._width[0]), 0.5*self._width[0])
290 | return dm, fluence, width, spec_ind, disp_ind, scat_factor
291 |
292 | def uniform_range(min_, max_):
293 | return random.uniform(min_, max_)
294 |
295 |
296 | def gen_simulated_frb(NFREQ=16, NTIME=250, sim=True, fluence=(0.03,0.3),
297 | spec_ind=(-4, 4), width=(2*0.0016, 1), dm=(-0.01, 0.01),
298 | scat_factor=(-3, -0.5), background_noise=None, delta_t=0.0016,
299 | plot_burst=False, freq=(800, 400), FREQ_REF=600., scintillate=True,
300 | ):
301 | """ Simulate fast radio bursts using the EventSimulator class.
302 |
303 | Parameters
304 | ----------
305 | NFREQ : np.int
306 | number of frequencies for simulated array
307 | NTIME : np.int
308 | number of times for simulated array
309 | sim : bool
310 | whether or not to simulate FRB or just create noise array
311 | spec_ind : tuple
312 | range of spectral index
313 | width : tuple
314 | range of widths in seconds (atm assumed dt=0.0016)
315 | scat_factor : tuple
316 | range of scattering measure (atm arbitrary units)
317 | background_noise :
318 | if None, simulates white noise. Otherwise should be an array (NFREQ, NTIME)
319 | plot_burst : bool
320 | generates a plot of the simulated burst
321 |
322 | Returns
323 | -------
324 | data : np.array
325 | data array (NFREQ, NTIME)
326 | parameters : tuple
327 | [dm, fluence, width, spec_ind, disp_ind, scat_factor]
328 |
329 | """
330 |     plot_burst = plot_burst and (plt is not None)
331 |
332 | # Hard code incoherent Pathfinder data time resolution
333 | # Maybe instead this should take a telescope class, which
334 | # has all of these things already.
335 | t_ref = 0. # hack
336 |
337 | if len(freq) < 3:
338 | freq=np.linspace(freq[0], freq[1], NFREQ)
339 |
340 | if background_noise is None:
341 | # Generate background noise with unit variance
342 | data = np.random.normal(0, 1, NTIME*NFREQ).reshape(NFREQ, NTIME)
343 | else:
344 | data = background_noise
345 |
346 | # What about reading in noisy background?
347 | if sim is False:
348 | return data, []
349 |
350 | # Call class using parameter ranges
351 | ES = EventSimulator(dm=dm, scat_factor=scat_factor, fluence=fluence,
352 | width=width, spec_ind=spec_ind)
353 | # Realize event parameters for a single FRB
354 | dm, fluence, width, spec_ind, disp_ind, scat_factor = ES.draw_event_parameters()
355 | # Create event class with those parameters
356 | E = Event(t_ref, FREQ_REF, dm, 10e-4*fluence,
357 | width, spec_ind, disp_ind, scat_factor)
358 | # Add FRB to data array
359 | data -= np.median(data)
360 | data /= np.std(data)
361 |
362 | E.add_to_data(delta_t, freq, data, scintillate=scintillate)
363 |
364 |     if plot_burst:
365 |         plt.subplot(211)
366 |         plt.imshow(data.reshape(-1, NTIME), aspect='auto',
367 |                    interpolation='nearest', vmin=0, vmax=10)
368 |         plt.subplot(212)
369 |         plt.plot(data.reshape(-1, NTIME).mean(0))
370 |
371 | return data, [dm, fluence, width, spec_ind, disp_ind, scat_factor]
372 |
373 |
374 | def inject_in_filterbank_background(fn_fil):
375 |     """ Build background training data from a filterbank file:
376 |     dedisperse chunks at random DMs and save 250-sample cutouts.
377 |     Default params are for Apertif data. """
378 |
379 | chunksize = 5e5
380 | ii=0
381 |
382 | data_full =[]
383 | nchunks = 250
384 | nfrb_chunk = 8
385 | chunksize = 2**16
386 |
387 | for ii in range(nchunks):
388 | downsamp = 2**((np.random.rand(nfrb_chunk)*6).astype(int))
389 |
390 | try:
391 | # drop FRB in random location in data chunk
392 |             rawdatafile = reader.filterbank.FilterbankFile(fn_fil)
393 | dt = rawdatafile.header['tsamp']
394 | freq_up = rawdatafile.header['fch1']
395 | nfreq = rawdatafile.header['nchans']
396 | freq_low = freq_up + nfreq*rawdatafile.header['foff']
397 | data = rawdatafile.get_spectra(ii*chunksize, chunksize)
398 | except:
399 | continue
400 |
401 |
402 | #dms = np.random.uniform(50, 750, nfrb_chunk)
403 | dm0 = np.random.uniform(90, 750)
404 |         end_width = abs(4.148808e3 * dm0 * (freq_up**-2 - freq_low**-2)) / dt  # sweep in samples
405 | data.dedisperse(dm0)
406 | NFREQ, NT = data.data.shape
407 |
408 | print("Chunk %d with DM=%.1f" % (ii, dm0))
409 |         for jj in range(nfrb_chunk):
410 | if 8192*(jj+1) > (NT - end_width):
411 | print("Skipping at ", 8192*(jj+1))
412 | continue
413 | data_event = data.data[:, jj*8192:(jj+1)*8192]
414 | data_event = data_event.reshape(NFREQ, -1, downsamp[jj]).mean(-1)
415 | print(data_event.shape)
416 | data_event = data_event.reshape(32, 48, -1).mean(1)
417 |
418 | NTIME = data_event.shape[-1]
419 | data_event = data_event[..., NTIME//2-125:NTIME//2+125]
420 | data_event -= np.mean(data_event, axis=-1, keepdims=True)
421 | data_full.append(data_event)
422 |
423 | data_full = np.concatenate(data_full)
424 | data_full = data_full.reshape(-1, 32, 250)
425 |
426 | np.save('data_250.npy', data_full)
427 |
428 |
429 | def inject_in_filterbank(fn_fil, fn_fil_out, N_FRBs=1,
430 | NFREQ=1536, NTIME=2**15):
431 | """ Inject an FRB in each chunk of data
432 | at random times. Default params are for Apertif data.
433 | """
434 |
435 | chunksize = 5e5
436 | ii=0
437 |
438 | params_full_arr = []
439 |
440 |     for ii in range(N_FRBs):
441 | start, stop = chunksize*ii, chunksize*(ii+1)
442 | # drop FRB in random location in data chunk
443 | offset = int(np.random.uniform(0.1*chunksize, 0.9*chunksize))
444 |
445 | data, freq, delta_t, header = reader.read_fil_data(fn_fil,
446 | start=start, stop=stop)
447 |
448 | # injected pulse time in seconds since start of file
449 | t0_ind = offset+NTIME//2+chunksize*ii
450 | t0 = t0_ind * delta_t
451 |
452 | if len(data[0])==0:
453 | break
454 |
455 |         data_event = (data[offset:offset+NTIME].transpose()).astype(float)
456 |
457 | data_event, params = gen_simulated_frb(NFREQ=NFREQ,
458 | NTIME=NTIME, sim=True, fluence=(0.01, 1.),
459 | spec_ind=(-4, 4), width=(delta_t, 2),
460 | dm=(100, 1000), scat_factor=(-4, -0.5),
461 | background_noise=data_event,
462 | delta_t=delta_t, plot_burst=False,
463 | freq=(1550, 1250),
464 | FREQ_REF=1550.)
465 |
466 | params.append(offset)
467 | print("Injecting with DM:%f width: %f offset: %d" %
468 | (params[0], params[2], offset))
469 |
470 | data[offset:offset+NTIME] = data_event.transpose()
471 |
472 | #params_full_arr.append(params)
473 | width = params[2]
474 | downsamp = max(1, int(width/delta_t))
475 |
476 | params_full_arr.append([params[0], 20.0, t0, t0_ind, downsamp])
477 |
478 | if ii==0:
479 | fn_rfi_clean = reader.write_to_fil(data, header, fn_fil_out)
480 | elif ii>0:
481 | fil_obj = reader.filterbank.FilterbankFile(fn_fil_out, mode='readwrite')
482 | fil_obj.append_spectra(data)
483 |
484 | del data
485 |
486 | params_full_arr = np.array(params_full_arr)
487 |
488 | np.savetxt('/home/arts/connor/arts-analysis/simulated.singlepulse', params_full_arr)
489 |
490 | return params_full_arr
491 |
492 | # a, p = gen_simulated_frb(NFREQ=1536, NTIME=2**15, sim=True, fluence=(2),
493 | # spec_ind=(-4, 4), width=(dt), dm=(40.0),
494 | # scat_factor=(-3, -0.5), background_noise=None, delta_t=dt,
495 | # plot_burst=False, freq=(1550, 1250), FREQ_REF=1400.,
496 | # # )
497 |
498 | # a, p = gen_simulated_frb(NFREQ=32, NTIME=250, sim=True, fluence=(5, 100),
499 | # spec_ind=(-4, 4), width=(dt, 1), dm=(-0.1, 0.1),
500 | # scat_factor=(-3, -0.5), background_noise=None, delta_t=dt,
501 | # plot_burst=False, freq=(800, 400), FREQ_REF=600.,
502 | # )
503 |
504 |
505 | def run_full_simulation(sim_obj, tel_obj, mk_plot=False,
506 | fn_rfi='./data/all_RFI_8001.npy',
507 | fn_noise=None,
508 | ftype='hdf5', dm_time_array=True,
509 | outname_tag='', outdir = './data/',
510 | figname='./plots/simulated_frb.pdf'):
511 |
512 | outfn = outdir + "data_nt%d_nf%d_dm%d_snr%d-%d_%s.%s" \
513 | % (sim_obj._NTIME, sim_obj._NFREQ,
514 | round(max(sim_obj._dm)), sim_obj._SNR_MIN,
515 | sim_obj._SNR_MAX, outname_tag, ftype)
516 |
517 | if fn_rfi is not None:
518 | data_rfi, y = sim_obj.get_false_positives(fn_rfi)
519 | else:
520 | data_rfi, y = sim_obj.generate_noise()
521 |
522 | if fn_noise is not None:
523 | noise_arr = np.load(fn_noise) # Hack
524 |
525 | sim_obj._NRFI = min(sim_obj._NRFI, data_rfi.shape[0])
526 | print("\nUsing %d false-positive triggers" % sim_obj._NRFI)
527 | print("Simulating %d FRBs\n" % sim_obj._NSIM)
528 |
529 | arr_sim_full = [] # data array with all events
530 | yfull = [] # label array FP=0, TP=1
531 | arr_dm_time_full = []
532 |
533 | params_full_arr = []
534 | width_full_arr = []
535 |
536 | snr = [] # Keep track of simulated FRB signal-to-noise
537 | ii = -1
538 | jj = 0
539 |
540 | # Loop through total number of events
541 | while jj < (sim_obj._NRFI + sim_obj._NSIM):
542 | jj = len(arr_sim_full)
543 | ii += 1
544 | if ii % 500 == 0:
545 | print("simulated:%d kept:%d" % (ii, jj))
546 |
547 | # If ii is greater than the number of RFI events in f,
548 | # simulate an FRB
549 | #sim = bool(ii >= NRFI)
550 |
551 | if ii < sim_obj._NRFI:
552 | data = data_rfi[ii].reshape(sim_obj._NFREQ, sim_obj._NTIME)
553 |
554 | # Normalize data to have unit variance and zero median
555 | data = reader.rebin_arr(data, sim_obj._NFREQ, sim_obj._NTIME)
556 | data = dataproc.normalize_data(data)
557 |
558 | arr_sim_full.append(data.reshape(sim_obj._NFREQ*sim_obj._NTIME)[None])
559 | yfull.append(0) # Label the RFI with '0'
560 | continue
561 |
562 | elif (ii >=sim_obj._NRFI and jj < (sim_obj._NRFI + sim_obj._NSIM)):
563 |
564 | if fn_noise is not None:
565 | noise_ind = (jj-sim_obj._NRFI) % len(noise_arr) # allow for roll-over
566 | noise = (noise_arr[noise_ind]).copy()
567 | noise[noise!=noise] = 0.0
568 | noise -= np.median(noise, axis=-1)[..., None]
569 | noise -= np.median(noise)
570 | noise /= np.std(noise)
571 | # noise[:, 21] = 0 # hack mask out single bad channel
572 | else:
573 | noise = None
574 |
575 | # maybe should feed gen_sim a tel object and
576 | # a set of burst parameters...
577 | arr_sim, params = gen_simulated_frb(NFREQ=sim_obj._NFREQ,
578 | NTIME=sim_obj._NTIME,
579 | delta_t=tel_obj._DELTA_T,
580 | freq=tel_obj._freq,
581 | FREQ_REF=tel_obj._FREQ_REF,
582 | spec_ind=sim_obj._spec_ind,
583 | width=sim_obj._width,
584 | scat_factor=sim_obj._scat_factor,
585 | dm=sim_obj._dm,
586 | fluence=sim_obj._fluence,
587 | background_noise=noise,
588 | plot_burst=False,
589 | sim=True,
590 | )
591 |
592 | # Normalize data to have unit variance and zero median
593 | arr_sim = reader.rebin_arr(arr_sim, sim_obj._NFREQ, sim_obj._NTIME)
594 | arr_sim = dataproc.normalize_data(arr_sim)
595 | # get SNR of simulated pulse. Center should be at ntime//2
596 | # rebin until max SNR is found.
597 | snr_ = tools.calc_snr(arr_sim.mean(0), fast=False)
598 |
599 | # Only use events within a range of signal-to-noise
600 | if snr_ > sim_obj._SNR_MIN and snr_ < sim_obj._SNR_MAX:
601 | arr_sim_full.append(arr_sim.reshape(-1, sim_obj._NFREQ*sim_obj._NTIME))
602 | yfull.append(1) # Label the simulated FRB with '1'
603 | params_full_arr.append(params) # Save parameters bursts
604 | snr.append(snr_)
605 | continue
606 | else:
607 | continue
608 |
609 | if dm_time_array is True:
610 | E = Event(0, tel_obj._FREQ_REF, 0.0, 1.0, tel_obj._DELTA_T, 0., )
611 |
612 | for ii, data in enumerate(arr_sim_full):
613 | if ii%500==0:
614 | print("DM-transformed:%d" % ii)
615 |
616 | data = data.reshape(-1, sim_obj._NTIME)
617 | data = dataproc.normalize_data(data)
618 | data_dm_time = E.dm_transform(tel_obj._DELTA_T, data, tel_obj._freq)
619 | data_dm_time = dataproc.normalize_data(data_dm_time)
620 | arr_dm_time_full.append(data_dm_time)
621 |
622 | NDM = data_dm_time.shape[0]
623 | arr_dm_time_full = np.concatenate(arr_dm_time_full)
624 | arr_dm_time_full = arr_dm_time_full.reshape(-1, NDM, sim_obj._NTIME)
625 | else:
626 |         arr_dm_time_full = None
627 |
628 | params_full_arr = np.concatenate(params_full_arr).reshape(-1, 6)
629 | snr = np.array(snr)
630 | yfull = np.array(yfull)
631 |
632 | arr_sim_full = np.concatenate(arr_sim_full, axis=-1)
633 | arr_sim_full = arr_sim_full.reshape(-1, sim_obj._NFREQ*sim_obj._NTIME)
634 |
635 | print("\nGenerated %d simulated FRBs with mean SNR: %f"
636 | % (sim_obj._NSIM, snr.mean()))
637 | print("Used %d RFI triggers" % sim_obj._NRFI)
638 |     print("Total triggers in data set: %d" % arr_sim_full.shape[0])
639 |
640 |     if ftype == 'hdf5':
641 | arr_sim_full = arr_sim_full.reshape(-1, sim_obj._NFREQ, sim_obj._NTIME)
642 | sim_obj.write_sim_data(arr_sim_full, yfull, outfn,
643 | data_dm_time=arr_dm_time_full,
644 | params=params_full_arr,
645 | snr=snr)
646 | print("Saving training/label data to:\n%s" % outfn)
647 | else:
648 | full_label_arr = np.concatenate((arr_sim_full, yfull[:, None]), axis=-1)
649 | print("Saving training/label data to:\n%s" % outfn)
650 |
651 | # save down the training data with labels
652 | np.save(outfn, full_label_arr)
653 |
654 |     if plt is None or plot_tools is None:
655 |         sim_obj._mk_plot = False
656 |
657 |     if sim_obj._mk_plot:
658 |         kk=0
659 |
660 |         plot_tools.plot_simulated_events(
661 |             arr_sim_full, yfull, figname,
662 | sim_obj._NSIDE, sim_obj._NFREQ,
663 | sim_obj._NTIME, cmap='Greys')
664 |
665 | return arr_sim_full, yfull, params_full_arr, snr
666 |
667 |
668 |
669 |
670 |
--------------------------------------------------------------------------------
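A worked example of Event.disp_delay: with the standard dispersion index of 2, the delay is t = 4.148808e3 s * DM * f_MHz**-2. For DM = 100 pc cm**-3 across the 400-800 MHz band used in this module:

    t_400 = 4.148808e3 * 100. * 400.**-2   # ~2.59 s at the bottom of the band
    t_800 = 4.148808e3 * 100. * 800.**-2   # ~0.65 s at the top
    print(t_400 - t_800)                   # ~1.94 s of dispersive sweep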
/single_pulse_ml/simulate_multibeam.py:
--------------------------------------------------------------------------------
1 | # Script for simulating multi-beam detections
2 | # 5 December 2017
3 | # Liam Connor
4 | import sys
5 |
6 | import numpy as np
7 | from numpy.random import seed
8 | import h5py
9 |
10 | import keras
11 | from keras.models import Sequential
12 | from keras.layers import Dense, Dropout, Flatten, Merge
13 | from keras.layers import Conv1D, Conv2D
14 | from keras.layers import MaxPooling2D, MaxPooling1D, GlobalAveragePooling1D
15 | from keras.optimizers import SGD
16 | from keras.models import load_model
17 |
18 | import frbkeras
19 |
20 | def gauss(x, xo, sig):
21 | return np.exp(-(x-xo)**2/sig**2)
22 |
23 | def generate_multibeam(nbeam=40, rows=8, cols=5, width=27, nside=1000):
24 |     """ Lay out nbeam 2D Gaussian beams on an (nside, nside) grid;
25 |     width is the beam width in arcminutes. """
26 | # convert arcminutes to degrees
27 | width /= 60.
28 |
29 | # theta in degrees
30 | theta = np.linspace(-1, 1, 100)
31 |
32 | # compute 1D gaussian beam
33 | beam_theta = gauss(theta, 0, width)
34 |
35 | # compute 1D beam outer product with itself for 2D
36 | beam_2d = beam_theta[None]*beam_theta[:, None]
37 |
38 | # create nbeam arrays
39 | beam_arr = np.zeros([nside, nside, nbeam])
40 |
41 | # Make each beam
42 | kk=0
43 | for ii in range(rows):
44 | for jj in range(cols):
45 | # get x,y coordinates of each beam center
46 | xx, yy = 500-4*50+ii*50, 500-2*50+jj*50
47 | beam_arr[xx:xx+100, yy:yy+100, kk] += beam_2d
48 | kk+=1
49 |
50 | return beam_arr
51 |
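# A minimal usage sketch for generate_multibeam (nothing beyond the
# function above is assumed): the returned array is (nside, nside, nbeam),
# so indexing one sky pixel gives every beam's gain at that position:
#
#     A = generate_multibeam(nbeam=40, rows=8, cols=5)
#     gains = A[520, 510]                 # all 40 beam responses at one pixel
#     print(gains.argmax(), gains.max())  # brightest beam and its gain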
52 | def test_merge_model(n=32, m=64, ntrigger=10000):
53 | data = np.random.normal(0, 1, n*m*ntrigger).reshape(ntrigger, n, m)
54 | data[ntrigger//2:, :, m//2-2:m//2+1] += 0.25
55 | data /= np.std(data.reshape(-1, n*m), -1)[:, None, None]
56 | data -= np.median(data, 2)[:, :, None]
57 |
58 | # set RFI labels to 0, FRBs to 1
59 | labels = np.zeros([ntrigger])
60 | labels[ntrigger//2:] = 1
61 |
62 | # convert to categorical array with shape (-1, 2)
63 | labels = labels.astype(int)
64 | labels = keras.utils.to_categorical(labels)
65 |
66 | data = data[..., None]
67 |
68 | model_2d_freq_time, score_freq_time = frbkeras.construct_conv2d(
69 | features_only=False, fit=True,
70 | train_data=data[::2], eval_data=data[1::2],
71 | train_labels=labels[::2], eval_labels=labels[1::2],
72 | epochs=5, nfilt1=32, nfilt2=64,
73 | nfreq=n, ntime=m)
74 | print(score_freq_time)
75 |
76 | train_data_mb, train_labels, eval_data_mb, eval_labels, model_mb = run_model(ntrigger)
77 |
78 | model_list = [model_mb, model_2d_freq_time]
79 | train_data_list = [train_data_mb, data[::2]]
80 | eval_data_list = [eval_data_mb, data[1::2]]
81 |
82 | model, score = frbkeras.merge_models(model_list, train_data_list,
83 | train_labels, eval_data_list, eval_labels,
84 | epoch=5)
85 |
86 | print(score)
87 |
88 | return data, labels, train_data_mb, train_labels, model
89 |
90 | def make_multibeam_data(ntrigger=2304, tp_frac=0.5,
91 | nbeam=40, rows=8, cols=5):
92 |
93 | A = generate_multibeam(nbeam=nbeam, rows=rows, cols=cols)
94 |     # Euclidean source counts, N(>S) ~ S**-3/2, i.e. S = U**(-2/3) for uniform U
95 | sn = np.random.uniform(1, 1000, 100*ntrigger)**-(2/3.)
96 | sn /= np.median(sn)
97 | sn *= 15
98 | #sn[sn > 150] = 150
99 |
100 | det_ = []
101 | sn_ = []
102 | multis = 0
103 |
104 | # drop FRBs at random locations with random flux
105 | for ii, ss in enumerate(sn):
106 | xi = np.random.uniform(400, 650)
107 | yi = np.random.uniform(300, 750)
108 | abeams = A[int(xi), int(yi)] * ss
109 | beamdet = np.where(abeams>=6)[0]
110 | if len(beamdet)>0:
111 | det_.append(beamdet)
112 | sn_.append(abeams[beamdet])
113 | if len(beamdet)>1:
114 | multis += 1
115 |
116 | ntrigger = min(2*len(det_), ntrigger)
117 | data = np.zeros([nbeam*ntrigger]).reshape(-1, nbeam)
118 | N_FP = int((1-tp_frac)*ntrigger)
119 | N_TP = int(tp_frac*ntrigger)
120 |
121 | for ii in range(N_FP):
122 | # nbeam_ii = int(np.random.uniform(1, 32))
123 |
124 | # Generate number of beams RFI shows up in
125 | nbeam_ii = min(nbeam, int(np.random.lognormal(1.25, 0.8)))
126 |
127 |         ind = set(np.random.uniform(1, nbeam, nbeam_ii).astype(int))
128 | data[ii][list(ind)] = np.random.normal(20, 5, len(ind))
129 |
130 | for ii in range(N_TP):
131 | # beam = int(np.random.uniform(1, 32))
132 | data[N_FP+ii][det_[ii]] = sn_[ii]#np.random.normal(20, 5, 1)
133 |
134 | # set RFI labels to 0, FRBs to 1
135 | labels = np.zeros([ntrigger])
136 | labels[N_FP:] = 1
137 |
138 | # convert to categorical array with shape (-1, 2)
139 | labels = labels.astype(int)
140 | labels = keras.utils.to_categorical(labels)
141 |
142 | # Print to see if fraction of multibeam detections is expected
143 |     print("Fraction of multibeam detections: %f" % (float(multis) / len(det_)))
144 |
145 | return data, labels
146 |
147 | def run_model(n, nbeam=40):
148 | import frbkeras
149 |
150 | data_mb, labels = make_multibeam_data(nbeam=nbeam, ntrigger=n, tp_frac=0.5)
151 | train_data_mb = data_mb[::2]
152 | train_labels = labels[::2]
153 | eval_data_mb = data_mb[1::2]
154 | eval_labels = labels[1::2]
155 |
156 | model_mb, score_mb = frbkeras.construct_ff1d(
157 | features_only=False, fit=True,
158 | train_data=train_data_mb,
159 | train_labels=train_labels,
160 | eval_data=eval_data_mb,
161 | eval_labels=eval_labels,
162 | nbeam=nbeam, epochs=5,
163 | nlayer1=32, nlayer2=32,
164 | batch_size=32)
165 |
166 | if len(score_mb)>1:
167 | prob, predictions, mistakes = frbkeras.get_predictions(
168 | model_mb, eval_data_mb,
169 | true_labels=eval_labels)
170 | print(score_mb)
171 |
172 | return train_data_mb, train_labels, eval_data_mb, eval_labels, model_mb
173 |
174 |
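175 | if __name__ == '__main__':
176 |     # A hedged end-to-end sketch using only names defined in this file;
177 |     # the trigger count is arbitrary. run_model builds a multibeam
178 |     # training set, splits it in half, and fits the feed-forward network.
179 |     run_model(2048, nbeam=40)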
--------------------------------------------------------------------------------
/single_pulse_ml/telescope.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Telescope():
5 |
6 | def __init__(self, freq=(800, 400), FREQ_REF=600,
7 | DELTA_T=0.0016, name=None):
8 | """ Telescope class that can be fed to simulation
9 |
10 | Parameters:
11 | -----------
12 | freq : tuple
13 | two-element tuple with (FREQ_LOW, FREQ_UP) in MHz
14 | e.g. for CHIME this is (800., 400.)
15 |         FREQ_REF : float
16 |             reference frequency in MHz used for dedispersion
17 |         DELTA_T : float
18 |             time resolution in seconds
19 |         name : str
20 |             telescope name, e.g. CHIME_PATHFINDER
21 |         (NFREQ and NTIME are properties of the simulation,
22 |             set in SimParams, not of the telescope.)
23 |
24 | """
25 | self._FREQ_LOW = freq[0]
26 | self._FREQ_UP = freq[-1]
27 | self._freq = freq
28 | self._FREQ_REF = FREQ_REF
29 | self._DELTA_T = DELTA_T
30 | self._telname = name
31 |
--------------------------------------------------------------------------------
/single_pulse_ml/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liamconnor/single_pulse_ml/88b6b76ebf3d3939214d9785d4e1c5076f653c38/single_pulse_ml/tests/__init__.py
--------------------------------------------------------------------------------
/single_pulse_ml/tests/test_frbkeras.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 |
4 | from single_pulse_ml import frbkeras
5 |
6 | class TestFRBkeras(unittest.TestCase):
7 |
8 | def test_get_classification_results(self):
9 |         """ Test that true/false positives/negatives
10 |         are correctly identified.
11 | """
12 | y_true = np.round(np.random.rand(10000))
13 | y_pred = np.round(np.random.rand(10000))
14 |
15 | TP, FP, TN, FN = frbkeras.get_classification_results(y_true, y_pred)
16 | minlen = min(np.array([len(TP), len(FP), len(TN), len(FN)]))
17 |         assert minlen > 0, "Each of TP/FP/TN/FN should be non-empty for random labels"
18 |
19 | # Now create 1000 false events that are predicted true
20 | y_true = np.zeros([1000])
21 | y_pred = np.ones([1000])
22 |
23 | TP, FP, TN, FN = frbkeras.get_classification_results(y_true, y_pred)
24 |
25 | assert len(TP)==0
26 | assert len(FP)!=0
27 | assert len(TN)==0
28 | assert len(FN)==0
29 |
30 | def test_construct_conv2d(self):
31 | """ Test the 2d CNN by generating fake
32 | data (gaussian noise) and fitting model
33 | """
34 | ntime = 64
35 | nfreq = 32
36 | ntrigger = 1000
37 |
38 | data = np.random.normal(0, 1, ntrigger*nfreq*ntime)
39 | data.shape = (ntrigger, nfreq, ntime, 1)
40 | labels = np.round(np.random.rand(ntrigger))
41 | labels = frbkeras.keras.utils.to_categorical(labels)
42 |
43 | # try training a model on random noise. should not do
44 | # better than ~50% acc
45 | model, score = frbkeras.construct_conv2d(train_data=data[::2],
46 | train_labels=labels[::2],
47 | eval_data=data[1::2],
48 | eval_labels=labels[1::2],
49 | fit=True, epochs=3)
50 | assert score[1]<0.9, "Trained on random noise. Should not have high acc"
51 | self.model_conv2d = model
52 |
53 |
54 | def test_construct_conv1d(self):
55 | """ Test the 1d CNN by generating fake
56 | data (gaussian noise) and fitting model
57 | """
58 | ntime = 64
59 | ntrigger = 1000
60 |
61 | data = np.random.normal(0, 1, ntrigger*ntime)
62 | data.shape = (ntrigger, ntime, 1)
63 | labels = np.round(np.random.rand(ntrigger))
64 | labels = frbkeras.keras.utils.to_categorical(labels)
65 |
66 | # try training a model on random noise. should not do
67 | # better than ~50% acc
68 | model, score = frbkeras.construct_conv1d(fit=True, train_data=data[::2],
69 | train_labels=labels[::2],
70 | eval_data=data[1::2],
71 | eval_labels=labels[1::2],
72 | batch_size=16, epochs=3)
73 |
74 | assert score[1]<0.9, "Trained on random noise. Should not have high acc"
75 |
76 | self.model_conv1d = model
77 |
78 | def test_construct_ff1d(self):
79 | nbeam = 32
80 | ntrigger = 1000
81 |
82 |         data = np.random.normal(0, 1, ntrigger*nbeam)
83 |         data.shape = (ntrigger, nbeam)
84 |         labels = np.round(np.random.rand(ntrigger))
85 |         labels = frbkeras.keras.utils.to_categorical(labels)
86 | 
87 |         # try training the feed-forward beam model on random noise.
88 |         # should not do better than ~50% acc
89 |         model, score = frbkeras.construct_ff1d(fit=True, train_data=data[::2],
90 |                                                train_labels=labels[::2],
91 |                                                eval_data=data[1::2],
92 |                                                eval_labels=labels[1::2],
93 |                                                nbeam=nbeam, batch_size=16, epochs=3)
94 |
95 |
96 | if __name__ == '__main__':
97 | unittest.main()
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/single_pulse_ml/tests/test_reader.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest import TestCase
3 | import h5py
4 | import numpy as np
5 |
6 | from single_pulse_ml import reader
7 |
8 | class TestReader(TestCase):
9 |
10 | def test_read_hdf5(self):
11 | NFREQ = 64
12 | NTIME = 250
13 | NCANDIDATES = 100
14 | data_freq_time = np.random.normal(0, 1, NFREQ*NTIME*NCANDIDATES)
15 | data_freq_time.shape = (NCANDIDATES, NFREQ, NTIME)
16 | labels = np.ones([NCANDIDATES])
17 | fn = './test.hdf5'
18 |
19 | g = h5py.File(fn,'w')
20 | g.create_dataset('data_freq_time', data=data_freq_time)
21 | g.create_dataset('labels', data=labels)
22 | g.create_dataset('data_dm_time', data=[])
23 | g.close()
24 |
25 | data_freq, y, data_dm, data_mb = reader.read_hdf5(fn)
26 |         assert data_freq.shape == (NCANDIDATES, NFREQ, NTIME)
27 |         assert len(y) == NCANDIDATES
28 | if __name__ == '__main__':
29 | unittest.main()
30 |
--------------------------------------------------------------------------------
/single_pulse_ml/tests/test_run_frb_simulation.py:
--------------------------------------------------------------------------------
1 | """ Test script generating 100 RFI events +
2 | 100 simulated FRBs. Gaussian noise is used.
3 | Parameters of the CHIME Pathfinder are used.
4 | Data are saved to hdf5 file and a plot is made
5 | of FRBs.
6 | """
7 |
8 | from single_pulse_ml import sim_parameters
9 | from single_pulse_ml import telescope
10 | from single_pulse_ml import simulate_frb
11 |
12 | # TELESCOPE PARAMETERS:
13 | freq = (800, 400) # (FREQ_LOW, FREQ_UP) in MHz
14 | FREQ_REF = 600 # reference frequency in MHz
15 | DELTA_T = 0.0016 # time res in seconds
16 | NAME = "CHIMEPathfinder"
17 |
18 | # SIMULATION PARAMETERS
19 | NFREQ = 32 # Number of frequencies. Must agree with FP data
20 | NTIME = 250 # Number of time stamps per trigger
21 | dm = (-0.05, 0.05)
22 | fluence = (5, 100)
23 | width = (2*0.0016, 0.75) # width lognormal dist in seconds
24 | spec_ind = (-4., 4.)
25 | disp_ind = 2.
26 | scat_factor = (-4., -1.5)
27 | NRFI = 100
28 | SNR_MIN = 8.0
29 | SNR_MAX = 100.0
30 | out_file_name = None
31 | mk_plot = True
32 | NSIDE = 8
33 | dm_time_array = False
34 | outname_tag = 'test'
35 | outdir = '../data/'
36 | figname = '../plots/test_out_fig.pdf'
37 |
38 | fn_rfi = None
39 | fn_noise = None
40 |
41 | sim_obj = sim_parameters.SimParams(dm=dm, fluence=fluence,
42 | width=width, spec_ind=spec_ind,
43 | disp_ind=disp_ind, scat_factor=scat_factor,
44 | SNR_MIN=SNR_MIN, SNR_MAX=SNR_MAX,
45 | out_file_name=out_file_name, NRFI=NRFI,
46 | NTIME=NTIME, NFREQ=NFREQ,
47 | mk_plot=mk_plot, NSIDE=NSIDE, )
48 |
49 | tel_obj = telescope.Telescope(freq=freq, FREQ_REF=FREQ_REF,
50 | DELTA_T=DELTA_T, name=NAME)
51 |
52 | data, labels, params, snr = simulate_frb.run_full_simulation(
53 | sim_obj, tel_obj, fn_rfi=fn_rfi,
54 | fn_noise=fn_noise,
55 | dm_time_array=dm_time_array,
56 | outname_tag=outname_tag, outdir=outdir,
57 | figname=figname)
58 |
59 |
--------------------------------------------------------------------------------
/single_pulse_ml/tests/test_simulate_frb.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 |
4 | from single_pulse_ml import simulate_frb
5 |
6 | class TestSimulate_FRB(unittest.TestCase):
7 |
8 | def test_gen_simulated_frb(self):
9 |
10 |
11 | sim_data, params = simulate_frb.gen_simulated_frb(NFREQ=16, NTIME=250, sim=True,
12 | fluence=(0.03,0.3),
13 | spec_ind=(-4, 4), width=(2*0.0016, 1), dm=(-0.15, 0.15),
14 | scat_factor=(-3, -0.5), background_noise=None, delta_t=0.0016,
15 | plot_burst=False, freq=(800, 400), FREQ_REF=600.,
16 | )
17 |
18 | dm, fluence, width, spec_ind, disp_ind, scat_factor = params
19 |
20 | print(dm)
21 |         assert np.abs(dm) < 0.2, "DM outside the requested range"
22 | assert width > 0, "Width must be positive"
23 | assert disp_ind==2, "Disp index doesn't match input"
24 |
25 |
26 | if __name__ == '__main__':
27 | unittest.main()
28 |
--------------------------------------------------------------------------------
/single_pulse_ml/tools.py:
--------------------------------------------------------------------------------
1 | # training data 30 September 2017
2 | # miscellaneous tools for preparing and processing
3 | # machine learning data
4 |
5 | import numpy as np
6 | import glob
7 | import scipy.signal
8 |
9 | from single_pulse_ml import dataproc
10 |
11 | def save_background_data(fdir, outfile=None, nfreq=32):
12 | """ Read in randomly selected Pathfinder data in directory fdir,
13 | dedisperse to a DM between 25 and 2000 pc cm**-3,
14 | and create a large array of (nfreq, ntime_pulse) arrays
15 | over which FRBs can be injected.
16 | These data haven't been RFI cleaned! Could cause problems.
17 | """
18 | fl = glob.glob(fdir)
19 | fl.sort()
20 | arr_full = []
21 |
22 | freq_rebin = 1
23 | ntime_pulse = 250
24 |
25 | for ff in fl[:75]:
26 | print(ff)
27 | arr = np.load(ff)[:, 0]
28 | arr[arr!=arr] = 0.
29 | nfreq_arr, ntime = arr.shape
30 | print(arr.shape)
31 |
32 | # Disperse data to random dm
33 | _dm = np.random.uniform(25, 2000.0)
34 | arr = dedisperse_data(arr, _dm)
35 |
36 | # rebin to nfreq, divide data into blocks of len ntime_pulse
37 | arr = np.nansum(arr.reshape(-1, freq_rebin, ntime), axis=1)/freq_rebin
38 | arr = arr[:, :ntime//ntime_pulse*ntime_pulse]
39 | arr = arr.reshape(nfreq, -1, ntime_pulse)
40 | arr_full.append(arr)
41 |
42 | # Reorganize array to be (ntriggers, nfreq, ntime_pulse)
43 | arr_full = np.concatenate(arr_full)[:, :ntime//ntime_pulse*ntime_pulse]
44 | arr_full = arr_full.reshape(-1, nfreq, ntime//ntime_pulse, ntime_pulse)
45 | arr_full = np.transpose(arr_full, (0, 2, 1, 3)).reshape(-1, nfreq, ntime_pulse)
46 |
47 | # Go through each noise trigger and add data
48 | for ii, arr in enumerate(arr_full):
49 | arr_full[ii] = dataproc.normalize_data(arr)
50 |
51 | # Reshape to have same shape as RFI triggers
52 | #arr_full = arr_full.reshape(-1, nfreq*ntime_pulse)
53 | np.random.shuffle(arr_full)
54 |
55 | if outfile is not None:
56 | np.save(outfile, arr_full)
57 |
58 | return arr_full
59 |
60 | def dedisperse_data(f, _dm, freq_bounds=(800,400), dt=0.0016, freq_ref=600):
61 | """ Dedisperse data to some dispersion measure _dm.
62 | Frequency is in MHz, dt delta time in seconds.
63 | f is data to be dedispersed, shaped (nfreq, ntime)
64 | """
65 |
66 | # Calculate the number of bins to shift for each freq
67 | NFREQ=f.shape[0]
68 | freq = np.linspace(freq_bounds[0], freq_bounds[1], NFREQ)
69 | ind_delay = ((4.148808e3 * _dm * (freq**(-2.) - freq_ref**(-2.))) / dt).astype(int)
70 | for ii, nu in enumerate(freq):
71 | f[ii] = np.roll(f[ii], -ind_delay[ii])
72 |
73 | return f
74 |
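# Worked example for dedisperse_data above (values purely illustrative):
# with _dm = 100 pc cm**-3, freq = 400 MHz, and freq_ref = 600 MHz,
#
#     delay = 4.148808e3 * 100 * (400.**-2 - 600.**-2) ~ 1.44 s,
#
# i.e. roughly 900 samples at dt = 0.0016 s, which is the shift np.roll
# applies to the lowest-frequency channel.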
75 | def calc_snr(arr, fast=False):
76 |     """ Calculate the S/N of a pulse profile after
77 |     trying a range of boxcar widths (1 to 128 samples).
78 | 
79 |     Parameters
80 |     ----------
81 |     arr : np.array
82 |         (ntime,) vector of pulse profile
83 |     fast : bool
84 |         if True, use a quick noise estimate from the first third of the profile
85 | 
86 |     Returns
87 |     -------
88 |     snr : np.float
89 |         S/N of pulse
90 |     """
91 | assert len(arr.shape)==1
92 |
93 | ntime = len(arr)
94 | snr_max = 0
95 | widths = [1, 2, 4, 8, 16, 32, 64, 128]
96 |
97 |     # try each boxcar width in turn
98 | for ii in widths:
99 |
100 |         # skip if boxcar width is greater than 1/8th of ntime
101 | if ii > ntime//8:
102 | continue
103 |
104 | arr_copy = arr.copy()
105 | arr_ = arr_copy[:len(arr)//ii*ii].reshape(-1, ii).mean(-1)
106 |
107 | if fast is False:
108 | std_chunk = scipy.signal.detrend(arr_, type='linear')
109 | std_chunk.sort()
110 | ntime_r = len(std_chunk)
111 | stds = 1.148*np.sqrt((std_chunk[ntime_r//40:-ntime_r//40]**2.0).sum() /
112 | (0.95*ntime_r))
113 | snr_ = std_chunk[-1] / stds
114 | else:
115 | sig = np.std(arr_[:len(arr_)//3])
116 | snr_ = arr_.max() / sig
117 |
118 | if snr_ > snr_max:
119 | snr_max = snr_
120 | width_max = ii
121 |
122 | return snr_max
123 |
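# A quick self-check sketch for calc_snr (numbers are illustrative):
# inject a boxcar pulse into Gaussian noise; the recovered S/N should sit
# far above the noise-only value, with the matched 4-sample boxcar
# typically winning the rebinning search.
#
#     prof = np.random.normal(0, 1, 250)
#     prof[123:127] += 5.0               # 4-sample boxcar pulse at the centre
#     print(calc_snr(prof, fast=False))  # >> value for pure noise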
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
--------------------------------------------------------------------------------