├── requirements_gpu.txt ├── docs ├── contents.rst ├── index.rst ├── Makefile ├── find_event.rst ├── find_doppler.rst └── conf.py ├── turbo_seti ├── find_event │ ├── __init__.py │ ├── plot_event_pipeline.py │ └── dat_filter.py ├── find_doppler │ ├── kernels │ │ ├── _taylor_tree │ │ │ ├── __init__.py │ │ │ ├── _core_numba.py │ │ │ └── _core_cuda.py │ │ ├── Scheduler │ │ │ └── __init__.py │ │ ├── _bitrev │ │ │ └── __init__.py │ │ ├── _hitsearch │ │ │ ├── kernels.cu │ │ │ └── __init__.py │ │ └── __init__.py │ ├── turbo_seti_version.py │ ├── __init__.py │ ├── helper_functions.py │ ├── merge_dats_logs.py │ ├── dat_diff.py │ ├── seti_event.py │ └── file_writers.py ├── drift_indexes │ ├── drift_indexes_array_2.txt │ ├── drift_indexes_array_3.txt │ ├── drift_indexes_array_4.txt │ ├── drift_indexes_array_5.txt │ └── drift_indexes_array_6.txt └── __init__.py ├── setup.cfg ├── MANIFEST.in ├── dependencies.txt ├── requirements.txt ├── requirements_test.txt ├── test ├── test_setup.py ├── .coveragerc ├── run_tests.sh ├── fb_dat_reference.txt ├── test_dat_diff.py ├── test_plot_dat.py ├── test_dat_filter.py ├── run_benchmark.sh ├── test_pipelines_4.py ├── fb_genref.py ├── pipelines_util.py ├── fb_cases_def.py ├── test_fb_cases.py ├── download_test_data.py ├── test_drift_rates.py ├── test_pipelines_2.py ├── test_find_event.py ├── test_pipelines_1.py ├── fb_cases_util.py └── test_pipelines_3.py ├── .readthedocs.yml ├── .gitignore ├── .github ├── workflows │ ├── docker_build.yml │ ├── pythonpublish.yml │ ├── push_docker.yml │ └── python_tests.yml └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── INTEGRATION.md ├── Dockerfile ├── this_was_the.travis.yml ├── LICENSE ├── setup.py ├── gen_drift_indexes ├── ancient_history.py.txt └── gen_drift_indexes.py ├── tutorial ├── README.md └── initialise.ipynb ├── MAINTENANCE.md ├── FinalReport.md ├── README.md └── VERSION-HISTORY.md /requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | cupy -------------------------------------------------------------------------------- /docs/contents.rst: -------------------------------------------------------------------------------- 1 | .. include:: index.rst -------------------------------------------------------------------------------- /turbo_seti/find_event/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/_taylor_tree/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * -------------------------------------------------------------------------------- /turbo_seti/drift_indexes/drift_indexes_array_2.txt: -------------------------------------------------------------------------------- 1 | 0 1 3 0 2 | 0 1 2 3 3 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/turbo_seti_version.py: -------------------------------------------------------------------------------- 1 | TURBO_SETI_VERSION = '2.3.2' 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [metadata] 5 | description-file=README.md 6 | 7 | [tool:pytest] 8 | addopts=--verbose 9 | -------------------------------------------------------------------------------- /turbo_seti/drift_indexes/drift_indexes_array_3.txt: -------------------------------------------------------------------------------- 1 | 0 1 3 5 7 0 0 0 2 | 0 1 3 4 5 7 0 0 3 | 0 1 2 3 5 6 7 0 4 | 0 1 2 3 4 5 6 7 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include MANIFEST.in 3 | include *.md 4 | include *.txt 5 | recursive-include turbo_seti/drift_indexes *.txt 6 | -------------------------------------------------------------------------------- /dependencies.txt: -------------------------------------------------------------------------------- 1 | python3-pip 2 | python3-dev 3 | python3-setuptools 4 | libhdf5-dev 5 | gcc 6 | gfortran 7 | wget 8 | curl 9 | git 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | astropy 2 | numpy 3 | blimpy>=2.0.34 4 | pandas 5 | toolz 6 | fsspec 7 | dask 8 | dask[bag] 9 | numba 10 | cloudpickle 11 | -------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | pytest-runner 4 | codecov 5 | coverage 6 | wget 7 | setigen 8 | pyslalib 9 | pytest-order 10 | -------------------------------------------------------------------------------- /test/test_setup.py: -------------------------------------------------------------------------------- 1 | r""" Test setup.py""" 2 | 3 | 4 | def test_setup(): 5 | import os 6 | cmd = "python3 setup.py check" 7 | os.system(cmd) 8 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import seti_event, helper_functions, file_writers, data_handler, merge_dats_logs, dat_diff 2 | from .find_doppler import FindDoppler 3 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | sphinx: 6 | configuration: docs/conf.py 7 | 8 | python: 9 | setup_py_install: true -------------------------------------------------------------------------------- /turbo_seti/__init__.py: -------------------------------------------------------------------------------- 1 | from .find_doppler import seti_event, FindDoppler, helper_functions 2 | from .find_event import find_event, plot_event, plot_event_pipeline, \ 3 | find_event_pipeline, run_pipelines 4 | from pkg_resources import get_distribution, DistributionNotFound 5 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to turbo_seti's documentation! 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Contents: 7 | 8 | find_doppler.rst 9 | find_event.rst 10 | 11 | 12 | 13 | Indices and tables 14 | ================== 15 | 16 | * :ref:`genindex` 17 | * :ref:`modindex` 18 | * :ref:`search` 19 | -------------------------------------------------------------------------------- /turbo_seti/drift_indexes/drift_indexes_array_4.txt: -------------------------------------------------------------------------------- 1 | 0 1 3 5 7 9 11 13 15 0 0 0 0 0 0 0 2 | 0 1 3 5 7 8 9 11 13 15 0 0 0 0 0 0 3 | 0 1 3 4 5 7 9 11 12 13 15 0 0 0 0 0 4 | 0 1 3 4 5 7 8 9 11 12 13 15 0 0 0 0 5 | 0 1 2 3 5 6 7 9 10 11 13 14 15 0 0 0 6 | 0 1 2 3 5 6 7 8 9 10 11 13 14 15 0 0 7 | 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 0 8 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.dat 3 | *.fil 4 | *.h5 5 | *.log 6 | *.pyc 7 | *.egg-info/ 8 | 9 | .eggs/ 10 | .idea/ 11 | .ipynb_checkpoints/ 12 | .coverage 13 | .virtualenv*/ 14 | .venv*/ 15 | .pytest_cache 16 | stats_file.bin 17 | turbo_seti_version.py 18 | 19 | build/ 20 | dist/ 21 | docs/_build/ 22 | exec 23 | 24 | .coverage 25 | htmlcov 26 | coverage.xml 27 | *profile.svg 28 | -------------------------------------------------------------------------------- /test/.coveragerc: -------------------------------------------------------------------------------- 1 | # configuration file used by run_tests.py 2 | [report] 3 | 4 | omit = 5 | */turbo_seti/__init__.py 6 | */turbo_seti/find_doppler/__init__.py 7 | */turbo_seti/find_event/__init__.py 8 | # Numba code isn't compatible. It will be reported as uncovered. Whitelisting... 9 | */turbo_seti/find_doppler/kernels/_taylor_tree/_core_numba.py 10 | */turbo_seti/find_doppler/kernels/_bitrev/__init__.py -------------------------------------------------------------------------------- /test/run_tests.sh: -------------------------------------------------------------------------------- 1 | coverage run --rcfile=.coveragerc --source=turbo_seti -m pytest 2 | EXITCODE=$? 
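# Note: $? holds the exit status of the coverage/pytest run above; the check below
# aborts before the coverage report and codecov upload steps if any test failed.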
3 | if [ $EXITCODE -ne 0 ]; then 4 | echo 5 | echo '*** Oops, coverage pytest failed, exit code = '$EXITCODE' ***' 6 | echo 7 | exit $EXITCODE 8 | fi 9 | coverage report --rcfile=.coveragerc 10 | EXITCODE=$? 11 | if [ $EXITCODE -ne 0 ]; then 12 | echo 13 | echo '*** Oops, coverage report failed, exit code = '$EXITCODE' ***' 14 | echo 15 | exit $EXITCODE 16 | fi 17 | codecov 18 | -------------------------------------------------------------------------------- /.github/workflows/docker_build.yml: -------------------------------------------------------------------------------- 1 | name: Test Dockerfile 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.ipynb' 7 | - '**.png' 8 | - '**.rst' 9 | - '**.md' 10 | pull_request: 11 | paths-ignore: 12 | - '**.ipynb' 13 | - '**.png' 14 | - '**.rst' 15 | - '**.md' 16 | 17 | jobs: 18 | build-cpu: 19 | 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Build the Docker CPU image 25 | run: docker build . --file Dockerfile --tag turboseti-docker:$(date +%s) 26 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /INTEGRATION.md: -------------------------------------------------------------------------------- 1 | This document describes the changes from Travis CI to Github Actions. 2 | 3 | ### Travis CI Status 4 | - Deprecated in favor of Github Actions. 5 | 6 | ### Github Actions Approach 7 | #### On Commit or Pull-Request 8 | Test and validate the integrity of each commit to any branch. 9 | 10 | 1. `python_tests.yml`: Run Python tests with coverage report. 11 | 2. `docker_build.yml`: Run build test with Docker. 12 | 13 | #### On Master Commit 14 | Publish the image to Docker Hub after a commit to `master` branch. 15 | 16 | * `push_docker.yml`: Build & publish the image on Docker Hub. 17 | 18 | ### Required Secrets 19 | - **DOCKER_USER**: Docker Hub Username. 20 | - **DOCKER_PASS**: Docker Hub Password. 21 | - **CODECOV_TOKEN**: Codecov turbo_seti Token. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Please describe your new feature request.** 8 | 9 | A clear and concise description of what enhancement you would like to occur. Examples: 10 | * This new type of plot would be of benefit [...] 11 | * More testing should be developed for this situation [...] 12 | * More information should be provided when this happens [...] 
13 | 14 | **Any more details?** 15 | 16 | If you have source code or support file(s) ideas in mind, please provide a clear and concise description. 17 | 18 | **Describe alternatives you've considered** 19 | 20 | A clear and concise description of any alternative solutions or features you've considered. 21 | 22 | **Additional context** 23 | 24 | Add any other context or screenshots about the feature request here. 25 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG IMAGE=ubuntu:20.04 2 | FROM ${IMAGE} 3 | 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt-get update 7 | 8 | COPY . /turboseti 9 | WORKDIR /turboseti 10 | 11 | RUN cat dependencies.txt | xargs -n 1 apt install --no-install-recommends -y 12 | 13 | RUN python3 -m pip install -U pip 14 | RUN python3 -m pip install git+https://github.com/UCBerkeleySETI/blimpy 15 | RUN python3 -m pip install -r requirements.txt 16 | RUN python3 -m pip install -r requirements_test.txt 17 | RUN python3 setup.py install 18 | RUN cd test && python3 download_test_data.py && cd .. 19 | RUN cd test && bash run_tests.sh && cd .. 20 | 21 | RUN find test -name "*.h5" -type f -delete 22 | RUN find test -name "*.log" -type f -delete 23 | RUN find test -name "*.dat" -type f -delete 24 | RUN find test -name "*.fil" -type f -delete 25 | RUN find test -name "*.png" -type f -delete 26 | RUN find . -path '*/__pycache__*' -delete 27 | 28 | WORKDIR /home 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | 9 | A clear and concise description of what the bug is. 10 | 11 | **Relevant BL files (.fil, .h5)** 12 | 13 | * Name the input file{s}. 14 | * Where are these file(s)? - URL or precise machine name and directory 15 | 16 | **To Reproduce** 17 | 18 | Steps to reproduce the behavior: 19 | 1. Go to '...' 20 | 2. Click on '....' 21 | 3. Scroll down to '....' 22 | 4. See error 23 | 24 | **Expected behavior** 25 | 26 | A clear and concise description of what you expected to happen. 27 | 28 | **Screenshots** 29 | 30 | If applicable, add screenshots to help explain your problem. 31 | 32 | **Setup** 33 | 34 | - Python version: 3.X? 35 | - turbo_seti version? 36 | - blimpy version? 37 | - other package (astropy, dask, numba, numpy, pandas) versions? 38 | 39 | **Additional context** 40 | 41 | Add any other context about the problem here. 
42 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /this_was_the.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | 6 | # command to install dependencies 7 | install: 8 | - sudo apt-get update -qq 9 | - sudo apt-get install -qq libhdf5-serial-dev 10 | - pip install --upgrade pip setuptools wheel 11 | - pip install --only-binary=numpy,scipy numpy scipy 12 | - pip install h5py --only-binary=h5py 13 | - pip install git+https://github.com/ucberkeleyseti/blimpy 14 | - pip install -r requirements.txt 15 | - pip install -r requirements_test.txt 16 | - pip install . 17 | 18 | # Command to run test 19 | script: 20 | # Ping stdout every 5 minutes or Travis kills build, 21 | # while travis_wait does not show the command output while processing. 22 | # https://docs.travis-ci.com/user/common-build-problems/#build-times-out-because-no-output-was-received 23 | - | 24 | while sleep 5m; do 25 | echo "====[ $SECONDS seconds still running ]====" 26 | done & 27 | - cd test; python download_test_data.py; ./run_tests.sh; 28 | 29 | branches: 30 | only: 31 | - master 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 J. E. Enriquez 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/push_docker.yml: -------------------------------------------------------------------------------- 1 | name: Push to Docker Hub 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.rst' 7 | branches: 8 | - master 9 | 10 | jobs: 11 | build-cpu: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Build and push CPU image to Docker Hub 18 | uses: docker/build-push-action@v1 19 | with: 20 | username: ${{ secrets.DOCKER_USER }} 21 | password: ${{ secrets.DOCKER_PASS }} 22 | repository: ucberkeleyseti/turbo_seti 23 | tags: latest 24 | 25 | # ========================== 26 | # Work-around for issue #240 27 | # ========================== 28 | # build-gpu: 29 | 30 | # runs-on: nvidia-gpu 31 | 32 | # steps: 33 | # - uses: actions/checkout@v2 34 | # - name: Build and push GPU image to Docker Hub 35 | # uses: docker/build-push-action@v1 36 | # with: 37 | # username: ${{ secrets.DOCKER_USER }} 38 | # password: ${{ secrets.DOCKER_PASS }} 39 | # repository: ucberkeleyseti/turbo_seti_gpu 40 | # build_args: IMAGE=cupy/cupy:v8.6.0 41 | # tags: latest 42 | -------------------------------------------------------------------------------- /test/fb_dat_reference.txt: -------------------------------------------------------------------------------- 1 | CaseNr fdir drsign tophit drate snr freq index 2 | #===== ==== ====== ====== ===== === ==== ===== 3 | # 4 | # Case 1: frequency increasing, drift rate positive 5 | 1 1 1 1 1.296875 85.384996 8421.596432 209715 6 | 1 1 1 2 2.578125 328.901712 8422.225578 838861 7 | # 8 | # Case 2: frequency increasing, drift rate negative 9 | 2 1 -1 1 -1.28125 82.17038 8421.596432 209715 10 | 2 1 -1 2 -2.59375 340.502374 8422.225578 838861 11 | # 12 | # Case 3: frequency decreasing, drift rate positive 13 | 3 -1 1 1 2.578125 330.401936 8421.177003 209714 14 | 3 -1 1 2 1.296875 86.588677 8420.547857 838860 15 | # 16 | # Case 4: frequency decreasing, drift rate negative 17 | 4 -1 -1 1 -2.59375 340.946023 8421.177003 209714 18 | 4 -1 -1 2 -1.28125 81.893643 8420.547857 838860 19 | # 20 | #---------END 2021-01-25 08:39:11 --------------------------- 21 | -------------------------------------------------------------------------------- /test/test_dat_diff.py: -------------------------------------------------------------------------------- 1 | """ Test find_doppler/dat_diff.py """ 2 | import os 3 | import pytest 4 | from turbo_seti.find_doppler.dat_diff import main 5 | 6 | TESTDIR = os.path.split(os.path.abspath(__file__))[0] 7 | VOYADAT = os.path.join(TESTDIR, "Voyager1.single_coarse.fine_res.dat") 8 | VOYADATFLIPPED = os.path.join(TESTDIR, "Voyager1.single_coarse.fine_res.flipped.dat") 9 | 10 | 11 | @pytest.mark.order(index=-3) 12 | def test_dat_diff_help(capsys): 13 | 14 | with pytest.raises(SystemExit) as exit_code: 15 | args = ["-h"] 16 | main(args) 17 | out, err = capsys.readouterr() 18 | print(out, err) 19 | assert exit_code.type == SystemExit 20 | assert exit_code.value.code == 0 21 | 22 | 23 | @pytest.mark.order(index=-2) 24 | def test_dat_diff_2_dats(): 25 | 26 | args = [VOYADAT, VOYADATFLIPPED] 27 | main(args) 28 | 29 | 30 | @pytest.mark.order(index=-1) 31 | def 
test_dat_diff_missing_dat(capsys): 32 | 33 | with pytest.raises(SystemExit) as exit_code: 34 | args = ["nonexistent.dat", VOYADATFLIPPED] 35 | main(args) 36 | out, err = capsys.readouterr() 37 | print(out, err) 38 | assert exit_code.type == SystemExit 39 | assert exit_code.value.code != 0 40 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/Scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | 3 | 4 | class Scheduler: 5 | def __init__(self, method, params, n_futures=2, n_workers=2): 6 | self.n_workers = n_workers 7 | self.n_futures = n_futures 8 | self.method = method 9 | self.params = params 10 | 11 | self._init_threads() 12 | self._update_futures() 13 | 14 | @staticmethod 15 | def _batch(iterable, n): 16 | return iterable[:min(n, len(iterable))] 17 | 18 | def _init_threads(self): 19 | self.futures = [] 20 | self.client = ThreadPoolExecutor(self.n_workers) 21 | 22 | def _update_futures(self): 23 | n = self.n_futures - len(self.futures) 24 | for p in self._batch(self.params, n): 25 | self._submit_future(p) 26 | 27 | def _submit_future(self, p): 28 | call = (self.method, *p) 29 | future = self.client.submit(*call) 30 | self.futures.append(future) 31 | self.params.remove(p) 32 | 33 | def get(self): 34 | self._update_futures() 35 | for f in self.futures: 36 | result = f.result() 37 | self.futures.remove(f) 38 | return result -------------------------------------------------------------------------------- /turbo_seti/drift_indexes/drift_indexes_array_5.txt: -------------------------------------------------------------------------------- 1 | 0 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 | 0 1 3 5 7 9 11 13 15 16 17 19 21 23 25 27 29 31 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 | 0 1 3 5 7 8 9 11 13 15 17 19 21 23 24 25 27 29 31 0 0 0 0 0 0 0 0 0 0 0 0 0 4 | 0 1 3 5 7 8 9 11 13 15 16 17 19 21 23 24 25 27 29 31 0 0 0 0 0 0 0 0 0 0 0 0 5 | 0 1 3 4 5 7 9 11 12 13 15 17 19 20 21 23 25 27 28 29 31 0 0 0 0 0 0 0 0 0 0 0 6 | 0 1 3 4 5 7 9 11 12 13 15 16 17 19 20 21 23 25 27 28 29 31 0 0 0 0 0 0 0 0 0 0 7 | 0 1 3 4 5 7 8 9 11 12 13 15 17 19 20 21 23 24 25 27 28 29 31 0 0 0 0 0 0 0 0 0 8 | 0 1 3 4 5 7 8 9 11 12 13 15 16 17 19 20 21 23 24 25 27 28 29 31 0 0 0 0 0 0 0 0 9 | 0 1 2 3 5 6 7 9 10 11 13 14 15 17 18 19 21 22 23 25 26 27 29 30 31 0 0 0 0 0 0 0 10 | 0 1 2 3 5 6 7 9 10 11 13 14 15 16 17 18 19 21 22 23 25 26 27 29 30 31 0 0 0 0 0 0 11 | 0 1 2 3 5 6 7 8 9 10 11 13 14 15 17 18 19 21 22 23 24 25 26 27 29 30 31 0 0 0 0 0 12 | 0 1 2 3 5 6 7 8 9 10 11 13 14 15 16 17 18 19 21 22 23 24 25 26 27 29 30 31 0 0 0 0 13 | 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 17 18 19 20 21 22 23 25 26 27 28 29 30 31 0 0 0 14 | 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 25 26 27 28 29 30 31 0 0 15 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 0 16 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 17 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/_bitrev/__init__.py: -------------------------------------------------------------------------------- 1 | from numba import jit 2 | 3 | 4 | @jit(nopython=True) 5 | def bitrev(inval, nbits): 6 | r""" 7 | This function bit-reverses the given value "inval" with the number of bits, "nbits". 
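    For example, ``bitrev(3, 3)`` returns 6 (binary 011 reversed over 3 bits is 110), and
    ``bitrev(1, 4)`` returns 8 (0001 becomes 1000).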
8 | 9 | Parameters 10 | ---------- 11 | inval : int 12 | Number to be bit-reversed. 13 | nbits : int 14 | The length of inval in bits. If user only wants the bit-reverse of a certain amount of bits of 15 | inval, nbits is the amount of bits to be reversed counting from the least significant (rightmost) 16 | bit. Any bits beyond this length will not be reversed and will be truncated from the result. 17 | 18 | Returns 19 | ------- 20 | : int 21 | The bit-reverse of inval. If there are more significant bits beyond nbits, they are truncated. 22 | 23 | References 24 | ---------- 25 | - R. Ramachandran, 10-Nov-97, nfra. -- Original C implementation. 26 | - H. Chen, 2014 -- Python version. 27 | - R. Elkins (texadactyl), 2020 -- Speedup. 28 | 29 | """ 30 | if nbits <= 1: 31 | ibitr = inval 32 | else: 33 | ifact = 2**(nbits - 1) 34 | k = inval 35 | ibitr = (1 & k) * ifact 36 | for _ in range(2, nbits+1): 37 | k = k >> 1 38 | ifact = ifact >> 1 39 | if 1 & k: 40 | ibitr += ifact 41 | return ibitr -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/_hitsearch/kernels.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ 2 | void hitsearch_float64(const int n, const double* spectrum, const double threshold, const double drift_rate, 3 | double* maxsnr, double* maxdrift, unsigned int* tot_hits, const float median, const float stddev) { 4 | int index = blockIdx.x * blockDim.x + threadIdx.x; 5 | int stride = blockDim.x * gridDim.x; 6 | int count = 0; 7 | for (int i = index; i < n; i += stride) { 8 | const double bin = (spectrum[i] - median) / stddev; 9 | if (bin > threshold) { 10 | count++; 11 | if (bin > maxsnr[i]) { 12 | maxsnr[i] = bin; 13 | maxdrift[i] = drift_rate; 14 | } 15 | } 16 | } 17 | atomicAdd(&tot_hits[0], count); 18 | } 19 | 20 | extern "C" __global__ 21 | void hitsearch_float32(const int n, const float* spectrum, const double threshold, const double drift_rate, 22 | float* maxsnr, float* maxdrift, unsigned int* tot_hits, const float median, const float stddev) { 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | int stride = blockDim.x * gridDim.x; 25 | int count = 0; 26 | for (int i = index; i < n; i += stride) { 27 | const double bin = (spectrum[i] - median) / stddev; 28 | if (bin > threshold) { 29 | count++; 30 | if (bin > maxsnr[i]) { 31 | maxsnr[i] = bin; 32 | maxdrift[i] = drift_rate; 33 | } 34 | } 35 | } 36 | atomicAdd(&tot_hits[0], count); 37 | } 38 | -------------------------------------------------------------------------------- /docs/find_event.rst: -------------------------------------------------------------------------------- 1 | De-Doppler Analysis 2 | =================== 3 | 4 | In this code, the following terminology is used: 5 | - Hit: Single strong narrowband signal in an observation. 6 | - Event: Strong narrowband signal that is associated with multiple hits 7 | across ON observations. 8 | 9 | .. note:: 10 | This code works for .dat files that were produced by seti_event.py 11 | after turboSETI version 0.8.2, and blimpy version 1.1.7 (~mid 2019). The 12 | drift rates *before* that version were recorded with the incorrect sign 13 | and thus the drift rate sign would need to be flipped in the make_table 14 | function. 
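A minimal sketch of the analysis stage is shown below. The function names are the ones
documented on this page; the list files, cadence length, and keyword names
(``filter_threshold``, ``number_in_cadence``, ``csv_name``, ``saving``) are assumptions
that should be checked against the signatures documented below before use::

    from turbo_seti.find_event import find_event_pipeline, plot_event_pipeline

    # Search a cadence of .dat files for events and write them to a CSV table.
    csv_path = "found_event_table.csv"
    find_event_pipeline("dat_files.lst",
                        filter_threshold=3,    # keep only events seen in every ON observation
                        number_in_cadence=6,   # six observations per cadence
                        csv_name=csv_path,
                        saving=True)

    # Plot each event in the CSV, reading dynamic spectra from the matching .h5 files.
    plot_event_pipeline(csv_path, "h5_files.lst")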
15 | 16 | Authors 17 | ------- 18 | - Version 2.0 - Sofia Sheikh (ssheikhmsa@gmail.com) and Karen Perez (kip2105@columbia.edu) 19 | - Version 1.0 - Emilio Enriquez (jeenriquez@gmail.com) 20 | 21 | plotSETI Command Main Program 22 | ----------------------------- 23 | 24 | .. automodule:: turbo_seti.find_event.run_pipelines 25 | :members: 26 | 27 | Find Event Pipeline 28 | ------------------- 29 | 30 | .. automodule:: turbo_seti.find_event.find_event_pipeline 31 | :members: 32 | 33 | Find Event 34 | ---------- 35 | 36 | .. automodule:: turbo_seti.find_event.find_event 37 | :members: 38 | 39 | Plot DAT 40 | -------- 41 | 42 | .. automodule:: turbo_seti.find_event.plot_dat 43 | :members: 44 | 45 | Plot Event Pipeline 46 | ------------------- 47 | 48 | .. automodule:: turbo_seti.find_event.plot_event_pipeline 49 | :members: 50 | 51 | Plot Event 52 | ---------- 53 | 54 | .. automodule:: turbo_seti.find_event.plot_event 55 | :members: 56 | -------------------------------------------------------------------------------- /test/test_plot_dat.py: -------------------------------------------------------------------------------- 1 | r'''test_plot_dat - test plotting of a DAT file. 2 | NOTE: This source file uses data downloaded by test_pipelines_1.py 3 | ''' 4 | 5 | from shutil import rmtree 6 | from os import mkdir 7 | import glob 8 | from tempfile import gettempdir 9 | from turbo_seti.find_event import plot_dat 10 | 11 | TEMPDIR = gettempdir() + '/pipeline_testing/' 12 | PLOTDIR = TEMPDIR + 'plots/' 13 | h5_list = sorted(glob.glob(TEMPDIR + 'single*.h5')) 14 | dat_list = sorted(glob.glob(TEMPDIR + 'single*.dat')) 15 | H5_LIST_FILE = TEMPDIR + 'h5_files.lst' 16 | DAT_LIST_FILE = TEMPDIR + 'dat_files.lst' 17 | PATH_CSVF = TEMPDIR + 'found_event_table.csv' 18 | 19 | def test_plot_dat(): 20 | 21 | rmtree(PLOTDIR, ignore_errors=True) 22 | mkdir(PLOTDIR) 23 | 24 | # test default settings, will produce six plots, all candidates 25 | plot_dat.plot_dat(DAT_LIST_FILE, H5_LIST_FILE, PATH_CSVF) 26 | 27 | # will produce no plots 28 | plot_dat.plot_dat(DAT_LIST_FILE, H5_LIST_FILE, PATH_CSVF, outdir=PLOTDIR, 29 | alpha=0.5, window=(8418.542731-793e-6, 8418.542731+793e-6)) 30 | 31 | # will produce three candidate plots 32 | plot_dat.plot_dat(DAT_LIST_FILE, H5_LIST_FILE, PATH_CSVF, outdir=PLOTDIR, 33 | alpha=0.5, window=(8419.542731-793e-6, 8419.542731+793e-6)) 34 | 35 | # will produce one plot, no candidates 36 | plot_dat.plot_dat(DAT_LIST_FILE, H5_LIST_FILE, PATH_CSVF, outdir=PLOTDIR, 37 | window=(8419.519896-2.5e-3, 8419.519896+2.5e-3)) 38 | 39 | if __name__ == "__main__": 40 | test_plot_dat() 41 | -------------------------------------------------------------------------------- /test/test_dat_filter.py: -------------------------------------------------------------------------------- 1 | r''' 2 | Test dat_filter. 
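Each case copies a fresh Voyager DAT file to TESTDAT, runs dat_filter.main() with the
given -s/-m/-M arguments, and asserts how many hit rows survive in the filtered file.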
3 | ''' 4 | 5 | from tempfile import gettempdir 6 | import shutil 7 | import pandas as pd 8 | from pandas.errors import EmptyDataError 9 | from turbo_seti.find_event import dat_filter 10 | 11 | TESTDIR = gettempdir() + '/pipeline_testing/' 12 | TESTDAT = TESTDIR + "TESTDAT_filter.dat" 13 | SEP = r"\s+" 14 | 15 | 16 | def execute_one(counter, args): 17 | print("\n====TESTDAT_filter [{}]================== args: {}".format(counter, args)) 18 | 19 | shutil.copyfile(TESTDIR + "single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat", TESTDAT) 20 | rc = dat_filter.main(args) 21 | print("\n====TESTDAT_filter [{}]================== rc: {}".format(counter, rc)) 22 | assert rc == 0 23 | try: 24 | df = pd.read_csv(TESTDAT, header=None, sep=SEP, engine="python", comment="#") 25 | except EmptyDataError: 26 | # Create empty dataframe. 27 | df = pd.DataFrame() 28 | return len(df) 29 | 30 | 31 | def test_dat_filter(): 32 | print('\n===== TESTDAT_filter: BEGIN =====') 33 | 34 | args = ["-s", "10", "-m", "0.1", "-M", "0.4", TESTDAT] 35 | len_df = execute_one(1, args) 36 | assert len_df == 3 37 | 38 | args = ["-m", "0.36", TESTDAT] 39 | len_df = execute_one(2, args) 40 | assert len_df == 2 41 | 42 | args = ["-M", "0.34", TESTDAT] 43 | len_df = execute_one(3, args) 44 | assert len_df == 0 45 | 46 | args = ["-s", "100", TESTDAT] 47 | len_df = execute_one(3, args) 48 | assert len_df == 1 49 | 50 | print('\n===== TESTDAT_filter: END =====') 51 | 52 | 53 | if __name__ == '__main__': 54 | test_dat_filter() 55 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/_hitsearch/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cupy as cp 4 | 5 | kernels_file = os.path.join(os.path.dirname(__file__), 'kernels.cu') 6 | 7 | with open(kernels_file, 'r') as f: 8 | kernels = f.read() 9 | 10 | _hitsearch_float64 = cp.RawKernel(kernels, 'hitsearch_float64') 11 | _hitsearch_float32 = cp.RawKernel(kernels, 'hitsearch_float32') 12 | 13 | 14 | def hitsearch(numBlocks, blockSize, call): 15 | r""" 16 | Performs hitsearch on the GPU with CUDA. Automatically chooses 17 | the right floating point precision based on the kernel configuration. 18 | 19 | Parameters 20 | ---------- 21 | numBlocks : tuple 22 | CUDA Kernel number of blocks. 23 | blockSize : tuple 24 | CUDA Kernel block size. 25 | call : [int, ndarray, float, float, ndarray, ndarray, ndarray, float, float] 26 | Tuple of parameters required by `hitsearch`. 
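        In kernel order these are (n, spectrum, threshold, drift_rate, maxsnr, maxdrift,
        tot_hits, median, stddev), matching the signatures in kernels.cu.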
27 | 28 | """ 29 | 30 | try: 31 | assert isinstance(call[0], int) 32 | assert isinstance(call[1], cp.ndarray) 33 | assert isinstance(call[2], float) 34 | assert isinstance(call[3], float) 35 | assert isinstance(call[4], cp.ndarray) 36 | assert isinstance(call[5], cp.ndarray) 37 | assert isinstance(call[6], cp.ndarray) 38 | assert isinstance(call[7], np.float32) 39 | assert isinstance(call[8], np.float32) 40 | except: 41 | raise ValueError("Check the `call` types of the `hitsearch` method.") 42 | 43 | if call[1].dtype == cp.float64: 44 | _hitsearch_float64(numBlocks, blockSize, call) 45 | if call[1].dtype == cp.float32: 46 | _hitsearch_float32(numBlocks, blockSize, call) 47 | -------------------------------------------------------------------------------- /.github/workflows/python_tests.yml: -------------------------------------------------------------------------------- 1 | name: Test TurboSETI 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.ipynb' 7 | - '**.png' 8 | - '**.rst' 9 | - '**.md' 10 | pull_request: 11 | paths-ignore: 12 | - '**.ipynb' 13 | - '**.png' 14 | - '**.rst' 15 | - '**.md' 16 | 17 | jobs: 18 | build: 19 | 20 | runs-on: ubuntu-latest 21 | strategy: 22 | matrix: 23 | python-verison: [3.7, 3.8, 3.9] 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | - name: Install system dependencies 32 | run: | 33 | sudo apt update 34 | cat dependencies.txt | sudo xargs -n 1 apt install -y 35 | - name: Install dependencies 36 | run: | 37 | python3 -m pip install --upgrade pip 38 | python3 -m pip install git+https://github.com/UCBerkeleySETI/blimpy 39 | python3 -m pip install -r requirements.txt 40 | python3 -m pip install -r requirements_test.txt 41 | - name: Download test files 42 | run: | 43 | export PATH=/home/runner/.local/bin:$PATH 44 | cd test 45 | python3 download_test_data.py 46 | cd .. 47 | - name: Run coverage test 48 | run: | 49 | export PATH=/home/runner/.local/bin:$PATH 50 | python3 -m pytest --cov=./ --cov-report=xml 51 | - name: Upload coverage to Codecov 52 | uses: codecov/codecov-action@v1 53 | with: 54 | token: ${{ secrets.CODECOV_TOKEN }} 55 | name: turboseti-codecov-p${{ matrix.python-version }} 56 | 57 | -------------------------------------------------------------------------------- /docs/find_doppler.rst: -------------------------------------------------------------------------------- 1 | De-Doppler Search 2 | ================= 3 | 4 | turboSETI Command Main Program 5 | ------------------------------ 6 | .. automodule:: turbo_seti.find_doppler.seti_event 7 | :members: 8 | 9 | Find Doppler 10 | ------------ 11 | 12 | .. automodule:: turbo_seti.find_doppler.find_doppler 13 | :members: 14 | 15 | Data Handler 16 | ------------ 17 | 18 | .. automodule:: turbo_seti.find_doppler.data_handler 19 | :members: 20 | 21 | File Writers 22 | ------------ 23 | 24 | .. automodule:: turbo_seti.find_doppler.file_writers 25 | :members: 26 | 27 | Kernels 28 | ------- 29 | 30 | .. automodule:: turbo_seti.find_doppler.kernels 31 | :members: 32 | 33 | Hitsearch 34 | ---------- 35 | This kernel implements a GPU accelerated version of the :func:`~turbo_seti.find_doppler.find_doppler.hitsearch` 36 | method written as a RAW CUDA kernel. 37 | 38 | .. 
automodule:: turbo_seti.find_doppler.kernels._hitsearch 39 | :members: 40 | 41 | De-Doppler 42 | ----------- 43 | This kernel implements a slightly modified version of the Taylor Tree algorithm 44 | `published `_ by J.H. Taylor in 1974. 45 | 46 | 1. This GPU implementation is based on `Cupy `_ array library accelerated with CUDA and ROCm. 47 | 48 | .. automodule:: turbo_seti.find_doppler.kernels._taylor_tree._core_cuda 49 | :members: 50 | 51 | 2. This CPU implementation is based on `Numba `_ Just-In-Time compilation. 52 | 53 | .. automodule:: turbo_seti.find_doppler.kernels._taylor_tree._core_numba 54 | :members: 55 | 56 | Helper Functions 57 | ---------------- 58 | 59 | .. automodule:: turbo_seti.find_doppler.helper_functions 60 | :members: 61 | 62 | Merge DAT and LOG Files 63 | ----------------------- 64 | .. automodule:: turbo_seti.find_doppler.merge_dats_logs 65 | :members: 66 | 67 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | __version__ = "2.3.2" 4 | 5 | with open("turbo_seti/find_doppler/turbo_seti_version.py", "w") as fh: 6 | fh.write("TURBO_SETI_VERSION = '{}'\n".format(__version__)) 7 | 8 | with open("README.md", "r") as fh: 9 | long_description = fh.read() 10 | 11 | with open("requirements.txt", "r") as fh: 12 | install_requires = fh.readlines() 13 | 14 | with open("requirements_test.txt", "r") as fh: 15 | test_requirements = fh.readlines() 16 | 17 | entry_points = { 18 | "console_scripts": [ 19 | "turboSETI = turbo_seti.find_doppler.seti_event:main", 20 | "dat_diff = turbo_seti.find_doppler.dat_diff:main", 21 | "plotSETI = turbo_seti.find_event.run_pipelines:main", 22 | "dat_filter = turbo_seti.find_event.dat_filter:main", 23 | ] 24 | } 25 | 26 | package_data = {"turbo_seti": ["drift_indexes/*.txt", "find_doppler/kernels/**/*.cu"]} 27 | 28 | setup( 29 | name="turbo_seti", 30 | version=__version__, 31 | packages=find_packages(), 32 | package_data=package_data, 33 | include_package_data=True, 34 | install_requires=install_requires, 35 | tests_require=test_requirements, 36 | entry_points=entry_points, 37 | author="Emilio Enriquez", 38 | author_email="e.enriquez@berkeley.edu", 39 | description="Analysis tool for the search of narrow band drifting signals in filterbank data", 40 | long_description=long_description, 41 | long_description_content_type="text/markdown", 42 | license="MIT License", 43 | keywords="astronomy", 44 | url="https://github.com/UCBerkeleySETI/turbo_seti", 45 | zip_safe=False, 46 | options={"bdist_wheel": {"universal": "1"}}, 47 | classifiers=[ 48 | "Development Status :: 4 - Beta", 49 | "Intended Audience :: Science/Research", 50 | "Programming Language :: Python :: 3", 51 | "Topic :: Scientific/Engineering", 52 | ], 53 | ) 54 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/_taylor_tree/_core_numba.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import jit 3 | 4 | from turbo_seti.find_doppler.kernels._bitrev import bitrev 5 | 6 | 7 | @jit(nopython=True) 8 | def flt(outbuf, nchn): 9 | """ 10 | This is a function to Taylor-tree-sum a data stream. It assumes that 11 | the arrangement of data stream is, all points in first spectra, all 12 | points in second spectra, etc. Data are summed across time. 
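    The summation is performed in log2(nchn) stages over the whole buffer (the
    Taylor-tree trick) rather than summing each of the nchn drift paths independently.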
13 | 14 | Parameters 15 | ---------- 16 | outbuf : array_like 17 | Input data array, replaced by dedispersed data at the output. 18 | nchn : int 19 | Number of timesteps in the data. 20 | 21 | References 22 | ---------- 23 | - R. Ramachandran, 07-Nov-97, nfra. -- Original algorithm. 24 | - A. Siemion, 2011 -- float/64 bit addressing (C-code) 25 | - H. Chen, 2014 -- python version 26 | - E. Enriquez + P.Schellart, 2016 -- cython version 27 | - L. Cruz, 2020 -- numba version 28 | 29 | """ 30 | mlen = len(outbuf) 31 | nsamp = (mlen / nchn) - (2 * nchn) 32 | npts = nsamp + nchn 33 | nstages = int(np.log2(nchn)) 34 | ndat1 = nsamp + 2 * nchn 35 | nmem = 1 36 | 37 | for istages in range(0, nstages): 38 | nmem *= 2 39 | nsec1 = int(nchn / nmem) 40 | nmem2 = nmem - 2 41 | 42 | for isec in range(0, nsec1): 43 | ndelay = -1 44 | koff = isec * nmem 45 | for ipair in range(0, nmem2 + 1, 2): 46 | ioff1 = int((bitrev(ipair, istages + 1) + koff) * ndat1) 47 | i2 = int((bitrev(ipair + 1, istages + 1) + koff) * ndat1) 48 | ndelay += 1 49 | ndelay2 = ndelay + 1 50 | nfin = int(npts + ioff1) 51 | 52 | for i1 in range(ioff1, nfin): 53 | itemp = outbuf[i1] + outbuf[i2 + ndelay] 54 | outbuf[i2] = outbuf[i1] + outbuf[i2 + ndelay2] 55 | outbuf[i1] = itemp 56 | i2 += 1 57 | 58 | return outbuf 59 | -------------------------------------------------------------------------------- /test/run_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FILE=blc3_2bit_guppi_57386_VOYAGER1_0002.gpuspec.0000.fil 4 | if test ! -f "$FILE"; then 5 | echo "$FILE not found downloading it..." 6 | URL=http://blpd0.ssl.berkeley.edu/voyager_2bit/$FILE 7 | 8 | if hash aria2c 2>/dev/null 9 | then 10 | aria2c -x 8 -s 8 $URL 11 | else 12 | wget $URL 13 | fi 14 | fi 15 | 16 | FILE=blc07_guppi_57650_67573_Voyager1_0002.gpuspec.0000.fil 17 | if test ! -f "$FILE"; then 18 | echo "$FILE not found downloading it..." 19 | URL=http://blpd0.ssl.berkeley.edu/voyager_8bit/$FILE 20 | 21 | if hash aria2c 2>/dev/null 22 | then 23 | aria2c -x 8 -s 8 $URL 24 | else 25 | wget $URL 26 | fi 27 | fi 28 | 29 | FILE=blc3_2bit_guppi_57386_VOYAGER1_0002.gpuspec.0000 30 | if test ! -f "$FILE.h5"; then 31 | echo "Generating $FILE" 32 | fil2h5 $FILE.fil 33 | fi 34 | 35 | FILE=blc07_guppi_57650_67573_Voyager1_0002.gpuspec.0000 36 | if test ! 
-f "$FILE.h5"; then 37 | echo "Generating $FILE" 38 | fil2h5 $FILE.fil 39 | fi 40 | 41 | echo "====> [BENCHMARK] GPU DOUBLE PRECISION" 42 | turboSETI Voyager1.single_coarse.fine_res.h5 -g y -S n -P n 43 | turboSETI blc3_2bit_guppi_57386_VOYAGER1_0002.gpuspec.0000.h5 -g y -S n -P n 44 | turboSETI blc07_guppi_57650_67573_Voyager1_0002.gpuspec.0000.h5 -g y -S n -P n 45 | 46 | echo "====> [BENCHMARK] GPU SINGLE PRECISION" 47 | turboSETI Voyager1.single_coarse.fine_res.h5 -g y -S y -P n 48 | turboSETI blc3_2bit_guppi_57386_VOYAGER1_0002.gpuspec.0000.h5 -g y -S y -P n 49 | turboSETI blc07_guppi_57650_67573_Voyager1_0002.gpuspec.0000.h5 -g y -S y -P n 50 | 51 | echo "====> [BENCHMARK] CPU DOUBLE PRECISION" 52 | turboSETI Voyager1.single_coarse.fine_res.h5 -g n -S n -P n 53 | turboSETI blc3_2bit_guppi_57386_VOYAGER1_0002.gpuspec.0000.h5 -g n -S n -P n 54 | turboSETI blc07_guppi_57650_67573_Voyager1_0002.gpuspec.0000.h5 -g n -S n -P n 55 | 56 | echo "====> [BENCHMARK] CPU SINGLE PRECISION" 57 | turboSETI Voyager1.single_coarse.fine_res.h5 -g n -S y -P n 58 | turboSETI blc3_2bit_guppi_57386_VOYAGER1_0002.gpuspec.0000.h5 -g n -S y -P n 59 | turboSETI blc07_guppi_57650_67573_Voyager1_0002.gpuspec.0000.h5 -g n -S y -P n -------------------------------------------------------------------------------- /gen_drift_indexes/ancient_history.py.txt: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: profile=True 3 | import numpy as np 4 | from math import * 5 | import sys 6 | from libc.math cimport log2 7 | cimport numpy as np 8 | cimport cython 9 | from cython.view cimport array as cvarray 10 | 11 | def calc_drift_indexes(n): 12 | fftlen = 16 13 | tsteps = 2**n 14 | tsteps_valid = tsteps 15 | tdwidth = fftlen + 8*tsteps 16 | 17 | tree_dedoppler = np.zeros([tsteps, tdwidth], dtype=np.float32) 18 | #print tree_dedoppler.shape 19 | 20 | for i in range(0, tsteps): 21 | tree_dedoppler[tsteps_valid-1, i] = i 22 | 23 | # print tree_dedoppler[:, 0:20] 24 | cdef float [:] tree_dedoppler_view = tree_dedoppler.reshape(tsteps*tdwidth) 25 | 26 | myrecord = taylor_flt_record(tree_dedoppler_view, tsteps*tdwidth, tsteps) 27 | test_matrix = np.asarray(tree_dedoppler_view) 28 | test_matrix = test_matrix.reshape((tsteps, tdwidth)) 29 | 30 | #print 'Comparing to the original array...\n' 31 | #for i in range(0, tsteps): 32 | # for j in range(0, tdwidth): 33 | # print '%d\t'%tree_dedoppler[i, j], 34 | # print ' ' 35 | 36 | 37 | ibrev = np.zeros(tsteps, dtype='int32') 38 | drift_indexes_array = np.zeros([tsteps/2 ,tsteps], dtype='int32') 39 | 40 | for i in range(0, tsteps): 41 | ibrev[i] = bitrev(i, int(np.log2(tsteps))) 42 | 43 | test_matrix = test_matrix.reshape(tdwidth*tsteps) 44 | 45 | k = -1 46 | test_array = np.zeros(tsteps, dtype=np.int32) 47 | 48 | nstages = int(np.log2(tsteps)) 49 | recordbook = myrecord['stage%d'%(nstages-1)] 50 | for i in range(tsteps/2, tsteps): # here, i -> tsteps_valid -1 51 | for j in range(0, tsteps): 52 | ikey = 'row%d_col0'%j 53 | test_array[j] = recordbook[ikey][i][1] 54 | #print 'tsteps_valid:\t', i+1 55 | #print 'first column:\t', test_array 56 | for j in range(0, tsteps): 57 | #print "De-doppler rate: %f Hz/sec\n"%i 58 | indx = ibrev[j] 59 | if test_array[indx] != k: 60 | k = test_array[indx] 61 | drift_indexes_array[i-(tsteps/2)][k]=j 62 | #print "time index: %02d Sum: %02f"%(i, test_matrix[indx+j]) 63 | #print "drift_indexes[%d] = %d\n"%(k, i) 64 | #print drift_indexes 65 | 66 | np.save('drift_indexes_array_%d'%n, 
drift_indexes_array) 67 | -------------------------------------------------------------------------------- /test/test_pipelines_4.py: -------------------------------------------------------------------------------- 1 | r''' 2 | test_pipelines_4.py 3 | 4 | Test plotSETI. 5 | ''' 6 | 7 | import os 8 | import shutil 9 | from tempfile import gettempdir 10 | import pytest 11 | from turbo_seti import run_pipelines 12 | 13 | TESTDIR = gettempdir() + '/pipeline_testing/' 14 | PLOTDIR = TESTDIR + 'plots/' 15 | 16 | 17 | def execute_one(counter, args): 18 | print("\n====test_pipelines_4 [{}]================== args: {}".format(counter, args)) 19 | rc = run_pipelines.main(args) 20 | print("\n====test_pipelines_4 [{}]================== rc: {}".format(counter, rc)) 21 | return rc 22 | 23 | 24 | @pytest.mark.order(2) 25 | def test_pipelines_4a(): 26 | print('\n===== test_pipelines_4: BEGIN =====') 27 | 28 | args = [TESTDIR, "-o", PLOTDIR, "-f", "1", "-s", "25.0", "-c", "on"] 29 | rc = execute_one(1, args) 30 | assert(rc == 0) 31 | 32 | args = [TESTDIR, "-o", PLOTDIR, "-f", "2", "-s", "25.0", "-c", "on"] 33 | rc = execute_one(2, args) 34 | assert(rc == 0) 35 | 36 | args = [TESTDIR, "-o", PLOTDIR, "-f", "3", "-s", "25.0", "-c", "on"] 37 | rc = execute_one(3, args) 38 | assert(rc == 0) 39 | 40 | args = [TESTDIR, "-o", PLOTDIR, "-f", "3", "-m", "0.1", "-c", "on"] 41 | rc = execute_one(4, args) 42 | assert(rc == 0) 43 | 44 | args = [TESTDIR, "-o", PLOTDIR, "-f", "2", "-M", "0.4", "-c", "on"] 45 | rc = execute_one(5, args) 46 | assert(rc == 0) 47 | 48 | args = [TESTDIR, "-o", PLOTDIR, "-c", "off"] 49 | rc = execute_one(6, args) 50 | assert(rc != 0) 51 | 52 | args = [TESTDIR, "-o", PLOTDIR, "-c", "complex", "-n", "Rubbish"] 53 | rc = execute_one(7, args) 54 | assert(rc != 0) 55 | 56 | args = [TESTDIR, "-o", PLOTDIR, "-f", "2", "-c", "complex", "-n", "VOYAGER-1"] 57 | rc = execute_one(8, args) 58 | assert(rc == 0) 59 | 60 | print('\n===== test_pipelines_4: END =====') 61 | 62 | @pytest.mark.order(2) 63 | def test_pipelines_4b(): 64 | 65 | # --h5dat_lists 66 | LISTDIR = TESTDIR + "/my_lists" 67 | if not os.path.exists(LISTDIR): 68 | os.mkdir(LISTDIR) 69 | LISTH5 = "h5_files.lst" 70 | LISTDAT = "dat_files.lst" 71 | shutil.copyfile(TESTDIR + "/" + LISTH5, LISTDIR + "/" + LISTH5) 72 | shutil.copyfile(TESTDIR + "/" + LISTDAT, LISTDIR + "/" + LISTDAT) 73 | args = [TESTDIR, "-o", PLOTDIR, "-f", "2", "-M", "0.4", 74 | "--h5dat_lists", LISTDIR + "/" + LISTH5, LISTDIR + "/" + LISTDAT] 75 | rc = execute_one(42, args) 76 | assert(rc == 0) 77 | 78 | 79 | if __name__ == '__main__': 80 | #test_pipelines_4a() 81 | test_pipelines_4b() 82 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | 17 | autoclass_content = 'both' 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'turbo_seti' 22 | copyright = '2020, Breakthrough Listen' 23 | author = 'E. Enriquez et. al.' 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'recommonmark', 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.coverage', 35 | 'sphinx.ext.napoleon', 36 | 'sphinx.ext.viewcode', 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 46 | 47 | 48 | # -- Options for HTML output ------------------------------------------------- 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | # 53 | html_theme = "sphinx_rtd_theme" 54 | html_theme_options = { 55 | 'logo_only': False, 56 | 'display_version': False, 57 | 'prev_next_buttons_location': 'bottom', 58 | 'style_external_links': False, 59 | # Toc options 60 | 'collapse_navigation': True, 61 | 'sticky_navigation': True, 62 | 'navigation_depth': 4, 63 | 'includehidden': True, 64 | 'titles_only': False 65 | } 66 | 67 | # Add any paths that contain custom static files (such as style sheets) here, 68 | # relative to this directory. They are copied after the builtin static files, 69 | # so a file named "default.css" will overwrite the builtin "default.css". 70 | #DELETED# html_static_path = ['_static'] 71 | 72 | -------------------------------------------------------------------------------- /test/fb_genref.py: -------------------------------------------------------------------------------- 1 | r''' 2 | Package turbo_seti 3 | test/fb_genref.py 4 | 5 | Generate a reference file for subsequent use by test_fb_cases.py. 
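Running the module directly (python3 fb_genref.py from the test directory) rewrites
fb_dat_reference.txt with four synthesized cases covering both frequency directions
and both drift-rate signs.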
6 | ''' 7 | 8 | from os import makedirs 9 | from tempfile import gettempdir 10 | from shutil import rmtree 11 | import time 12 | import numpy as np 13 | from fb_cases_def import THE_MEANING_OF_LIFE, HERE, MIN_SNR 14 | from fb_cases_util import generate_fil_file, make_one_dat_file, get_case_results 15 | 16 | TESTDIR = '{}/{}/'.format(gettempdir(), 'test_fb_cases') 17 | PATH_FIL_FILE = TESTDIR + 'abc.fil' 18 | PATH_REF = HERE + '/fb_dat_reference.txt' 19 | MAX_DRIFT = 5 20 | 21 | 22 | def add_one(arg_case_num, arg_fh, arg_fdir, arg_drsign): 23 | r'''Add one case to the reference file''' 24 | print('fb_genref: Case {} ...'.format(arg_case_num)) 25 | generate_fil_file(PATH_FIL_FILE, arg_fdir, arg_drsign) 26 | make_one_dat_file(PATH_FIL_FILE, max_drift=MAX_DRIFT, min_snr=MIN_SNR) 27 | path_dat_file = PATH_FIL_FILE.replace('.fil', '.dat') 28 | obs_tophit_1, obs_tophit_2 = get_case_results(path_dat_file) 29 | arg_fh.write('#\n') 30 | arg_fh.write('# Case {}: frequency {}, drift rate {}\n' 31 | .format(arg_case_num, arg_fdir, arg_drsign)) 32 | FMT_RECORD = '{} {} {} {} {} {} {} {}\n' 33 | 34 | record_1 = FMT_RECORD \ 35 | .format(arg_case_num, arg_fdir, arg_drsign, obs_tophit_1.tophit_id, obs_tophit_1.drate, 36 | obs_tophit_1.snr, obs_tophit_1.freq, obs_tophit_1.index) 37 | arg_fh.write(record_1) 38 | 39 | record_2 = FMT_RECORD \ 40 | .format(arg_case_num, arg_fdir, arg_drsign, obs_tophit_2.tophit_id, obs_tophit_2.drate, 41 | obs_tophit_2.snr, obs_tophit_2.freq, obs_tophit_2.index) 42 | arg_fh.write(record_2) 43 | 44 | 45 | rmtree(TESTDIR, ignore_errors=True) 46 | makedirs(TESTDIR, exist_ok=True) 47 | np.random.seed(THE_MEANING_OF_LIFE) # setigen uses this. 48 | t1 = time.time() 49 | print('fb_genref: Begin generating {}'.format(PATH_REF)) 50 | 51 | with open(PATH_REF, 'w') as file_handle: 52 | file_handle.write('CaseNr fdir drsign tophit drate snr freq index\n') 53 | file_handle.write('#===== ==== ====== ====== ===== === ==== =====\n') 54 | add_one(1, file_handle, +1, +1) 55 | add_one(2, file_handle, +1, -1) 56 | add_one(3, file_handle, -1, +1) 57 | add_one(4, file_handle, -1, -1) 58 | file_handle.write('#\n') 59 | now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 60 | file_handle.write('#---------END {} ---------------------------\n'.format(now)) 61 | file_handle.close() 62 | 63 | et = (time.time() - t1) / 60.0 64 | print('fb_genref: Created {}, elapsed time = {:.2f} min'.format(PATH_REF, et)) 65 | -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- 1 | ### Usage of the turbo_seti tutorials ### 2 | 3 | The turbo_seti package is a Python tool, used and developed by SETI researchers at the Berkeley SETI Research Center. The turbo_seti search algorithm looks for narrow band signals that have a doppler drift, a feature expected from an alien source with a non-zero acceleration relative to our receivers on Earth. 4 | 5 | We'll search some HDF5 files that have been condensed into a single coarse channel and are routinely used for testing the code. They are located here: http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/ (total download size of 288 MB). 6 | 7 | Typical SETI searches, such as described by https://arxiv.org/pdf/1906.07750.pdf, have used drift rates of up to ±4 Hz/s and a minimum signal-to-noise ratio of 10. We'll adapt those parameters for these tutorials. 
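As a preview of the second method described below, a search with these parameters looks roughly like the following sketch. It is illustrative only: the file name is a placeholder for one of the downloaded HDF5 files, and the keyword names should be checked against the ```FindDoppler``` docstring in your installed version.
```
from turbo_seti import FindDoppler

# Placeholder file name: substitute any of the six downloaded single-coarse-channel HDF5 files.
h5_file = "single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5"

fdop = FindDoppler(h5_file,
                   max_drift=4,   # search drift rates up to +/- 4 Hz/s
                   snr=10,        # minimum signal-to-noise ratio
                   out_dir=".")   # directory for the .dat and .log output
fdop.search()
```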
8 | 9 | There are 2 methods of executing a turbo_seti search processing after package installation: 10 | 1) Run the ```turboSETI``` executable at the bash (or Windows equivalent) command line in a terminal window. 11 | 2) Develop and run your own Python program which leverages the ```FindDoppler``` class and its functions. 12 | 13 | Contained herein are two tutorials that display both methods: 14 | * ```tutorial_1.ipynb``` - The simplest approach for searching and analyzing candidate events. This is the recommended approach for most scientific work. 15 | * ```tutorial_2.ipynb``` - Functionally equivalent to the first tutorial and exposes details of the event pipeline functions after the search completes. This is a bit more complex model for use on a daily basis. 16 | 17 | Both tutorials require the execution of an initialization notebook before using them: ```initialise.ipynb```. This will download a set of 6 Voyager 2020 HDF5 files into a directory called "turbo_seti_data" under the user's home directory. 18 | 19 | After the files are downloaded, we'll need to set up a conda environment by running the following commands in the bash terminal (or Windows equivalent): 20 | ``` 21 | $ conda deactivate 22 | $ conda create -n turboseti # The string "turboseti" after -n is arbitrary. The name of your environment can be any of your choosing. 23 | $ conda activate turboseti 24 | $ conda install pip 25 | ``` 26 | 27 | Now let's install the required packages: 28 | ``` 29 | $ python3 -m pip install -U blimpy # <--- used by turbo_seti for Filterbank file access 30 | $ python3 -m pip install -U turbo_seti # <--- the latest stable turbo_seti 31 | ``` 32 | 33 | If you are trying out a new experimental version of turbo_seti, then the turbo_seti install step is a little different. For example, 34 | ``` 35 | $ python3 -m pip install -U git+https://github.com/texadactyl/turbo_seti 36 | ``` 37 | 38 | Now we need to install into the turboseti environment as an IPython kernel, so we can use it in Jupyter: 39 | ``` 40 | $ conda install -c anaconda ipykernel 41 | $ python -m ipykernel install --user --name=turboseti 42 | ``` 43 | A ready-to-use version of turboseti is now installed. You will have to restart Jupyter to see this kernel and be able to switch over (a quick refresh of the webpage should work). 44 | -------------------------------------------------------------------------------- /test/pipelines_util.py: -------------------------------------------------------------------------------- 1 | r'test_pipelines_N.py utilities' 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | EXP_SOURCE = ['VOYAGER-1'] * 2 8 | EXP_TOPHITNUM = [1, 2] 9 | EXP_STATUS = ['on_table_1', 'on_table_1'] 10 | EXP_CHANINDX = [651879, 659989] 11 | SNR_LOW = np.array([21.0, 192.0]) 12 | SNR_HIGH = np.array([24.0, 194.0]) 13 | CSV_DELIM = ',' 14 | 15 | 16 | def validate_hittbl(arg_pd_df, arg_csvf, arg_caller, arg_n_events): 17 | r''' 18 | Read in the CSV file into a raw Pandas DataFrame. 19 | Check that specific columns have the expected values: 20 | Source, TopHitNum, status, ChanIndx, and SNR. 21 | Check that the CSV and the Pandas dataframe match. 
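    Raises ValueError with a descriptive message if any of the checks fail.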
22 | ''' 23 | prefix = arg_caller + '[validate_hittbl]' 24 | df = pd.read_csv(arg_csvf, sep=CSV_DELIM) 25 | #df.drop('Unnamed: 0') 26 | nrows = len(df) 27 | if nrows != arg_n_events: 28 | raise ValueError('validate_csvf: Expected {} rows but observed {} rows' 29 | .format(arg_n_events, nrows)) 30 | 31 | csv_source = df['Source'].tolist() 32 | csv_tophitnum = df['TopHitNum'].tolist() 33 | csv_status = df['status'].tolist() 34 | csv_chanindx = df['ChanIndx'].tolist() 35 | csv_snr = df['SNR'].tolist() 36 | if csv_source != EXP_SOURCE: 37 | raise ValueError('{}: Expected source column {} but observed {}' 38 | .format(prefix, EXP_SOURCE, csv_source)) 39 | if csv_tophitnum != EXP_TOPHITNUM: 40 | raise ValueError('{}: Expected TopHitNum column {} but observed {}' 41 | .format(prefix, EXP_TOPHITNUM, csv_tophitnum)) 42 | if csv_status != EXP_STATUS: 43 | raise ValueError('{}: Expected status column {} but observed {}' 44 | .format(prefix, EXP_STATUS, csv_status)) 45 | if csv_chanindx != EXP_CHANINDX: 46 | raise ValueError('{}: Expected channel index column {} but observed {}' 47 | .format(prefix, EXP_CHANINDX, csv_chanindx)) 48 | if np.any(csv_snr > SNR_HIGH) or np.any(csv_snr < SNR_LOW): 49 | raise ValueError('{}: Expected SNR column in range of {}:{} but observed {}' 50 | .format(prefix, SNR_LOW, SNR_HIGH, csv_snr)) 51 | failures = 0 52 | 53 | pd_thn = arg_pd_df['TopHitNum'].values 54 | if not np.all(csv_tophitnum == pd_thn): 55 | print('*** Oops, {}: pd_thn={}, csv_tophitnum={}' 56 | .format(prefix, pd_thn, csv_tophitnum)) 57 | failures += 1 58 | 59 | pd_chanindx = arg_pd_df['ChanIndx'].values 60 | if not np.all(csv_chanindx == pd_chanindx): 61 | print('*** Oops, {}: pd_chanindx={}, csv_chanindx={}' 62 | .format(prefix, pd_chanindx, csv_chanindx)) 63 | failures += 1 64 | 65 | pd_snr = arg_pd_df['SNR'].values 66 | if not np.all(np.isclose(csv_snr, pd_snr, rtol=0.0001)): 67 | print('*** Oops, {}: pd_snr={}, csv_snr={}' 68 | .format(prefix, pd_snr, csv_snr)) 69 | failures += 1 70 | 71 | if failures > 0: 72 | raise ValueError('{}: CSV and pandas table do not agree!' 73 | .format(prefix)) 74 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/_taylor_tree/_core_cuda.py: -------------------------------------------------------------------------------- 1 | import cupy as cp 2 | import numpy as np 3 | 4 | # Type pairs that we support 5 | TYPE_PAIRS = {"float": cp.float32, "double": cp.float64} 6 | 7 | # Cuda kernels for the flt function to use. 8 | # Based on the original C code by Franklin Antonio, available at 9 | # https://github.com/UCBerkeleySETI/dedopplerperf/blob/main/CudaTaylor5demo.cu 10 | # It does one round of the Taylor tree algorithm, calculating the sums of length-2^(x+1) paths 11 | # from the sums of length-2^x paths. 
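# Example of how the rounds compose (comment only): with n_time = 4, the first round
# (set_size = 2) combines pairs of adjacent time rows into length-2 path sums, and the
# second round (set_size = 4) combines those into length-4 path sums, leaving one
# output row per drift index.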
12 | CODE = r""" 13 | template <typename T> 14 | __global__ void taylor(const T* A, T* B, int kmin, int kmax, int set_size, int n_time, int n_freq) { 15 | int tid = blockIdx.x * blockDim.x + threadIdx.x; 16 | int k = kmin + tid; 17 | bool worker = (k >= kmin) && (k < kmax) && set_size <= n_time; 18 | if (!worker) { 19 | return; 20 | } 21 | for (int j = 0; j < n_time; j += set_size) { 22 | for (int j0 = set_size - 1; j0 >= 0; j0--) { 23 | int j1 = j0 / 2; 24 | int j2 = j1 + set_size / 2; 25 | int j3 = (j0 + 1) / 2; 26 | if (k + j3 < kmax) { 27 | B[(j + j0) * n_freq + k] = A[(j + j1) * n_freq + k] + A[(j + j2) * n_freq + k + j3]; 28 | } 29 | } 30 | } 31 | } 32 | """ 33 | C_TYPES = TYPE_PAIRS.keys() 34 | NAME_EXPS = [f"taylor<{t}>" for t in C_TYPES] 35 | MODULE = cp.RawModule(code=CODE, options=("-std=c++11",), name_expressions=NAME_EXPS) 36 | KERNELS = {} 37 | for c_type, name_exp in zip(C_TYPES, NAME_EXPS): 38 | KERNELS[c_type] = MODULE.get_function(name_exp) 39 | 40 | 41 | def flt(array, n_time): 42 | """ 43 | Taylor-tree-sum the data in array. 44 | 45 | array should be a 1-dimensional cupy array. If reshaped into two dimensions, the 46 | data would be indexed so that array[time][freq] stores the data at a particular time 47 | and frequency. So, the same way h5 files are typically stored. 48 | 49 | n_time is the number of timesteps in the data. 50 | 51 | The algorithm uses one scratch buffer, and in each step of the loop, it calculates 52 | sums from one buffer and puts the output in the other. Thus, the drift sums we are looking 53 | for may end up either in the original buffer, or in the scratch buffer. This method 54 | returns whichever buffer is the one to use, and we leave the other one for cupy to clean up. 55 | """ 56 | taylor_kernel = None 57 | for c_type, py_type in TYPE_PAIRS.items(): 58 | if py_type == array.dtype: 59 | taylor_kernel = KERNELS[c_type] 60 | break 61 | else: 62 | raise RuntimeError( 63 | f"we have no GPU taylor kernel for the numerical type: {array.dtype}" 64 | ) 65 | 66 | assert len(array) % n_time == 0 67 | n_freq = len(array) // n_time 68 | buf = cp.zeros_like(array) 69 | 70 | # Cuda params 71 | block_size = 1024 72 | grid_size = (n_freq + block_size - 1) // block_size 73 | 74 | set_size = 2 75 | while set_size <= n_time: 76 | taylor_kernel( 77 | (grid_size,), 78 | (block_size,), 79 | (array, buf, 0, n_freq, set_size, n_time, n_freq), 80 | ) 81 | array, buf = buf, array 82 | set_size *= 2 83 | 84 | return array 85 | -------------------------------------------------------------------------------- /test/fb_cases_def.py: -------------------------------------------------------------------------------- 1 | r''' 2 | Data definitions for test_fb_cases.py 3 | ''' 4 | 5 | import os 6 | from tempfile import gettempdir 7 | from astropy import units as u 8 | 9 | #---------- Constants ------------ 10 | TESTDIR = '{}/{}_{}/'.format(gettempdir(), 'test_fb_cases', os.getpid()) 11 | HERE = os.path.split(os.path.abspath(__file__))[0] 12 | PATH_FIL_FILE = TESTDIR + 'abc.fil' 13 | MIN_SNR = 50 14 | THE_MEANING_OF_LIFE = 42 15 | DEBUGGING = False 16 | RTOL_DIFF = 0.05 # 5% 17 | 18 | 19 | class TestResultRecord: 20 | r''' Object definition for a test result record ''' 21 | 22 | def __init__(self): 23 | self.fdir = 0 # frequency direction: +1 is ascending, -1 is descending 24 | self.drsign = 0 # drift rate algebraic sign: +1 is positive, -1 is negative 25 | self.tophit_id = 0 # identifier of this top hit (1 or 2) 26 | self.drate = 0.0 # drift rate of this top hit 27 | self.snr = 0.0 28 |
self.freq = 0.0 # corrected frequency 29 | self.index = 0 # index to the frequencies 30 | 31 | 32 | def to_string(self): 33 | ''' 34 | Return a displayable string of attribute values. 35 | ''' 36 | return 'fdir: ' + str(self.fdir) + ', drsign: ' + str(self.drsign) \ 37 | + ', tophit_id: ' + str(self.tophit_id) + ', drate: ' + str(self.drate) \ 38 | + ', snr: ' + str(self.snr) + ', freq: ' + str(self.freq) \ 39 | + ', index: ' + str(self.index) 40 | 41 | 42 | class SetigenParms: 43 | r'''Object definition for setigen parameters''' 44 | 45 | def __init__(self): 46 | 47 | # Parameters for all signals 48 | self.fchans = 1048576 # number of (fine) channels 49 | self.tchans = 60 # number of time samples 50 | self.df = 1.0 * u.Hz # fine channel width in Hz 51 | self.dt = 1.0 * u.s #sampling time in seconds 52 | self.fch1 = 8421.386717353016 * u.MHz # Starting frequency in MHz 53 | self.noise_std = 0.05 # Gaussian standard deviation 54 | 55 | # Signal 1 parameters 56 | self.signal_start_1 = self.fchans / 5 # index to frequency columns 57 | self.drift_rate_1 = 1.3 * u.Hz/u.s # drift rate to inject 58 | self.width_1 = 1.0 * u.Hz # signal width in Hz 59 | self.snr_1 = MIN_SNR + 50 # SNR which will determine setigen intensity level 60 | 61 | # Signal 2 parameters 62 | self.signal_start_2 = 4 * self.fchans / 5 63 | self.drift_rate_2 = 2.6 * u.Hz/u.s 64 | self.width_2 = 2.0 * u.Hz 65 | self.snr_2 = MIN_SNR * 6.0 66 | 67 | # Signal 3 parameters 68 | self.signal_start_3 = self.fchans / 3 69 | self.drift_rate_3 = 3.9 * u.Hz/u.s 70 | self.width_3 = 40.0 * u.Hz 71 | self.snr_3 = MIN_SNR - 20.0 72 | if self.snr_3 < 0: 73 | self.snr_3 = 0.001 74 | 75 | # Signal 4 parameters 76 | self.signal_start_4 = 9 * self.fchans / 16 77 | self.drift_rate_4 = 100.0 * u.Hz/u.s 78 | self.width_4 = 40.0 * u.Hz 79 | self.snr_4 = MIN_SNR + 20.0 80 | 81 | # Signal 5 is similar to signal 4 but drifting in the opposite direction. 82 | self.signal_start_5 = 7 * self.fchans / 16 83 | self.drift_rate_5 = -self.drift_rate_4 # opposite direction as signal 4 84 | self.width_5 = self.width_4 85 | self.snr_5 = self.snr_4 86 | -------------------------------------------------------------------------------- /test/test_fb_cases.py: -------------------------------------------------------------------------------- 1 | r''' 2 | Package turbo_seti 3 | test/test_fb_cases.py 4 | 5 | IMPORTANT: If the parameters are changed in fb_cases_def.py, 6 | then the following must be executed: `python3 fb_genref.py` 7 | 8 | System concept 9 | -------------- 10 | Use setigen to generate small-ish but effective filterbank files. 11 | Then, run turboSETI, producing a DAT file. 12 | Compare resultant DAT file contents to expected results. 13 | 14 | Design 15 | ------ 16 | Entry point for pytest: test_main. 17 | For each maximum drift rate entertained, 18 | For each test case in the reference (paired records, one each for hit 1 and 2), 19 | Execute the test case 20 | 1. Generate .fil file with 2 top hits using setigen. 21 | 2. Run turboSETI ---> DAT file. 22 | 3. Get results from DAT file for both hits. 23 | 4. For each hit, compare results to the reference. 
24 | ''' 25 | 26 | 27 | import time 28 | import gc 29 | from shutil import rmtree 30 | import numpy as np 31 | from fb_cases_def import THE_MEANING_OF_LIFE, DEBUGGING, TESTDIR, PATH_FIL_FILE, MIN_SNR 32 | from fb_cases_util import generate_fil_file, initialize, make_one_dat_file, \ 33 | get_case_results, case_comparison 34 | 35 | 36 | def exec_one_case(case_num, path_fil_file, max_drift, ref_tophit_1, ref_tophit_2): 37 | r'''Execute one test case''' 38 | if DEBUGGING: 39 | print('exec_one_case: on entry, max_drift={}\nref_tophit_1:::{}\nref_tophit_2:::{}' 40 | .format(max_drift, ref_tophit_1.to_string(), ref_tophit_2.to_string())) 41 | generate_fil_file(path_fil_file, ref_tophit_1.fdir, ref_tophit_1.drsign) 42 | make_one_dat_file(path_fil_file, max_drift=max_drift, min_snr=MIN_SNR) 43 | path_dat_file = path_fil_file.replace('.fil', '.dat') 44 | obs_tophit_1, obs_tophit_2 = get_case_results(path_dat_file) 45 | obs_tophit_1.fdir = ref_tophit_1.fdir # replace 0 with correct value 46 | obs_tophit_1.drsign = ref_tophit_1.drsign # replace 0 with correct value 47 | obs_tophit_2.fdir = ref_tophit_2.fdir # replace 0 with correct value 48 | obs_tophit_2.drsign = ref_tophit_2.drsign # replace 0 with correct value 49 | if DEBUGGING: 50 | print('exec_one_case: case results\nobs_tophit_1:::{}\nobs_tophit_2:::{}' 51 | .format(obs_tophit_1.to_string(), obs_tophit_2.to_string())) 52 | case_comparison(obs_tophit_1, ref_tophit_1, max_drift) 53 | case_comparison(obs_tophit_2, ref_tophit_2, max_drift) 54 | gc.collect() 55 | print('=== CASE {} at max drift {} success'.format(case_num, max_drift)) 56 | 57 | 58 | def run_test_cases(ref_tophit_1, ref_tophit_2, max_drift=None): 59 | r'''Pytest Entry Point''' 60 | if max_drift is None: 61 | raise ValueError('run_test_cases: max_drift not set') 62 | for jj in range(len(ref_tophit_1)): 63 | exec_one_case(jj + 1, PATH_FIL_FILE, max_drift, ref_tophit_1[jj], ref_tophit_2[jj]) 64 | 65 | 66 | def test_main(cleanup=True): 67 | r'''Pytest Entry Point''' 68 | np.random.seed(THE_MEANING_OF_LIFE) # setigen uses this. 
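    # The fixed seed reproduces the setigen signals that fb_genref.py used when it built
    # fb_dat_reference.txt, so the observed top hits stay comparable to the reference.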
69 | print('test_main: PATH_FIL_FILE = {}'.format(PATH_FIL_FILE)) 70 | ref_tophit_1, ref_tophit_2 = initialize(TESTDIR) 71 | run_test_cases(ref_tophit_1, ref_tophit_2, max_drift=5) 72 | run_test_cases(ref_tophit_1, ref_tophit_2, max_drift=10) 73 | run_test_cases(ref_tophit_1, ref_tophit_2, max_drift=20) 74 | run_test_cases(ref_tophit_1, ref_tophit_2, max_drift=30) 75 | if cleanup: 76 | rmtree(TESTDIR) 77 | 78 | 79 | if __name__ == '__main__': 80 | t1 = time.time() 81 | test_main(cleanup=False) 82 | et = (time.time() - t1) / 60.0 83 | print('test_fb_cases: Elapsed time = {:.2f} min'.format(et)) 84 | -------------------------------------------------------------------------------- /test/download_test_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import h5py 5 | import hdf5plugin 6 | 7 | try: 8 | import pytest_order.settings 9 | print("pytest_order (required) is installed.") 10 | except: 11 | print("\n*** Missing required module 'pytest_order'.") 12 | print("*** Please run `python3 -m pip install -r requirements_test.txt.\n") 13 | sys.exit(86) 14 | 15 | HERE = os.path.split(os.path.abspath(__file__))[0] 16 | MIN_SIZE = 40000000 17 | 18 | URL1 = "http://blpd0.ssl.berkeley.edu/Voyager_data/" 19 | FILE1 = "Voyager1.single_coarse.fine_res.h5" 20 | 21 | URL2 = "http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/" 22 | FILE2 = "single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5" 23 | 24 | 25 | def oops(msg): 26 | print("\n*** OOPS, {} !!!\n".format(msg)) 27 | sys.exit(86) 28 | 29 | 30 | def check_h5(path): 31 | 32 | try: 33 | h5 = h5py.File(path, mode='r') 34 | except: 35 | oops("Is {} really an HDF5 file? Probably not".format(path)) 36 | h5.close() 37 | 38 | 39 | def download_test_data(): 40 | """ Download Voyager test data """ 41 | 42 | try: 43 | os.system('rm *.h5 *.fil *.dat *.log *.png 2> /dev/null') 44 | except: 45 | pass 46 | print("Begin downloads .....\n") 47 | 48 | exit_status = os.system("curl --url '{}/{}' -o ./{}".format(URL2, FILE2, FILE2)) 49 | if exit_status != 0: 50 | oops("cannot download {}".format(FILE2)) 51 | sz = os.path.getsize(FILE2) 52 | if sz < MIN_SIZE: 53 | oops("Downloaded file {} is way too small, size={}".format(FILE2, sz)) 54 | check_h5(FILE2) 55 | 56 | exit_status = os.system("curl --url '{}/{}' -o ./{}".format(URL1, FILE1, FILE1)) 57 | if exit_status != 0: 58 | oops("cannot download {}".format(FILE1)) 59 | sz = os.path.getsize(FILE1) 60 | if sz < MIN_SIZE: 61 | oops("Downloaded file {} is way too small, size={}".format(FILE1, sz)) 62 | check_h5(FILE1) 63 | 64 | print("\nDownloads ok.") 65 | 66 | 67 | def create_fil_from_h5(path): 68 | """ Create a .fil file from an .h5 file. """ 69 | exit_status = os.system("h52fil %s" % path) 70 | if exit_status != 0: 71 | oops("h52fil {} FAILED".format(path)) 72 | print("h52fil ok.") 73 | 74 | 75 | def flip_data(filename): 76 | """ Flip Voyager data along frequency axis. 77 | 78 | The flipped file is used to check logic works when frequency is inverted. 
79 | """ 80 | print("Generating frequency flipped version of Voyager data...") 81 | assert filename.endswith('.h5') 82 | flipped_filename = filename.replace('.h5', '.flipped.h5') 83 | exit_status = os.system('cp %s %s' % (filename, flipped_filename)) 84 | if exit_status != 0: 85 | oops("cp {} to {} FAILED".format(filename, flipped_filename)) 86 | with h5py.File(flipped_filename, 'r+') as h: 87 | foff_orig = h['data'].attrs['foff'] 88 | fch1_orig = h['data'].attrs['fch1'] 89 | nchans = h['data'].attrs['nchans'] 90 | fchN = fch1_orig + (foff_orig * nchans) 91 | h['data'].attrs['foff'] = foff_orig * -1 92 | h['data'].attrs['fch1'] = fchN 93 | h['data'].attrs['source_name'] = 'Voyager1Flipped' 94 | 95 | for ii in range(h['data'].shape[0]): 96 | print('\tFlipping %i/%i' % (ii+1, h['data'].shape[0])) 97 | h['data'][ii, 0, :] = h['data'][ii, 0][::-1] 98 | print("Done.") 99 | 100 | 101 | if __name__ == "__main__": 102 | download_test_data() 103 | voyager_full_path = os.path.join(HERE, FILE1) 104 | flip_data(voyager_full_path) 105 | create_fil_from_h5(voyager_full_path) 106 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | from .Scheduler import Scheduler 4 | 5 | 6 | class Kernels: 7 | r""" 8 | Dynamically loads the right modules according to parameters. 9 | 10 | Parameters 11 | ---------- 12 | gpu_backend : bool, optional 13 | Enable GPU acceleration. 14 | precision : int {2: float64, 1: float32}, optional 15 | Floating point precision. 16 | 17 | """ 18 | 19 | def __init__(self, gpu_backend=False, precision=2, gpu_id=0): 20 | self.gpu_backend = gpu_backend 21 | self.precision = precision 22 | self.gpu_id = gpu_id 23 | 24 | if not self.has_gpu() and self.gpu_backend: 25 | raise RuntimeError("cupy is not installed, so the GPU cannot be used.") 26 | 27 | self._base_lib = "turbo_seti.find_doppler.kernels" 28 | 29 | self._load_base() 30 | self._load_taylor_tree() 31 | self._load_hitsearch() 32 | self._load_bitrev() 33 | 34 | def _load_precision(self): 35 | if self.precision == 2: 36 | return self.xp.float64 37 | if self.precision == 1: 38 | return self.xp.float32 39 | if self.precision == 0: 40 | return self.xp.float16 41 | 42 | raise ValueError("Invalid float precision.") 43 | 44 | def _load_base(self): 45 | if self.gpu_backend: 46 | self.xp = importlib.import_module("cupy") 47 | self.np = importlib.import_module("numpy") 48 | self.xp.cuda.Device(self.gpu_id).use() 49 | else: 50 | self.xp = importlib.import_module("numpy") 51 | self.np = self.xp 52 | 53 | self.float_type = self._load_precision() 54 | 55 | def _load_taylor_tree(self): 56 | if self.gpu_backend: 57 | self.tt = importlib.import_module( 58 | self._base_lib + "._taylor_tree._core_cuda" 59 | ) 60 | else: 61 | self.tt = importlib.import_module( 62 | self._base_lib + "._taylor_tree._core_numba" 63 | ) 64 | 65 | def _load_hitsearch(self): 66 | if self.gpu_backend: 67 | self.hitsearch = importlib.import_module( 68 | self._base_lib + "._hitsearch" 69 | ).hitsearch 70 | 71 | def _load_bitrev(self): 72 | self.bitrev = importlib.import_module(self._base_lib + "._bitrev").bitrev 73 | 74 | def get_spectrum(self, tt_output, tsteps, tdwidth, drift_index): 75 | """ 76 | The different Taylor tree kernels have a slightly different output. 77 | Both of them you can think of indexed by [row index][frequency], although it is 78 | reshaped as a 1-dimensional array. 
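        Concretely, the value for row index r and frequency f lives at flat index
        r * tdwidth + f, which is how the returned slice is located below.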
79 | In the GPU version, the row index is the same as the "drift index". 0 is the least drift, 80 | 1 is the next least drift, et cetera. 81 | In the CPU version, the row index is bit-reversed from this. 82 | This method lets the caller get data for a particular drift without knowing 83 | how the rows are ordered. 84 | There's a good chance that one or both of these is suboptimal; please update this 85 | comment if you change the underlying algorithm. 86 | """ 87 | if self.gpu_backend: 88 | row_index = drift_index 89 | else: 90 | row_index = self.bitrev(drift_index, int(self.np.log2(tsteps))) 91 | 92 | tt_start_index = row_index * tdwidth 93 | return tt_output[tt_start_index : tt_start_index + tdwidth] 94 | 95 | @staticmethod 96 | def has_gpu(): 97 | r""" 98 | Check if the system has the modules needed for the GPU acceleration. 99 | 100 | Note 101 | ---- 102 | Modules are listed on `requirements_gpu.txt`. 103 | 104 | Returns 105 | ------- 106 | has_gpu : bool 107 | True if the system has GPU capabilities. 108 | 109 | """ 110 | try: 111 | import cupy 112 | 113 | cupy.__version__ 114 | except: 115 | return False 116 | return True 117 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/helper_functions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | 6 | def chan_freq(header, fine_channel, tdwidth, ref_frame): 7 | r""" 8 | Find channel frequency. 9 | Note issue #98. 10 | 11 | Parameters 12 | ---------- 13 | header : 14 | fine_channel : 15 | tdwidth : 16 | ref_frame : 17 | 18 | Returns 19 | ------- 20 | chanfreq : float 21 | 22 | """ 23 | fftlen = header['NAXIS1'] 24 | chan_index = fine_channel - (tdwidth-fftlen)/2 25 | chanfreq = header['FCNTR'] + (chan_index - fftlen/2)*header['DELTAF'] 26 | # apply doppler correction 27 | #if ref_frame == 1: 28 | # baryv was always 0. What is the point? 29 | # chanfreq = (1 - header['baryv']) * chanfreq 30 | return chanfreq 31 | 32 | 33 | def bitrev(inval, nbits): 34 | r""" 35 | This function bit-reverses the given value "inval" with the number of bits, "nbits". 36 | 37 | Parameters 38 | ---------- 39 | inval : int 40 | Number to be bit-reversed. 41 | nbits : int 42 | The length of inval in bits. If user only wants the bit-reverse of a certain amount of bits of 43 | inval, nbits is the amount of bits to be reversed counting from the least significant (rightmost) 44 | bit. Any bits beyond this length will not be reversed and will be truncated from the result. 45 | 46 | Returns 47 | ------- 48 | : int 49 | The bit-reverse of inval. If there are more significant bits beyond nbits, they are truncated. 50 | 51 | References 52 | ---------- 53 | - R. Ramachandran, 10-Nov-97, nfra. -- Original C implementation. 54 | - H. Chen, 2014 -- Python version. 55 | - R. Elkins (texadactyl), 2020 -- Speedup. 56 | 57 | """ 58 | if nbits <= 1: 59 | ibitr = inval 60 | else: 61 | ifact = 2**(nbits - 1) 62 | k = inval 63 | ibitr = 0 if (1 & k == 0) else ifact 64 | for _ in range(2, nbits+1): 65 | k = k >> 1 66 | ifact = ifact >> 1 67 | if 1 & k: 68 | ibitr += ifact 69 | return ibitr 70 | 71 | 72 | def FlipX(outbuf, xdim, ydim, xp=None): 73 | r""" 74 | This function takes in an array of values and iteratively flips ydim chunks of values of length xdim. 75 | 76 | Parameters 77 | ---------- 78 | outbuf : ndarray 79 | An array with shape like (int, 1) 80 | xdim : int 81 | Size of segments to be flipped. 
82 | ydim : int 83 | Amount of segments of size xdim to be flipped. 84 | xp : Numpy or Cupy, optional 85 | Math module to be used. If `None`, Numpy will be used. 86 | 87 | Examples 88 | -------- 89 | If you have an array [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] and enter it with xdim = 5 and ydim = 2, the array will be 90 | modified to become [5, 4, 3, 2, 1, 10, 9, 8, 7, 6]. Note that if you wish for the whole array to be modified in this 91 | way, xdim * ydim should equal the length of the array. If ydim * xdim is greater than the length of the array, this 92 | function will error. 93 | 94 | """ 95 | if not xp: 96 | xp = np 97 | 98 | xp.copyto(outbuf, outbuf.reshape((ydim, xdim))[:, ::-1].ravel()) 99 | 100 | 101 | def comp_stats(np_arr, xp=None): 102 | """ 103 | Compute median and stddev of floating point vector array in a fast way, discarding outliers. 104 | 105 | Parameters 106 | ---------- 107 | np_arr : ndarray 108 | Floating point vector array. 109 | xp : Numpy or Cupy, optional 110 | Math module to be used. If `None`, Numpy will be used. 111 | 112 | Returns 113 | ------- 114 | the_median, the_stddev : numpy.float32, numpy.float32 115 | Median and standard deviation of input array with outliers removed. 116 | 117 | """ 118 | if not xp: 119 | xp = np 120 | 121 | low, median, high = xp.percentile(np_arr, [5, 50, 95]) 122 | drop_high = np_arr[np_arr <= high] 123 | drop_outliers = drop_high[drop_high >= low] 124 | stdev = drop_outliers.std() 125 | 126 | return median.astype(xp.float32), stdev.astype(xp.float32) 127 | -------------------------------------------------------------------------------- /gen_drift_indexes/gen_drift_indexes.py: -------------------------------------------------------------------------------- 1 | r''' Create a drift index file. ''' 2 | 3 | from argparse import ArgumentParser 4 | 5 | TRACING = False 6 | 7 | 8 | def generate_row(arg_ncols): 9 | r''' Generate a row template. ''' 10 | wrow = [0] 11 | wrowlen = 1 12 | for ii in range(arg_ncols): 13 | if ii % 2 == 1: 14 | wrow.append(ii) 15 | wrowlen += 1 16 | padlen = arg_ncols - wrowlen 17 | for ii in range(padlen): 18 | wrow.append(0) 19 | return wrow 20 | 21 | 22 | def elem_insert(arg_row, arg_elem): 23 | r''' Insert one element into the current row. ''' 24 | if TRACING: 25 | print('TRACE elem_insert row={}, elem={}'.format(arg_row, arg_elem)) 26 | # Check for duplicate. 27 | if arg_elem in arg_row: 28 | if TRACING: 29 | print('TRACE elem_insert ignoring dup') 30 | return 31 | # Get index of neighbour to the right. 32 | windex = arg_row.index(arg_elem + 1) 33 | # Insert just to left of neighbour. 34 | arg_row.insert(windex, arg_elem) 35 | # If the last element is 0, discard it. 36 | if arg_row[-1] == 0: 37 | arg_row.pop(-1) 38 | 39 | 40 | def writer(arg_fh, arg_row): 41 | r''' Write the text of the given row to the output file. ''' 42 | buffer = '' 43 | for elem in arg_row: 44 | buffer += ' ' + str(elem) 45 | arg_fh.write(buffer + '\n') 46 | 47 | 48 | def proc_one_file(arg_dirpath, arg_filenumber): 49 | r''' Generate one drift index file. ''' 50 | 51 | filepath = arg_dirpath + '/' + 'drift_indexes_array_{}.txt'.format(arg_filenumber) 52 | nrows = 2 ** (arg_filenumber - 1) 53 | ncols = 2 ** arg_filenumber 54 | print('Begin {}, nrows={}, ncols={}'.format(filepath, nrows, ncols)) 55 | 56 | with open(filepath, "w") as fh: 57 | 58 | # Write the first row (#0). 
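        # For example, generate_row(4) yields the template row [0, 1, 3, 0].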
59 | row_0 = generate_row(ncols) 60 | if TRACING: 61 | print('TRACE ii=1') 62 | writer(fh, row_0) 63 | 64 | if TRACING: 65 | print('TRACE begin row-loop') 66 | 67 | # Construct and write the remaining rows. 68 | for row_num in range(1, nrows): 69 | 70 | if TRACING: 71 | print('TRACE row_num={}'.format(row_num)) 72 | 73 | # If this is an odd row number, insert nrows into to the row as an element. 74 | row = generate_row(ncols) 75 | if row_num % 2 == 1: # odd row number? 76 | elem_insert(row, nrows) 77 | 78 | # For each power of 2 (2, 4, 8, ..., 2^(arg_filenumber - 1)), 79 | # Create a divisor = that power of 2. 80 | # Use the divisor as a selection mechanism for the current row: 81 | # If (row_num // divisor) is odd, 82 | # then insert all of the elements from the range 83 | # nrows // divisor up to ncols by 2 * (nrows // divisor). 84 | for jj in range(1, arg_filenumber): 85 | divisor = 2 ** jj 86 | if (row_num // divisor) % 2 == 1: 87 | start_ix = nrows // divisor 88 | for elem in range(start_ix, ncols, 2 * start_ix): 89 | elem_insert(row, elem) 90 | 91 | # Write the accumulated current row. 92 | writer(fh, row) 93 | 94 | print('End {}'.format(filepath)) 95 | 96 | 97 | def cmd_tool(args=None): 98 | r''' Main program of the command-line tool. ''' 99 | p = ArgumentParser(description='Generate a drift index file.') 100 | p.add_argument('dirpath', type=str, 101 | help='Path of directory to write the drift index file.') 102 | p.add_argument('-n', '--file-number', type=int, default=11, dest='max_file_number', 103 | help='Maximum value for the log base 2 of the number of time integration steps.') 104 | if args is None: 105 | args = p.parse_args() 106 | else: 107 | args = p.parse_args(args) 108 | 109 | for fn in range(2, args.max_file_number + 1): 110 | proc_one_file(args.dirpath, fn) 111 | 112 | 113 | if __name__ == "__main__": 114 | cmd_tool() 115 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/merge_dats_logs.py: -------------------------------------------------------------------------------- 1 | r''' Source file for merge_dats_logs() ''' 2 | 3 | from os import chdir, getcwd, listdir, remove, rename 4 | 5 | PREFIX = "_c_" 6 | DEBUGGING = False 7 | DATLNFMT = '{:>4s} {:>12s} {:>12s} {:>12s} {:>12s} {:>9s} {:>12s} {:>12s}' \ 8 | + '{:>4s} {:>12s} {:>5s} {:>9s}\n' 9 | 10 | def merge_dats_logs(arg_h5: str, arg_dir: str, arg_type: str, cleanup='n'): 11 | r""" 12 | Merge multiple DAT (or LOG) files. 13 | 14 | Parameters 15 | ---------- 16 | arg_h5 : str 17 | HDF5 file used by :func:`~turbo_seti.find_doppler.find_doppler.FindDoppler.search` 18 | to produce the DAT and LOG files. 19 | arg_dir : str 20 | Directory holding multiple DAT and LOG files after FindDoppler.search() 21 | which ran with more than 1 partition. 22 | arg_type : str 23 | File extension of interest ('dat' or 'log'). 24 | 25 | """ 26 | print("merge_dats_logs: dir={}, type={}, cleanup={}" 27 | .format(arg_dir, arg_type, cleanup)) 28 | RETURN_TO = getcwd() # Save our current directory path 29 | chdir(arg_dir) # Change directory 30 | suffix = '.' + arg_type # E.g. 
.dat 31 | files = [] 32 | filenamestem = arg_h5.split('/')[-1].replace('.h5', '') 33 | len_filenamestem = len(filenamestem) 34 | print('merge_dats_logs: Working on filename-stem {} type {}' 35 | .format(filenamestem, arg_type)) 36 | sorted_file_list = sorted(listdir(arg_dir)) 37 | counter = 0 38 | if DEBUGGING: 39 | print("DEBUG merge_dats_logs: listdir=", sorted_file_list) 40 | for cur_file in sorted_file_list: 41 | cur_type = cur_file.split('.')[-1] 42 | if cur_type == arg_type and not cur_file.startswith(PREFIX): 43 | # This is the type of file we are looking for. 44 | # and it is not the combination version we are building. 45 | # Does cur_file match the HDF5 file? 46 | if cur_file[0:len_filenamestem] == filenamestem: 47 | files.append(cur_file) 48 | if DEBUGGING: 49 | print("DEBUG merge_dats_logs: Selected for merging: ", cur_file) 50 | counter += 1 51 | if counter < 1: 52 | print("*** merge_dats_logs: Nothing selected for merging") 53 | chdir(RETURN_TO) 54 | return 55 | 56 | # Append the combo file with each list member. 57 | path_prefixed_combo = PREFIX + filenamestem + suffix 58 | with open(path_prefixed_combo, "w") as outfile: 59 | # Write first file encountered fully. 60 | with open(files[0], "r") as fd: 61 | for line in fd: 62 | outfile.write(line) 63 | # Write subsequent files, filtering out comment lines (start with '#') 64 | if arg_type == 'dat': 65 | tophit_counter = 0 66 | for cur_file in files[1:]: 67 | with open(cur_file, "r") as fd: 68 | for inline in fd: 69 | if not inline.startswith("#"): # not a comment 70 | if arg_type == 'dat': # renumber tophit number field 71 | tophit_counter += 1 72 | outlist = inline.split() 73 | if DEBUGGING: 74 | print('DEBUG outlst:', outlist) 75 | outlist[0] = str(tophit_counter) 76 | outfile.write(DATLNFMT.format(*outlist)) 77 | else: # log file 78 | outfile.write(inline) 79 | 80 | # if cleanup is requested, do it now. 81 | if cleanup == 'y': 82 | # Remove all of the partitions. 83 | for cur_file in files: 84 | remove(cur_file) 85 | if DEBUGGING: 86 | print("merge_dats_logs: Removed: ", cur_file) 87 | # Rename the merged file 88 | path_merge_file = filenamestem + suffix 89 | try: 90 | rename(path_prefixed_combo, path_merge_file) 91 | print("merge_dats_logs: Merged into", path_merge_file) 92 | except Exception as exc: 93 | print("*** os.rename({}, {}) failed, reason:{}\n" 94 | .format(path_prefixed_combo, path_merge_file, str(exc))) 95 | 96 | # Change back to caller's current directory 97 | chdir(RETURN_TO) 98 | -------------------------------------------------------------------------------- /test/test_drift_rates.py: -------------------------------------------------------------------------------- 1 | r''' test_drift_rates.py 2 | Test the FindDoppler min_drift and max_drift parameters. 
3 | ''' 4 | 5 | import os 6 | from tempfile import gettempdir 7 | from astropy import units as u 8 | import pandas as pd 9 | import setigen as stg 10 | from fb_cases_util import make_one_dat_file 11 | 12 | 13 | def gen_fil(arg_path): 14 | r''' Generate a Filterbank file ''' 15 | 16 | # Define time and frequency arrays, essentially labels for the 2D data array 17 | fchans = 1048576 18 | tchans = 16 19 | df = 1.0*u.Hz 20 | dt = 1.0*u.s 21 | fch1 = 6095.214842353016*u.MHz 22 | noise_std = 0.05 # Gaussian standard deviation 23 | 24 | sig_snr_1 = 100.0 25 | sig_width_1 = 1.1 * u.Hz 26 | drate_1 = 1.6 * u.Hz/u.s 27 | f_start_1 = 0 28 | 29 | sig_snr_2 = 200.0 30 | sig_width_2 = 1.2 * u.Hz 31 | drate_2 = 1.3 * u.Hz/u.s 32 | f_start_2 = fchans * 0.1 33 | 34 | sig_snr_3 = 300.0 35 | sig_width_3 = 1.3 * u.Hz 36 | drate_3 = 2.6 * u.Hz/u.s 37 | f_start_3 = fchans * 0.2 38 | 39 | sig_snr_4 = 400.0 40 | sig_width_4 = 1.4 * u.Hz 41 | drate_4 = 3.2 * u.Hz/u.s 42 | f_start_4 = fchans * 0.3 43 | 44 | # Generate the frame. 45 | frame = stg.Frame(fchans=fchans, 46 | tchans=tchans, 47 | df=df, 48 | dt=dt, 49 | fch1=fch1) 50 | 51 | # Add noise. 52 | frame.add_noise(x_mean=0, x_std=noise_std, noise_type='gaussian') 53 | 54 | # Add signal 1. 55 | signal_intensity = frame.get_intensity(snr=sig_snr_1) 56 | frame.add_constant_signal(f_start=frame.get_frequency(f_start_1), 57 | drift_rate=drate_1, 58 | level=signal_intensity, 59 | width=sig_width_1, 60 | f_profile_type='gaussian') 61 | 62 | # Add signal 2. 63 | signal_intensity = frame.get_intensity(snr=sig_snr_2) 64 | frame.add_constant_signal(f_start=frame.get_frequency(f_start_2), 65 | drift_rate=drate_2, 66 | level=signal_intensity, 67 | width=sig_width_2, 68 | f_profile_type='gaussian') 69 | 70 | # Add signal 3. 71 | signal_intensity = frame.get_intensity(snr=sig_snr_3) 72 | frame.add_constant_signal(f_start=frame.get_frequency(f_start_3), 73 | drift_rate=drate_3, 74 | level=signal_intensity, 75 | width=sig_width_3, 76 | f_profile_type='gaussian') 77 | 78 | # Add signal 4. 79 | signal_intensity = frame.get_intensity(snr=sig_snr_4) 80 | frame.add_constant_signal(f_start=frame.get_frequency(f_start_4), 81 | drift_rate=drate_4, 82 | level=signal_intensity, 83 | width=sig_width_4, 84 | f_profile_type='gaussian') 85 | 86 | # Save Filterbank file. 
87 | frame.save_fil(arg_path) 88 | 89 | 90 | def proc_one_dat_file(arg_path_fil, min_drift=0.0, max_drift=4.0): 91 | r'''Make a DAT file and process all of its entries''' 92 | path_dat = arg_path_fil.replace('.fil', '.dat') 93 | make_one_dat_file(arg_path_fil, min_drift=min_drift, max_drift=max_drift, min_snr=25.0, remove_h5=True) 94 | df = pd.read_csv(path_dat, header=None, sep=r'\s+', engine='python', comment='#') 95 | for ix in range(len(df)): 96 | drate = abs(float(df[1][ix])) 97 | if drate < min_drift or drate > max_drift: 98 | print('test_drift_rates proc_one_dat_file: Oops, dat:drate={}, min_drift={}, max_drift={}' 99 | .format(drate, min_drift, max_drift)) 100 | raise ValueError('Drift rate in DAT file is out of bounds!') 101 | 102 | 103 | def test_drift_rate_samples(): 104 | print("\n===== test_drift_rate_samples BEGIN =====") 105 | TESTDIR = gettempdir() + '/drift_testing/' 106 | if not os.path.exists(TESTDIR): 107 | os.mkdir(TESTDIR) 108 | PATH_FIL = TESTDIR + 'abc.fil' 109 | gen_fil(PATH_FIL) 110 | 111 | proc_one_dat_file(PATH_FIL, min_drift=0.0, max_drift=4.0) 112 | proc_one_dat_file(PATH_FIL, min_drift=2.0, max_drift=2.6) 113 | proc_one_dat_file(PATH_FIL, min_drift=2.0, max_drift=2.2) 114 | print("\n===== test_drift_rate_samples END =====") 115 | 116 | if __name__ == '__main__': 117 | test_drift_rate_samples() 118 | -------------------------------------------------------------------------------- /MAINTENANCE.md: -------------------------------------------------------------------------------- 1 | 2 | turbo_seti Maintenance & Regression Testing 3 | =========================================== 4 | 5 | 6 | ### Introduction 7 | 8 | The purpose of the regression testing suite is to exercise and validate results from turbo_seti functional modules. This is important in order to minimize potential inadvertent breakage when new development has occured. It is always best to catch bugs as soon as possible after they are introduced. 9 |
10 | The primary method of launching regression testing is the `pytest` executable. It is invoked in the following ways: 11 | * Manually by a developer, on the command line in a terminal window. This would follow downloading turbo_seti and setting up the development/testing environment (discussed later); a short example follows this list. 12 | * Automatically as part of a Github Pull Request (PR) after finalizing a fork of turbo_seti. 13 | * Automatically as part of a Github Merge after a PR is approved. 14 |
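For the manual route, a typical terminal session looks like the following sketch, once the environment has been prepared as described in the next section (the directory name depends on where the source tree was unpacked):
```
$ cd turbo_seti                      # top of the source tree
$ pytest                             # run the full regression suite
$ pytest test/test_find_event.py     # or run a single test file
```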
15 | 16 | ### Development Process Overview 17 | 18 | * The development of an amendment to `turbo_seti` begins with taking a fork from a github site, normally from `https://github.com/UCBerkeleySETI/turbo_seti`. 19 | * Also, from the same site, `turbo_seti` is downloaded to a local computer. The download operations can be performed in a few different ways but the simplest might be to download the zip file by clicking on the `Code` button and selecting `Download ZIP`. Once the zip file is in a local directory, unzip it and move the turbo_seti directory tree to wherever is appropriate for testing. The zip file can now be discarded. 20 | * Change directory into the `test` directory and execute `python3 download_test_data.py` which will perform all required regression testing initialization. 21 | * When the previous step has completed, change directory up one level to the top of the `turbo_seti` directory tree. 22 | * Execute: ```python3 -m pip install -U git+https://github.com/UCBerkeleySETI/blimpy``` 23 | * Execute: ```python3 setup.py install``` 24 | * Execute: ```python3 -m pip install -r requirements_test.txt``` 25 | * Regression testing can now begin. 26 | * Running the full suite of regression tests is invoked by executing `pytest` with no parameters specified. It is possible to run a single regression test file by specifying it as an argument to `pytest`. For example, if one wishes to only run the find event tests, the following is the command line to use: `pytest test/test_find_event.py`. 27 | * It is **highly encouraged** for developers to perform regression testing frequently in order to avoid surprises later on. 28 | * Once development activity on the local machine is complete and the last regression test has run verifying the absence of negative side effects, then the new and/or modified turbo_seti files can be uploaded to the developer's fork github site. 29 | * Be sure to bump the version in `setup.py` and add an entry to `VERSION-HISTORY.md`. 30 | * At the fork github site, the developer can request a pull by clicking on the `Pull request` button. This automatically starts the PR process mentioned in the introduction section. 31 | 32 | ### Testing Artifacts 33 | 34 | | File | Description | 35 | | :-- | :-- | 36 | | `download_test_data.py` | Initialization of regression testing by obtaining and producing files used in test suite execution. | 37 | | `fb_cases_def.py` | Data definitions for `test_fb_cases.py`. | 38 | | `fb_cases_util.py` | Utility functions for `test_fb_cases.py` and `test_pipelines_{1,2,3}`. | 39 | | `fb_dat_reference.txt` | Reference data generated by `fb_genref.py`, hopefully a one-time event. | 40 | | `fb_genref.py` | Hopefully, a one-time generator of `fb_dat_reference.txt` which is used by `test_fb_cases.py` to validate test results. The reference data values came from an execution of `turbo_seti` version 1.3.0. | 41 | | `pipelines_util.py` | Utility functions for `test_pipelines_{1,2,3}`. | 42 | | `README.md` | This file. | 43 | | `run_benchmark.sh` | A bash script used for measuring performance in the Seti BL data centre. | 44 | | `run_tests.sh` | A bash script used in Github Actions. | 45 | | `test_drift_rates.py` | Test source for generating multiple min_drift and max_drift tests, employing the `setigen` tool. | 46 | | `test_fb_cases.py` | Test source for generating multiple Filterbank tests, employing the `setigen` tool. | 47 | | `test_find_event.py` | Test source for generating multiple tests of the `find_event.py` module.
| 48 | | `test_pipelines_1.py` | Test source for exercising/validation of `find_event_pipeline.py` and `plot_event_pipeline.py`. | 49 | | `test_pipelines_2.py` | Same as `test_pipelines_1.py` except it tests observation file cadences that start with "OFF" target instead of starting with "ON" target. | 50 | | `test_pipelines_3.py` | Test source for exercising/validation of `find_event_pipeline.py` when the collection of input files are a complex cadence. | 51 | | `test_pipelines_4.py` | Test source for exercising/validation of `run_pipeline.py`. | 52 | | `test_plot_dat.py` | Test source for exercising/validation of `plot_dat.py`. | 53 | | `test_turbo_seti.py` | Test source for exercising/validation of a variety of turbo_seti functions. | 54 | 55 | -------------------------------------------------------------------------------- /turbo_seti/drift_indexes/drift_indexes_array_6.txt: -------------------------------------------------------------------------------- 1 | 0 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 | 0 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 32 33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 | 0 1 3 5 7 9 11 13 15 16 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 48 49 51 53 55 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 | 0 1 3 5 7 9 11 13 15 16 17 19 21 23 25 27 29 31 32 33 35 37 39 41 43 45 47 48 49 51 53 55 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 | 0 1 3 5 7 8 9 11 13 15 17 19 21 23 24 25 27 29 31 33 35 37 39 40 41 43 45 47 49 51 53 55 56 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 | 0 1 3 5 7 8 9 11 13 15 17 19 21 23 24 25 27 29 31 32 33 35 37 39 40 41 43 45 47 49 51 53 55 56 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 | 0 1 3 5 7 8 9 11 13 15 16 17 19 21 23 24 25 27 29 31 33 35 37 39 40 41 43 45 47 48 49 51 53 55 56 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0 1 3 5 7 8 9 11 13 15 16 17 19 21 23 24 25 27 29 31 32 33 35 37 39 40 41 43 45 47 48 49 51 53 55 56 57 59 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 | 0 1 3 4 5 7 9 11 12 13 15 17 19 20 21 23 25 27 28 29 31 33 35 36 37 39 41 43 44 45 47 49 51 52 53 55 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 | 0 1 3 4 5 7 9 11 12 13 15 17 19 20 21 23 25 27 28 29 31 32 33 35 36 37 39 41 43 44 45 47 49 51 52 53 55 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0 1 3 4 5 7 9 11 12 13 15 16 17 19 20 21 23 25 27 28 29 31 33 35 36 37 39 41 43 44 45 47 48 49 51 52 53 55 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 | 0 1 3 4 5 7 9 11 12 13 15 16 17 19 20 21 23 25 27 28 29 31 32 33 35 36 37 39 41 43 44 45 47 48 49 51 52 53 55 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 | 0 1 3 4 5 7 8 9 11 12 13 15 17 19 20 21 23 24 25 27 28 29 31 33 35 36 37 39 40 41 43 44 45 47 49 51 52 53 55 56 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 | 0 1 3 4 5 7 8 9 11 12 13 15 17 19 20 21 23 24 25 27 28 29 31 32 33 35 36 37 39 40 41 43 44 45 47 49 51 52 53 55 56 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 15 | 0 1 3 4 5 7 8 9 11 12 13 15 16 17 19 20 21 23 24 25 27 28 29 31 33 35 36 37 39 40 41 43 44 45 47 48 49 51 52 53 55 56 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16 | 0 1 3 4 5 7 8 9 11 12 13 15 16 17 19 20 21 23 24 25 27 28 29 31 32 33 35 36 37 39 40 41 43 44 
45 47 48 49 51 52 53 55 56 57 59 60 61 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 17 | 0 1 2 3 5 6 7 9 10 11 13 14 15 17 18 19 21 22 23 25 26 27 29 30 31 33 34 35 37 38 39 41 42 43 45 46 47 49 50 51 53 54 55 57 58 59 61 62 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 | 0 1 2 3 5 6 7 9 10 11 13 14 15 17 18 19 21 22 23 25 26 27 29 30 31 32 33 34 35 37 38 39 41 42 43 45 46 47 49 50 51 53 54 55 57 58 59 61 62 63 0 0 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0 1 2 3 5 6 7 9 10 11 13 14 15 16 17 18 19 21 22 23 25 26 27 29 30 31 33 34 35 37 38 39 41 42 43 45 46 47 48 49 50 51 53 54 55 57 58 59 61 62 63 0 0 0 0 0 0 0 0 0 0 0 0 0 20 | 0 1 2 3 5 6 7 9 10 11 13 14 15 16 17 18 19 21 22 23 25 26 27 29 30 31 32 33 34 35 37 38 39 41 42 43 45 46 47 48 49 50 51 53 54 55 57 58 59 61 62 63 0 0 0 0 0 0 0 0 0 0 0 0 21 | 0 1 2 3 5 6 7 8 9 10 11 13 14 15 17 18 19 21 22 23 24 25 26 27 29 30 31 33 34 35 37 38 39 40 41 42 43 45 46 47 49 50 51 53 54 55 56 57 58 59 61 62 63 0 0 0 0 0 0 0 0 0 0 0 22 | 0 1 2 3 5 6 7 8 9 10 11 13 14 15 17 18 19 21 22 23 24 25 26 27 29 30 31 32 33 34 35 37 38 39 40 41 42 43 45 46 47 49 50 51 53 54 55 56 57 58 59 61 62 63 0 0 0 0 0 0 0 0 0 0 23 | 0 1 2 3 5 6 7 8 9 10 11 13 14 15 16 17 18 19 21 22 23 24 25 26 27 29 30 31 33 34 35 37 38 39 40 41 42 43 45 46 47 48 49 50 51 53 54 55 56 57 58 59 61 62 63 0 0 0 0 0 0 0 0 0 24 | 0 1 2 3 5 6 7 8 9 10 11 13 14 15 16 17 18 19 21 22 23 24 25 26 27 29 30 31 32 33 34 35 37 38 39 40 41 42 43 45 46 47 48 49 50 51 53 54 55 56 57 58 59 61 62 63 0 0 0 0 0 0 0 0 25 | 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 17 18 19 20 21 22 23 25 26 27 28 29 30 31 33 34 35 36 37 38 39 41 42 43 44 45 46 47 49 50 51 52 53 54 55 57 58 59 60 61 62 63 0 0 0 0 0 0 0 26 | 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 17 18 19 20 21 22 23 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 41 42 43 44 45 46 47 49 50 51 52 53 54 55 57 58 59 60 61 62 63 0 0 0 0 0 0 27 | 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 25 26 27 28 29 30 31 33 34 35 36 37 38 39 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 57 58 59 60 61 62 63 0 0 0 0 0 28 | 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 57 58 59 60 61 62 63 0 0 0 0 29 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 0 0 0 30 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 0 0 31 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 0 32 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 33 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/dat_diff.py: -------------------------------------------------------------------------------- 1 | """ 2 | dat_diff - show differences between two dat files. 3 | 4 | Two processes in succession: 5 | 1) For each entry in dat file dat1, look for a match in dat file dat2. 6 | 2) For each entry in dat file dat2, look for a match in dat file dat1. 
7 | 8 | The comparison between dataframe entries uses the following dataframe columns: 9 | * Coarse channel number (exact match) 10 | * Frequency (within rtol) - see numpy.isclose() or math.isclose() for definition. 11 | * Drift rate (within rtol) 12 | """ 13 | 14 | import os 15 | import sys 16 | 17 | from argparse import ArgumentParser 18 | import pandas as pd 19 | import numpy as np 20 | 21 | DEBUGGING = False 22 | SEP = r'\s+' 23 | 24 | 25 | def oops(msg): 26 | """ 27 | Display an error message and exit to the O/S. 28 | 29 | Parameters 30 | ---------- 31 | msg : str 32 | Error message for display. 33 | 34 | Returns 35 | ------- 36 | None. 37 | 38 | """ 39 | print(f"\n*** Oops, {msg} !!") 40 | sys.exit(86) 41 | 42 | 43 | def finder(df1, path1, df2, path2, etol): 44 | """ 45 | For each entry in data frame df1, try to find a matching entry in data frame df2. 46 | 47 | Parameters 48 | ---------- 49 | df1 : Pandas DataFrame 50 | Reference dataframe. 51 | path1 : str 52 | Path of external file where df1 was sourced from. 53 | df2 : str 54 | Dataframe to be searched for a match. 55 | path2 : str 56 | Path of external file where df2 was sourced from.. 57 | etol : float 58 | Relative error tolerance. See RTOL parameter in numpy.isclose() or math.isclose(). 59 | 60 | Returns 61 | ------- 62 | None. 63 | 64 | """ 65 | 66 | # Extract file names. 67 | file1 = os.path.basename(path1) 68 | file2 = os.path.basename(path2) 69 | print(f"\n===== For each record in {file1}, try to find a corresponding one in {file2}.") 70 | ndf1 = len(df1) 71 | ndf2 = len(df2) 72 | print(f"===== {ndf1} records in {file1}, {ndf2} records in {file2}.") 73 | 74 | # Main loop. 75 | notfound_count = 0 76 | for ii in range(ndf1): 77 | if DEBUGGING: 78 | print(f"DEBUG ii={ii}") 79 | drate1 = df1[1][ii] 80 | snr1 = df1[2][ii] 81 | freq1 = df1[3][ii] 82 | cchan1 = df1[10][ii] 83 | flag_match = False 84 | for jj in range(ndf2): 85 | if DEBUGGING: 86 | print(f"DEBUG ii={ii}, jj={jj}") 87 | drate2 = df2[1][jj] 88 | #snr2 = df2[2][jj] 89 | freq2 = df2[3][jj] 90 | cchan2 = df2[10][jj] 91 | if cchan1 != cchan2: 92 | continue 93 | if not np.isclose(drate1, drate2, rtol=etol): 94 | continue 95 | if not np.isclose(freq1, freq2, rtol=etol): 96 | continue 97 | flag_match = True 98 | break 99 | if not flag_match: 100 | notfound_count += 1 101 | print(f"{file1} {ii} not found in {file2} > Coarse {cchan1} drate {drate1} Freq {freq1} SNR {snr1}") 102 | 103 | # The End. 104 | if notfound_count == 0: 105 | print(f"{file1} : {file2} > All found.") 106 | else: 107 | print(f"{file1} : {file2} > Not found count = {notfound_count}") 108 | 109 | 110 | def main(args=None): 111 | """ 112 | Executable program entry point. 113 | If args=None, then the command-line holds the list of arguments; 114 | Else, args holds the list of arguments. 115 | 116 | Parameters 117 | ---------- 118 | args : list, optional 119 | List of arguments. The default is None. 120 | 121 | Returns 122 | ------- 123 | None. 124 | 125 | """ 126 | 127 | # Parse and validate arguments. 128 | parser = ArgumentParser(description="Find differences between 2 DAT files.") 129 | parser.add_argument("dat1", type=str, 130 | help="Path of the 1st DAT file.") 131 | parser.add_argument("dat2", type=str, 132 | help="Path of the 2nd DAT file.") 133 | parser.add_argument("-e", "--etol", dest="etol", type=float, default=0.0001, 134 | help="Relative error tolerance (see numpy.isclose() or math.isclose(). 
Default=0.0001 (0.01%%).") 135 | if args is None: 136 | args = parser.parse_args() 137 | else: 138 | args = parser.parse_args(args) 139 | print(f"Relative error tolerance = {args.etol}") 140 | 141 | # Make sure that the DAT path #1 exists. 142 | # Load the data frames. 143 | if not os.path.exists(args.dat1): 144 | oops(f"DAT file {args.dat1} does not exist") 145 | df1 = pd.read_csv(args.dat1, header=None, sep=SEP, engine="python", comment="#") 146 | nrows1 = len(df1) 147 | if nrows1 < 1: 148 | oops(f"DAT file {args.dat1} is empty") 149 | 150 | # Make sure that the DAT path #2 exists. 151 | # Load the data frames. 152 | if not os.path.exists(args.dat2): 153 | oops(f"DAT file {args.dat2} does not exist") 154 | df2 = pd.read_csv(args.dat2, header=None, sep=SEP, engine="python", comment="#") 155 | nrows2 = len(df2) 156 | if nrows2 < 1: 157 | oops(f"DAT file {args.dat2} is empty") 158 | 159 | finder(df1, args.dat1, df2, args.dat2, args.etol) 160 | finder(df2, args.dat2, df1, args.dat1, args.etol) 161 | 162 | 163 | if __name__ == "__main__": 164 | main() 165 | -------------------------------------------------------------------------------- /FinalReport.md: -------------------------------------------------------------------------------- 1 | ### Summary 2 | The goal of this project was to document turboSETI and make any fixes that stuck out along the way. To do this I started running a debugger from seti_event.py. By starting at the entry point where arguments get parsed I was able to follow the whole pipeline of the code. I deviated from this path here and there to handle files that were used a bit less chronologically, like files with helper functions and file writers. All files in turboSETI now have documentation, though there are a couple areas that could be expanded on. I worked exclusively on files in the find_doppler folder, since the rest of the code already had basic documentation. 3 | 4 | ### Changes and Suggested Changes 5 | One change I made was a performance improvement that can yield up to a 90% speedup in certain files. In data_handler.py in the load_data function, there was a section of code which adds rows of zeros to a numpy matrix in order to get its length to be a power of 2 (this is because the log base 2 is later taken of this length to assign drift indices). These rows of zeros were being added one by one in a loop that was running thousands of times, which was bottlenecking certain files. I removed this loop and replaced it with a single call to the numpy function which adds all the necessary rows in one call. Note that this section does not seem to bottleneck every file, from what I could tell the maximum speedup occurs in files that do not have hits. This is because the doppler correcting forward and reverse takes much longer than this section, and only occurs when hits are found. 6 | 7 | Another thing I noticed was that there are a few functions that are unused or partially implemented. I spent some time especially observing bitrev and its variants since there were three of them which seemed superficially to serve the same purpose. I found that bitrev and bitrev2 in fact do have the same behavior though are implemented differently, meaning bitrev2 is slightly slower. Bitrev3 is the outlier of the three. The bitrev functions serve the purpose of taking in a number, reading it as a binary number, and reversing a certain amount of bits. 
While bitrev and bitrev2 take that length as an input, allowing the user to flip only the first `nbits` bits (least significant), bitrev3 always assumes that it is dealing with a 32 bit number, meaning that it is more limited than the other two. I also found through testing that bitrev actually behaves inconsistently in its base case. With all inputs of `nbits` > 1, if `nbits` is not the entire length of the number, then any bits past the length are not only not flipped, but are also truncated from the result. For example, decimal 10 = binary 1010 with nbits of 2 returns decimal 1 = binary 0001 instead of decimal 9 = binary 1001. Bitrev2 behaves consistently like this for all `nbits` >0, but bitrev behaves like this for all inputs of `nbits`>1. On an input of `nbits` = 1, bitrev behaves differently, in that it does not truncate. This is because it is caught in a base if-statement that detects an input of 1 and returns the original number. If we want bitrev to behave consistently the way it does with the rest of its inputs, then the first line of the function, 8 | ```if nbits <= 1:``` 9 | should be changed to 10 | ``` if nbits < 1:``` 11 | This allows the code to properly continue and truncate as it does with all other inputs. Note however that this change is likely unnecessary, since as long as the proper function length is input, this will never be a problem. For more on this, you can see some tests I wrote which illustrate this inconsistency in the tests folder of the testing branch of this fork. That branch still contains bitrev2 and bitrev3, but they have since been removed from turboSETI’s master branch due to the fact that they are not used. 12 | 13 | ### Problems and Next Steps 14 | The largest issue I ran into while doing this project was my lack of radio astronomy knowledge. There are a few fairly complicated functions like search_data in find_doppler.py which I was not able to fully understand. While I could figure out what the code was doing, I was unable to figure out what exactly was going on at a more abstract level. Due to this, some functions have a blank param tag or two in places where I was unable to come up with a description for an argument, and other functions may not have docstrings as descriptive as others. 15 | 16 | My recommendation for future interns or others interested in adding on to the documentation or otherwise improving this repository would be to work in a group of two, where one person specializes in computer science and one in radio astronomy. Understanding what the code is supposed to do on a higher level would make it easier for the programmer to understand where the code would likely bottleneck, what outputs may be erroneous and what parts of the code may be doing the wrong thing. While one programmer can look things up and read papers on radio astronomy, this is much slower and more difficult than having someone who already knows the stuff sit next to them as they work. 17 | 18 | Besides that, I strongly recommend any who plan on working on this to use a debugger, as you get a lot more out of what occurs in the code by stepping through it rather than just reading it and running it. It may be a bit difficult to get a debugger working at first but it is worth it. I used intelliJ with a python plug-in because that’s the program I have the most experience with but there are plenty of others. One issue I had with my debugger was that the relative imports in some files were causing the debugger to error. 
To fix this I just removed the `.` in front of the relative import statements in file_writers.py, seti_event.py, and find_doppler.py. I just had to remember to change it back before making any pull requests. Like I said though, there are plenty of other debuggers that are better for python, so spend some time trying what works. 19 | 20 | Things that could be done for people who want to continue to work on turboSETI are writing tests, expanding documentation even further and filling out the spots I was unable to, and optimizing. I began writing unit tests for some of the files, though they are in no way exhaustive. They were mainly used for me to examine behavior with certain inputs. These can be found in the tests folder of the testing branch of this fork. Note also that some of these tests are for functions which no longer exist in turboSETI, as some functions which had no usages were removed from the master branch. The functions still exist in the testing branch however. 21 | 22 | -------------------------------------------------------------------------------- /test/test_pipelines_2.py: -------------------------------------------------------------------------------- 1 | r''' 2 | test_pipelines.py 3 | 4 | Same as test_pipelines except that the cadence order is reveresed 5 | OFF-ON-... instead of ON-OFF-... 6 | ''' 7 | 8 | from time import time 9 | from shutil import rmtree 10 | from pathlib import Path 11 | from os import remove, listdir 12 | import imghdr 13 | import glob 14 | from tempfile import gettempdir 15 | import sys 16 | 17 | from turbo_seti.find_event.find_event_pipeline import find_event_pipeline 18 | from turbo_seti.find_event.plot_event_pipeline import plot_event_pipeline 19 | import pipelines_util as utl 20 | 21 | TESTDIR = gettempdir() + '/pipeline_testing/' 22 | PLOTDIR = TESTDIR + 'plots/' 23 | PATH_DAT_LIST_FILE = TESTDIR + 'dat_files_2.lst' 24 | PATH_H5_LIST_FILE = TESTDIR + 'h5_files_2.lst' 25 | PATH_CSVF = TESTDIR + 'found_event_table_2.csv' 26 | N_EVENTS = 2 27 | 28 | 29 | def oops(arg_text): 30 | ''' 31 | Log the bad news and exit to the O/S with a non-zero exit code. 32 | ''' 33 | print('\n*** Oops, ' + arg_text) 34 | sys.exit(86) 35 | 36 | 37 | def find_plot_pipelines(filter_threshold=3, 38 | on_off_first='ON', 39 | plot_dir=None, 40 | on_source_complex_cadence=False): 41 | r''' 42 | Exercise find_event_pipeline() and plot_event_pipeline() 43 | ''' 44 | 45 | main_time_start = time() 46 | 47 | print('find_plot_pipelines_2: Filter threshold = ', filter_threshold) 48 | h5_file_list = sorted(glob.glob(TESTDIR + 'single*.h5')) 49 | dat_file_list = sorted(glob.glob(TESTDIR + 'single*.dat')) 50 | number_in_cadence = len(h5_file_list) 51 | if number_in_cadence != 6: 52 | raise ValueError('find_plot_pipelines_2: Expected to find 6 h5 files but observed {}' 53 | .format(number_in_cadence)) 54 | number_in_cadence = len(dat_file_list) 55 | if number_in_cadence != 6: 56 | raise ValueError('find_plot_pipelines_2: Expected to find 6 dat files but observed {}' 57 | .format(number_in_cadence)) 58 | 59 | # Re-order the H5 and DAT files into OFF-ON-... 
60 | # In the 2 lists, switch 1 and 2, 3 and 4, 5 and 6 61 | for ix in [0, 2, 4]: 62 | temp = h5_file_list[ix] 63 | h5_file_list[ix] = h5_file_list[ix + 1] 64 | h5_file_list[ix + 1] = temp 65 | temp = dat_file_list[ix] 66 | dat_file_list[ix] = dat_file_list[ix + 1] 67 | dat_file_list[ix + 1] = temp 68 | fh_h5 = open(PATH_H5_LIST_FILE, 'w') 69 | fh_dat = open(PATH_DAT_LIST_FILE, 'w') 70 | for ix in range(6): 71 | fh_h5.write(h5_file_list[ix] + '\n') 72 | fh_dat.write(dat_file_list[ix] + '\n') 73 | fh_h5.close() 74 | fh_dat.close() 75 | print('find_plot_pipelines_2: H5/dat cadence length = ', number_in_cadence) 76 | 77 | # If CSV exists from a previous execution, remove it. 78 | try: 79 | remove(PATH_CSVF) 80 | except: 81 | pass 82 | 83 | # With the list of DAT files, do find_event_pipeline() 84 | print('===== find_event_pipeline BEGIN =====') 85 | df_event = find_event_pipeline(PATH_DAT_LIST_FILE, 86 | sortby_tstart=False, 87 | filter_threshold=filter_threshold, 88 | number_in_cadence=number_in_cadence, 89 | user_validation=False, 90 | saving=True, 91 | on_off_first=on_off_first, 92 | on_source_complex_cadence=on_source_complex_cadence, 93 | csv_name=PATH_CSVF) 94 | print('===== find_event_pipeline END =====') 95 | 96 | # CSV file created? 97 | if not Path(PATH_CSVF).exists(): 98 | raise ValueError('find_plot_pipelines_2: No CSV of events created') 99 | 100 | # An event CSV was created. 101 | # Validate the hit table file. 102 | utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipe_lines_2', N_EVENTS) 103 | 104 | # Do the plots for all of the HDF5/DAT file pairs. 105 | print('===== plot_event_pipeline #1 (plot_dir does not yet exist) BEGIN =====') 106 | rmtree(plot_dir, ignore_errors=True) 107 | plot_event_pipeline(PATH_CSVF, 108 | PATH_H5_LIST_FILE, 109 | filter_spec='f{}'.format(filter_threshold), 110 | plot_dir=plot_dir, 111 | user_validation=False) 112 | print('===== plot_event_pipeline #2 (plot_dir already exists) BEGIN =====') 113 | plot_event_pipeline(PATH_CSVF, 114 | PATH_H5_LIST_FILE, 115 | filter_spec='f{}'.format(filter_threshold), 116 | plot_dir=plot_dir, 117 | user_validation=False) 118 | 119 | # Check that the right number of PNG files were created. 120 | print('===== plot_event_pipeline END =====') 121 | outdir_list = listdir(plot_dir) 122 | npngs = 0 123 | for cur_file in outdir_list: 124 | if cur_file.split('.')[-1] == 'png': 125 | if imghdr.what(plot_dir + cur_file) != 'png': 126 | raise ValueError('find_plot_pipelines_2: File {} is not a PNG file' 127 | .format(cur_file)) 128 | npngs += 1 129 | if npngs != N_EVENTS: 130 | raise ValueError('find_plot_pipelines_2: Expected to find 6 PNG files but observed {}' 131 | .format(npngs)) 132 | 133 | # Stop the clock - we're done. 134 | main_time_stop = time() 135 | 136 | print('find_plot_pipelines_2: End, et = {:.1f} seconds' 137 | .format(main_time_stop - main_time_start)) 138 | 139 | 140 | def test_pipelines_2(cleanup=False): 141 | r''' 142 | This is the pytest entry point. 143 | Test filter threshold 3 in find_plot_pipelines(). 144 | By default (pytest): 145 | * Initialization is done only once. 146 | * Cleanup is not performed at end. 
147 | ''' 148 | print('\n===== test_pipelines_2: BEGIN =====') 149 | 150 | find_plot_pipelines(filter_threshold=3, 151 | on_off_first='OFF', 152 | plot_dir=PLOTDIR, 153 | on_source_complex_cadence=False) 154 | 155 | if cleanup: 156 | rmtree(TESTDIR, ignore_errors=True) 157 | 158 | print('\n===== test_pipelines_2: END =====') 159 | 160 | 161 | if __name__ == '__main__': 162 | test_pipelines_2(cleanup=False) 163 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/seti_event.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ''' 3 | Main program module for executable turboSETI 4 | ''' 5 | 6 | import sys 7 | import os 8 | import logging 9 | import time 10 | import cProfile 11 | import pstats 12 | from argparse import ArgumentParser 13 | 14 | from blimpy import __version__ as BLIMPY_VERSION 15 | from .find_doppler import FindDoppler 16 | from .kernels import Kernels 17 | from .turbo_seti_version import TURBO_SETI_VERSION 18 | 19 | 20 | def main(args=None): 21 | r""" 22 | This is the entry-point to turboSETI. 23 | 24 | Parameters 25 | ---------- 26 | args : dict 27 | 28 | """ 29 | # Create an option parser to get command-line input/arguments 30 | p = ArgumentParser(description='turboSETI doppler drift narrowband search utility version {}.' 31 | .format(TURBO_SETI_VERSION)) 32 | 33 | p.add_argument('filename', type=str, default='', nargs="?", 34 | help='Name of filename to open (h5 or fil)') 35 | p.add_argument('-v', '--version', dest='show_version', default=False, action='store_true', 36 | help='show the turbo_seti and blimpy versions and exit') 37 | p.add_argument('-M', '--max_drift', dest='max_drift', type=float, default=10.0, 38 | help='Set the maximum drift rate threshold. Unit: Hz/sec. Default: 10.0') 39 | p.add_argument('-m', '--min_drift', dest='min_drift', type=float, default=0.00001, 40 | help='Set the minimum drift rate threshold. Unit: Hz/sec. Default: 0.00001') 41 | p.add_argument('-s', '--snr', dest='snr', type=float, default=25.0, 42 | help='Set the minimum SNR threshold. Default: 25.0') 43 | p.add_argument('-o', '--out_dir', dest='out_dir', type=str, default='./', 44 | help='Location for output files. Default: local dir. ') 45 | p.add_argument('-l', '--loglevel', dest='log_level', type=str, default='info', 46 | help='Specify log level (info, debug, warning)') 47 | p.add_argument('-c', '--coarse_chans', dest='coarse_chans', type=str, default=None, 48 | help='Comma separated string list of coarse channels to analyze. E.g. 7,12 to search channels 7 and 12 only.') 49 | p.add_argument('-n', '--n_coarse_chan', dest='n_coarse_chan', type=int, default=None, 50 | help='Number of coarse channels to use.') 51 | p.add_argument('-p', '--n_parallel', dest='n_parallel', type=int, default=1, 52 | help='Number of dask partitions to run in parallel. Default to 1 (dask not in use)') 53 | p.add_argument('-b', '--progress_bar', dest='flag_progress_bar', type=str, default='n', 54 | help='Use a progress bar with dask? (y/n)') 55 | p.add_argument('-g', '--gpu', dest='flag_gpu', type=str, default='n', 56 | help='Compute on the GPU? (y/n)') 57 | p.add_argument('-z', '--blank_dc', dest='flag_blank_dc', type=str, default='y', 58 | help='Smooth out the DC spike? (y/n)') 59 | p.add_argument('-d', '--gpu_id', dest='gpu_id', type=int, default=0, 60 | help='Use which GPU device? 
(0,1,...)') 61 | p.add_argument('-P', '--profile', dest='flag_profile', type=str, default='n', 62 | help='Profile execution? (y/n)') 63 | p.add_argument('-S', '--single_precision', dest='flag_single_precision', type=str, default='y', 64 | help='Use single precision (float32)? (y/n)') 65 | p.add_argument('-a', '--append_output', dest='flag_append_output', type=str, default='n', 66 | help='Append output DAT & LOG files? (y/n)') 67 | 68 | if args is None: 69 | args = p.parse_args() 70 | else: 71 | args = p.parse_args(args) 72 | 73 | if args.show_version: 74 | print('turbo_seti: {}'.format(TURBO_SETI_VERSION)) 75 | print('blimpy: {}'.format(BLIMPY_VERSION)) 76 | sys.exit(0) 77 | 78 | if args.filename == '': 79 | os.system('turboSETI -h') 80 | sys.exit(0) 81 | 82 | if not os.path.exists(args.filename): 83 | print("\nInput file {} does not exist!\n".format(args.filename)) 84 | sys.exit(86) 85 | 86 | if args.flag_profile == "y": 87 | cProfile.runctx('exec(args)', {'args': args, 'exec': exec_proc}, {}, filename='stats_file.bin') 88 | p = pstats.Stats('stats_file.bin') 89 | p.strip_dirs().sort_stats('time').print_stats(16) 90 | else: 91 | exec_proc(args) 92 | 93 | def exec_proc(args): 94 | r""" 95 | Interface to FindDoppler class, called by main(). 96 | 97 | Parameters 98 | ---------- 99 | args : dict 100 | 101 | """ 102 | if args.coarse_chans is None: 103 | coarse_chans = '' 104 | else: 105 | coarse_chans = map(int, args.coarse_chans.split(',')) 106 | 107 | # Setting log level 108 | if args.log_level == 'info': 109 | log_level_int = logging.INFO 110 | elif args.log_level == 'debug': 111 | log_level_int = logging.DEBUG 112 | elif args.log_level == 'warning': 113 | log_level_int = logging.WARNING 114 | else: 115 | raise ValueError('Need valid loglevel value (info, debug, warning).') 116 | 117 | fmt = '%(name)-15s %(levelname)-8s %(message)s' 118 | logging.basicConfig(format=fmt, level=logging.WARNING) 119 | 120 | if Kernels.has_gpu() and args.flag_gpu == "n": 121 | print("Info: Your system is compatible with GPU-mode. Use the `-g y` argument to enable it.") 122 | 123 | #Doing search 124 | t0 = time.time() 125 | 126 | find_seti_event = FindDoppler(args.filename, 127 | max_drift=args.max_drift, 128 | min_drift=args.min_drift, 129 | snr=args.snr, 130 | out_dir=args.out_dir, 131 | append_output=(args.flag_append_output == "y"), 132 | coarse_chans=coarse_chans, 133 | obs_info=None, 134 | n_coarse_chan=args.n_coarse_chan, 135 | gpu_backend=(args.flag_gpu == "y"), 136 | gpu_id=args.gpu_id, 137 | blank_dc=(args.flag_blank_dc == "y"), 138 | precision=1 if args.flag_single_precision == "y" else 2, 139 | log_level_int=log_level_int) 140 | 141 | find_seti_event.search(n_partitions=args.n_parallel, 142 | progress_bar=args.flag_progress_bar) 143 | 144 | t1 = time.time() 145 | print('Search time: %5.2f min' % ((t1-t0)/60.)) 146 | 147 | 148 | if __name__ == '__main__': 149 | main() 150 | -------------------------------------------------------------------------------- /turbo_seti/find_event/plot_event_pipeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | r""" 4 | Front-facing script to plot drifting, narrowband events in a set of generalized 5 | cadences of ON-OFF radio SETI observations. 6 | """ 7 | 8 | import os 9 | from operator import attrgetter 10 | import pandas 11 | from blimpy import Waterfall 12 | from . 
import plot_event 13 | 14 | 15 | class PathRecord: 16 | r''' Definition of an H5 path record ''' 17 | def __init__(self, path_h5, tstart, source_name): 18 | self.path_h5 = path_h5 19 | self.tstart = tstart 20 | self.source_name = source_name 21 | def __repr__(self): 22 | return repr((self.path_h5, self.tstart, self.source_name)) 23 | 24 | 25 | def plot_event_pipeline(event_csv_string, fils_list_string, user_validation=False, 26 | offset=0, filter_spec=None, sortby_tstart=True, plot_dir=None): 27 | r""" 28 | This function calls :func:`~turbo_seti.find_event.plot_event.plot_candidate_events` to 29 | plot the events in an output .csv file generated by find_event_pipeline.py 30 | 31 | Parameters 32 | ---------- 33 | event_csv_string : str 34 | The string name of a .csv file that contains the 35 | list of events at a given filter level, created as 36 | output from find_event_pipeline.py. The 37 | .csv should have a filename containing information 38 | about its parameters, for example 39 | "kepler1093b_0015_f2_snr10.csv" 40 | Remember that the file was created with some cadence 41 | (ex. ABACAD) and ensure that the cadence matches the 42 | order of the files in fils_list_string 43 | 44 | fils_list_string : str 45 | The string name of a plaintext file ending in .lst 46 | that contains the filenames of .fil files, each on a 47 | new line, that corresponds to the cadence used to 48 | create the .csv file used for event_csv_string. 49 | 50 | user_validation : bool, optional 51 | A True/False flag that, when set to True, asks if the 52 | user wishes to continue with their input parameters 53 | (and requires a 'y' or 'n' typed as confirmation) 54 | before beginning to run the program. Recommended when 55 | first learning the program, not recommended for 56 | automated scripts. 57 | 58 | offset : int, optional 59 | The amount that the overdrawn "best guess" line from 60 | the event parameters in the csv should be shifted from 61 | its original position to enhance readability. Can be 62 | set to 0 (default; draws line on top of estimated 63 | event) or 'auto' (shifts line to the left by an auto- 64 | calculated amount, with addition lines showing original 65 | position). 66 | sortby_tstart : bool 67 | If True, the input file list is sorted by header.tstart. 68 | 69 | Examples 70 | -------- 71 | >>> import plot_event_pipeline; 72 | ... plot_event_pipeline.plot_event_pipeline(event_csv_string, fils_list_string, 73 | ... user_validation=False, offset=0) 74 | 75 | """ 76 | #reading in the .csv containing the events 77 | try: 78 | candidate_event_dataframe = pandas.read_csv(event_csv_string, comment='#') 79 | print("plot_event_pipeline: Opened file {}".format(event_csv_string)) 80 | except: 81 | print("*** plot_event_pipeline: Oops, cannot access file {}".format(event_csv_string)) 82 | return 83 | 84 | fil_file_list = [] 85 | for file in pandas.read_csv(fils_list_string, encoding='utf-8', header=None, chunksize=1): 86 | fil_file_list.append(file.iloc[0,0]) 87 | 88 | #obtaining source names 89 | source_name_list = [] 90 | path_record = [] 91 | for fil in fil_file_list: 92 | wf = Waterfall(fil, load_data=False) 93 | source_name = wf.container.header["source_name"] 94 | source_name_list.append(source_name) 95 | tstart = wf.container.header["tstart"] 96 | path_record.append(PathRecord(fil, tstart, source_name)) 97 | 98 | # If sorting by header.tstart, then rewrite the dat_file_list in header.tstart order. 
99 | if sortby_tstart: 100 | path_record = sorted(path_record, key=attrgetter('tstart')) 101 | fil_file_list = [] 102 | for obj in path_record: 103 | fil_file_list.append(obj.path_h5) 104 | print("plot_event_pipeline: file = {}, tstart = {}, source_name = {}" 105 | .format(os.path.basename(obj.path_h5), obj.tstart, obj.source_name)) 106 | else: 107 | for obj in path_record: 108 | print("plot_event_pipeline: file = {}, tstart = {}, source_name = {}" 109 | .format(os.path.basename(obj.path_h5), obj.tstart, obj.source_name)) 110 | 111 | #get rid of bytestring "B'"s if they're there (early versions of 112 | #seti_event.py added "B'"s to all of the source names) 113 | on_source_name_original = candidate_event_dataframe.Source[0] 114 | if on_source_name_original[0] == 'B' and on_source_name_original[-1] == '\'': 115 | on_source_name = on_source_name_original[2:-2] 116 | else: 117 | on_source_name = on_source_name_original 118 | candidate_event_dataframe = candidate_event_dataframe.replace(to_replace=on_source_name_original, 119 | value=on_source_name) 120 | 121 | # Establish filter-level from filter_spec (preferred) 122 | # or 3rd token of the .csv path (don't break an existing caller) 123 | if filter_spec is None: 124 | filter_level = event_csv_string.split('_')[2] 125 | else: 126 | filter_level = filter_spec 127 | 128 | #begin user validation 129 | print("Plotting some events for: ", on_source_name) 130 | print("There are " + str(len(candidate_event_dataframe.Source)) + " total events in the csv file " + event_csv_string) 131 | print("therefore, you are about to make " + str(len(candidate_event_dataframe.Source)) + " .png files.") 132 | 133 | if user_validation: 134 | question = "Do you wish to proceed with these settings?" 135 | while "the answer is invalid": 136 | reply = str(input(question+' (y/n): ')).lower().strip() 137 | if reply == '': 138 | return 139 | if reply[0] == 'y': 140 | break 141 | if reply[0] == 'n': 142 | return 143 | 144 | #move to plot_event.py for the actual plotting 145 | plot_event.plot_candidate_events(candidate_event_dataframe, 146 | fil_file_list, 147 | filter_level, 148 | source_name_list, 149 | offset=offset, 150 | plot_dir=plot_dir) 151 | -------------------------------------------------------------------------------- /tutorial/initialise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "c61bac5f", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Please wait for the \"End\" message, downloading 6 HDF5 files.\n", 14 | "\n", 15 | "Begin wget http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5 -> /home/elkins/turbo_seti_data/single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5 .....\n", 16 | "End wget (single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5), et = 19.5 seconds\n", 17 | "Begin wget http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/single_coarse_guppi_59046_80354_DIAG_VOYAGER-1_0012.rawspec.0000.h5 -> /home/elkins/turbo_seti_data/single_coarse_guppi_59046_80354_DIAG_VOYAGER-1_0012.rawspec.0000.h5 .....\n", 18 | "End wget (single_coarse_guppi_59046_80354_DIAG_VOYAGER-1_0012.rawspec.0000.h5), et = 19.3 seconds\n", 19 | "Begin wget 
http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.h5 -> /home/elkins/turbo_seti_data/single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.h5 .....\n", 20 | "End wget (single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.h5), et = 24.9 seconds\n", 21 | "Begin wget http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/single_coarse_guppi_59046_80989_DIAG_VOYAGER-1_0014.rawspec.0000.h5 -> /home/elkins/turbo_seti_data/single_coarse_guppi_59046_80989_DIAG_VOYAGER-1_0014.rawspec.0000.h5 .....\n", 22 | "End wget (single_coarse_guppi_59046_80989_DIAG_VOYAGER-1_0014.rawspec.0000.h5), et = 29.6 seconds\n", 23 | "Begin wget http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.h5 -> /home/elkins/turbo_seti_data/single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.h5 .....\n", 24 | "End wget (single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.h5), et = 17.5 seconds\n", 25 | "Begin wget http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/single_coarse_guppi_59046_81628_DIAG_VOYAGER-1_0016.rawspec.0000.h5 -> /home/elkins/turbo_seti_data/single_coarse_guppi_59046_81628_DIAG_VOYAGER-1_0016.rawspec.0000.h5 .....\n", 26 | "End wget (single_coarse_guppi_59046_81628_DIAG_VOYAGER-1_0016.rawspec.0000.h5), et = 20.9 seconds\n", 27 | "\n", 28 | "All files have been successfully downloaded.\n", 29 | "End.\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "\"\"\"\n", 35 | "Download a set of HDF5 files (H5_FILE_LIST)\n", 36 | "from the specified web address (URL_DIR)\n", 37 | "to the file system directory indicated by DATADIR.\n", 38 | "\"\"\"\n", 39 | "\n", 40 | "from time import time\n", 41 | "import sys\n", 42 | "from shutil import rmtree\n", 43 | "from os import mkdir\n", 44 | "from pathlib import Path\n", 45 | "from urllib.error import HTTPError\n", 46 | "\n", 47 | "\n", 48 | "def oops(arg_text):\n", 49 | " \"\"\"\n", 50 | " Log the bad news and exit to the O/S with a non-zero exit code.\n", 51 | " \"\"\"\n", 52 | " print(\"\\n*** OOPS, \" + arg_text)\n", 53 | " sys.exit(86)\n", 54 | "\n", 55 | "\n", 56 | "try:\n", 57 | " import wget\n", 58 | "except:\n", 59 | " oops(\"Needed: Install the wget package\")\n", 60 | "\n", 61 | "\n", 62 | "DATADIR = str(Path.home()) + \"/turbo_seti_data/\"\n", 63 | "URL_DIR = \"http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/\"\n", 64 | "\n", 65 | "H5_FILE_LIST = ['single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5',\n", 66 | " 'single_coarse_guppi_59046_80354_DIAG_VOYAGER-1_0012.rawspec.0000.h5',\n", 67 | " 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.h5',\n", 68 | " 'single_coarse_guppi_59046_80989_DIAG_VOYAGER-1_0014.rawspec.0000.h5',\n", 69 | " 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.h5',\n", 70 | " 'single_coarse_guppi_59046_81628_DIAG_VOYAGER-1_0016.rawspec.0000.h5']\n", 71 | "REQUIRED_MAJOR = 3\n", 72 | "REQUIRED_MINOR = 7\n", 73 | "\n", 74 | "\n", 75 | "def check_version():\n", 76 | " if sys.version_info.major < REQUIRED_MAJOR \\\n", 77 | " or sys.version_info.minor < REQUIRED_MINOR:\n", 78 | " print(\"This script requires Python {}.{} or higher!\"\n", 79 | " .format(REQUIRED_MAJOR, REQUIRED_MINOR))\n", 80 | " oops(\"You are using Python {}.{}.\"\n", 81 | " .format(sys.version_info.major, sys.version_info.minor))\n", 82 | "\n", 83 | "\n", 84 | "def 
wgetter(arg_h5_name):\n", 85 | " \"\"\"\n", 86 | " wget an HDF5 file from the Internet repository.\n", 87 | " arg_h5_name: HDF5 file name\n", 88 | " \"\"\"\n", 89 | " url_h5 = URL_DIR + arg_h5_name\n", 90 | " path_h5 = DATADIR + arg_h5_name\n", 91 | " print(\"Begin wget {} -> {} .....\".format(url_h5, path_h5))\n", 92 | " time_start = time()\n", 93 | " try:\n", 94 | " wget.download(url_h5, path_h5, bar=False)\n", 95 | " except HTTPError as ex:\n", 96 | " oops(\"init: wget {}, failed: {}\".format(url_h5, repr(ex)))\n", 97 | " time_stop = time()\n", 98 | " print(\"End wget ({}), et = {:.1f} seconds\"\n", 99 | " .format(arg_h5_name, time_stop - time_start))\n", 100 | "\n", 101 | "\n", 102 | "if __name__ == \"__main__\":\n", 103 | " check_version()\n", 104 | " rmtree(DATADIR, ignore_errors=True)\n", 105 | " mkdir(DATADIR)\n", 106 | " print(\"Please wait for the \\\"End\\\" message, downloading 6 HDF5 files.\\n\")\n", 107 | " \n", 108 | " for filename_h5 in H5_FILE_LIST:\n", 109 | " wgetter(filename_h5)\n", 110 | " \n", 111 | " print(\"\\nAll files have been successfully downloaded.\")\n", 112 | " print(\"End.\")\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "id": "3f0841ac", 118 | "metadata": {}, 119 | "source": [ 120 | "Please wait for the \"End\" message, downloading 6 HDF5 files." 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3 (ipykernel)", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.9.5" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 5 145 | } 146 | -------------------------------------------------------------------------------- /test/test_find_event.py: -------------------------------------------------------------------------------- 1 | from tempfile import gettempdir 2 | from shutil import rmtree 3 | from os import mkdir 4 | from numpy import isclose 5 | from turbo_seti.find_event.find_event import calc_freq_range, find_events 6 | 7 | TESTDIR = gettempdir() + '/test_find_event/' 8 | RTOL_DIFF = 0.001 # numpy.isclose(), 0.1% 9 | 10 | # Hits 1-3 in table 1 are in all 3 ON tables and in no OFF tables. 11 | # Hit 4 in table 1 is in one ON table and in no OFF tables. 12 | # Hit 5 in table 1 is in one ON table and in one OFF table. 
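# Rough guide to how the assertions in test_find_event() below use this layout
# (this mapping is an interpretation inferred from the expected event counts,
# not verified against the find_events() implementation): filter_threshold=3
# keeps hits present in the ON tables and absent from all OFF tables (hits 1-3),
# filter_threshold=2 additionally admits hit 4 (one ON, no OFF), and
# filter_threshold=1 additionally admits hit 5 (one ON and one OFF).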
13 | 14 | DAT1_TABLE = [ 15 | [1, -0.392226, 30.612128, 8419.319368, 8419.319368, 739933, 8419.321003, 8419.317740, 0.0, 0.000000, 0, 858], 16 | [2, -0.373093, 245.707984, 8419.297028, 8419.297028, 747929, 8419.298662, 8419.295399, 0.0, 0.000000, 0, 858], 17 | [3, -0.392226, 31.220652, 8419.274374, 8419.274374, 756037, 8419.276009, 8419.272745, 0.0, 0.000000, 0, 858], 18 | [4, -0.392226, 431.220652, 9419.274374, 9419.274374, 756001, 9419.276009, 8419.272745, 0.0, 0.000000, 0, 858], 19 | [5, -0.392226, 531.220652, 9319.274374, 9319.274374, 756011, 9319.276009, 8419.272745, 0.0, 0.000000, 0, 858] 20 | ] 21 | 22 | DAT2_TABLE = [ 23 | [1, 0.0, 4200.0, 10000.0, 10000.0, 739933, 10000.0, 10000.0, 0.0, 0.000000, 0, 858], 24 | [2, 0.0005, 500.0, 10000.0, 10000.0, 747929, 10000.0, 10000.0, 0.0, 0.000000, 0, 858], 25 | [3, -2.0005, 700.0, 10000.0, 10000.0, 756037, 10000.0, 10000.0, 0.0, 0.000000, 0, 858], 26 | [4, -0.382226, 531.220652, 9319.274374, 9319.274374, 756011, 9319.276009, 8419.272745, 0.0, 0.000000, 0, 858] 27 | ] 28 | 29 | DAT3_TABLE = [ 30 | [1, -0.392226, 30.612128, 8419.319368, 8419.319368, 739933, 8419.321003, 8419.317740, 0.0, 0.000000, 0, 858], 31 | [2, -0.373093, 245.707984, 8419.297028, 8419.297028, 747929, 8419.298662, 8419.295399, 0.0, 0.000000, 0, 858], 32 | [3, -0.412226, 31.220652, 8419.274374, 8419.274374, 756037, 8419.276009, 8419.272745, 0.0, 0.000000, 0, 858] 33 | ] 34 | 35 | DAT4_TABLE = [] 36 | 37 | DAT5_TABLE = [ 38 | [1, -0.392226, 30.612128, 8419.319368, 8419.319368, 739933, 8419.321003, 8419.317740, 0.0, 0.000000, 0, 858], 39 | [2, -0.373093, 245.707984, 8419.297028, 8419.297028, 747929, 8419.298662, 8419.295399, 0.0, 0.000000, 0, 858], 40 | [3, -0.392226, 31.220652, 8419.274374, 8419.274374, 756037, 8419.276009, 8419.272745, 0.0, 0.000000, 0, 858] 41 | ] 42 | 43 | HEADER_LINES = [ 44 | "# -------------------------- o --------------------------", 45 | "# File ID: Voyager1.single_coarse.fine_res.h5 ", 46 | "# -------------------------- o --------------------------", 47 | "# Source:Voyager1", 48 | "# MJD: 57650.782094907408 RA: 17h10m03.984s DEC: 12d10m58.8s", 49 | "# DELTAT: 18.253611 DELTAF(Hz): -2.793968 max_drift_rate: 4.000000 obs_length: 292.057776", 50 | "# --------------------------", 51 | "# Top_Hit_# Drift_Rate SNR Uncorrected_Frequency Corrected_Frequency Index freq_start freq_end SEFD SEFD_freq Coarse_Channel_Number Full_number_of_hits", 52 | "# --------------------------" 53 | ] 54 | 55 | #=============================== Functions ============================== 56 | 57 | 58 | def write_one_dat_file(arg_table, arg_path): 59 | with open(arg_path, "w") as fh: 60 | for textline in HEADER_LINES: 61 | fh.write(textline + "\n") 62 | for hit_entry in arg_table: 63 | textline = "" 64 | for item in hit_entry: 65 | textline += "{}\t".format(item) 66 | fh.write(textline + "\n") 67 | 68 | 69 | def write_all_dat_files(dat_table_list, dat_file_list): 70 | rmtree(TESTDIR, ignore_errors=True) 71 | mkdir(TESTDIR) 72 | ix = 0 73 | for dat_table in dat_table_list: 74 | dat_file = dat_file_list[ix] 75 | write_one_dat_file(dat_table, dat_file) 76 | ix += 1 77 | 78 | 79 | def subtest_calc_freq_range(): 80 | hit = { 81 | "DELTAF": 4.2, 82 | "DELTAT": 0.42, 83 | "DriftRate": 0.0, 84 | 'Freq': 4200.0, 85 | 'obs_length': 292.3, 86 | 'max_drift_rate': 4.0 87 | } 88 | low1, high1 = calc_freq_range(hit, delta_t=4.2, max_dr=False, follow=True) 89 | print("subtest_calc_freq_range 1: low={}, high={}".format(low1, high1)) 90 | low2, high2 = calc_freq_range(hit, delta_t=4.2, max_dr=True, 
follow=False) 91 | print("subtest_calc_freq_range 2: low={}, high={}".format(low2, high2)) 92 | assert isclose(low1, low2, rtol=RTOL_DIFF) 93 | assert isclose(high1, high2, rtol=RTOL_DIFF) 94 | 95 | 96 | def test_find_event(): 97 | dat_table_list = [ 98 | DAT1_TABLE, 99 | DAT2_TABLE, 100 | DAT3_TABLE, 101 | DAT4_TABLE, 102 | DAT5_TABLE 103 | ] 104 | 105 | dat_file_list = [ 106 | TESTDIR + "dat1_hits.dat", 107 | TESTDIR + "dat2_off.dat", 108 | TESTDIR + "dat3_hits.dat", 109 | TESTDIR + "dat4_off.dat", 110 | TESTDIR + "dat5_hits.dat" 111 | ] 112 | 113 | write_all_dat_files(dat_table_list, dat_file_list) 114 | 115 | subtest_calc_freq_range() 116 | 117 | # Assert that 3 hits are in tables 1, 3, and 5 but only 3 events. 118 | evt_table = find_events(dat_file_list, SNR_cut=10, check_zero_drift=False, 119 | filter_threshold=3, on_off_first='ON', complex_cadence=False) 120 | print("evt_table:", evt_table) 121 | assert len(evt_table) == 3 122 | 123 | # Using the SNR threshold, weed out all but hit 2 in tables 1, 3, and 5 but only 1 event. 124 | evt_table = find_events(dat_file_list, SNR_cut=200, check_zero_drift=False, 125 | filter_threshold=3, on_off_first='ON', complex_cadence=False) 126 | print("evt_table:", evt_table) 127 | assert len(evt_table) == 1 128 | 129 | # Drop filter threshold to 2. Drop SNR back to 10. 130 | # Hit 4 in table 1 should be added as a 10th event. 131 | evt_table = find_events(dat_file_list, SNR_cut=10, check_zero_drift=False, 132 | filter_threshold=2, on_off_first='ON', complex_cadence=False) 133 | print("evt_table:", evt_table) 134 | assert len(evt_table) == 10 135 | 136 | # Drop filter threshold to 1. 137 | # Hit 5 in table 1 should be added as an 11th event. 138 | evt_table = find_events(dat_file_list, SNR_cut=10, check_zero_drift=False, 139 | filter_threshold=1, on_off_first='ON', complex_cadence=False) 140 | print("evt_table:", evt_table) 141 | assert len(evt_table) == 11 142 | 143 | # Keep filter threshold at 1. 144 | # Add min_drift_rate and max_drift_rate filters. 145 | # Drop SNR filter. 146 | evt_table = find_events(dat_file_list, check_zero_drift=False, 147 | min_drift_rate=0.1, max_drift_rate=0.4, 148 | filter_threshold=1, on_off_first='ON', complex_cadence=False) 149 | print("evt_table:", evt_table) 150 | assert len(evt_table) == 10 151 | 152 | # Use on_off_first='OFF'. 153 | # No events should be found at filter threshold 3. 154 | evt_table = find_events(dat_file_list, SNR_cut=10, check_zero_drift=False, 155 | filter_threshold=3, on_off_first='OFF', complex_cadence=False) 156 | print("evt_table:", evt_table) 157 | assert evt_table == None 158 | 159 | rmtree(TESTDIR, ignore_errors=True) 160 | 161 | if __name__ == "__main__": 162 | test_find_event() 163 | -------------------------------------------------------------------------------- /turbo_seti/find_event/dat_filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Read a .dat file. Use the following filtering parameters to prune it, 3 | saving the original content in .dat.old: 4 | * min_drift_rate (Hz/s) 5 | * max_drift_rate (Hz/s) 6 | * min_snr 7 | """ 8 | 9 | import sys 10 | import os 11 | import shutil 12 | from argparse import ArgumentParser, RawDescriptionHelpFormatter 13 | import pandas as pd 14 | from pandas.errors import EmptyDataError 15 | 16 | DEBUGGING = False 17 | SEP = r"\s+" 18 | HDR_LINE_COUNT = 9 19 | COPIED = False 20 | 21 | HELP_EPILOGUE = \ 22 | """ 23 | Read a .dat file. 
Use the following filtering parameters to prune it, 24 | saving the original content in .dat.original: 25 | * min_drift_rate (Hz/s) 26 | * max_drift_rate (Hz/s) 27 | * min_snr 28 | 29 | Exit status: 30 | 0 : All went well, even if 0 top hits were read or retained. 31 | 1 : Some sort of error was reported. 32 | """ 33 | 34 | 35 | def oops(arg_text): 36 | print(f"\n*** dat_filter: Oops, {arg_text} !!") 37 | if COPIED: 38 | print("*** dat_filter: Note that the original input file has a new name. Check the dat files carefully !!") 39 | sys.exit(1) 40 | 41 | 42 | def main(args=None): 43 | r""" 44 | This is the entry point. 45 | 46 | Parameters 47 | ---------- 48 | args : dict 49 | 50 | """ 51 | # Create an option parser to get command-line input/arguments 52 | parser = ArgumentParser(description="dat_filter - prune a .dat file.", 53 | formatter_class=RawDescriptionHelpFormatter, 54 | epilog=HELP_EPILOGUE) 55 | 56 | parser.add_argument("dat_file", type=str, 57 | help="Path of the .dat file to prune") 58 | parser.add_argument("-s", "--min_snr", dest="min_snr", type=float, default=None, 59 | help="Filter parameter: The SNR below which top hits will be discarded.") 60 | parser.add_argument("-m", "--min_drift_rate", dest="min_drift_rate", type=float, default=None, 61 | help="Filter parameter: TThe drift rate below which top hits will be discarded.") 62 | parser.add_argument("-M", "--max_drift_rate", dest="max_drift_rate", type=float, default=None, 63 | help="Filter parameter: TThe drift rate above which top hits will be discarded.") 64 | 65 | if args is None: 66 | args = parser.parse_args() 67 | else: 68 | args = parser.parse_args(args) 69 | 70 | full_path = os.path.abspath(args.dat_file) 71 | if not os.path.exists(full_path): 72 | oops(f"The .dat file {args.dat_file} does not exist") 73 | 74 | if args.min_drift_rate is None and args.max_drift_rate is None and args.min_snr is None: 75 | oops("At least one filter parameter must be specified") 76 | 77 | if args.min_drift_rate is not None and args.min_drift_rate < 0: 78 | oops("The min_drift parameter must be > 0 (absolute value)") 79 | if args.max_drift_rate is not None and args.max_drift_rate < 0: 80 | oops("The max_drift parameter must be > 0 (absolute value)") 81 | if args.min_snr is not None and args.min_snr < 0: 82 | oops("The min_snr parameter must be > 0 (absolute value)") 83 | 84 | return execute_pruner(full_path, args.min_drift_rate, args.max_drift_rate, args.min_snr) 85 | 86 | 87 | def execute_pruner(dat_file, min_drift_rate, max_drift_rate, min_snr): 88 | r""" 89 | Prune the dat file, governed by the filter parameters. 90 | Called by main(). 91 | 92 | Parameters 93 | ---------- 94 | dat_file : str 95 | Path of the .dat file 96 | min_drift_rate : float 97 | Minimum allowed drift rate in Hz/s 98 | max_drift_rate : float 99 | Maximum allowed drift rate in Hz/s 100 | min_snr : float 101 | Minimum allowed SNR 102 | """ 103 | 104 | global COPIED 105 | 106 | # Initisalise. 107 | tophit_count = 0 108 | discarded_count = 0 109 | dat_file_original = dat_file + ".original" 110 | 111 | # Copy the original .dat file. 112 | try: 113 | shutil.copyfile(dat_file, dat_file_original) 114 | except: 115 | oops(f"Copying {dat_file} to {dat_file_original} failed") 116 | COPIED = True 117 | print(f"dat_filter: Copyied {dat_file} to {dat_file_original} - ok") 118 | print(f"dat_filter: Will now open {dat_file} for output") 119 | 120 | # Input file --> output file. 
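# Note: by this point the untouched input has already been copied to
# dat_file + ".original" above, so dat_file itself can safely be reopened for
# writing; the header lines are copied through verbatim, and only the top hits
# that survive the drift-rate/SNR filters below are written back.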
121 | with open(dat_file, "w") as out_file: 122 | 123 | # Get all input lines and output the header lines. 124 | with open(dat_file_original, "r") as in_file: 125 | for jj in range(HDR_LINE_COUNT): 126 | try: 127 | hdr_line = in_file.readline() 128 | except: 129 | oops("The input file does not appear to be a dat file") 130 | out_file.write(hdr_line) 131 | 132 | # Make a pandas dataframe out of the tophit lines. 133 | try: 134 | df = pd.read_csv(dat_file_original, header=None, sep=SEP, engine="python", comment="#") 135 | except EmptyDataError: 136 | # Create empty dataframe. 137 | df = pd.DataFrame() 138 | input_count = len(df) 139 | 140 | # Process each tophit. 141 | for jj in range(input_count): 142 | drift_rate = abs(float(df[1][jj])) 143 | snr = abs(float(df[2][jj])) 144 | if min_drift_rate is not None: 145 | if abs(drift_rate) < min_drift_rate: 146 | discarded_count += 1 147 | continue 148 | if max_drift_rate is not None: 149 | if abs(drift_rate) > max_drift_rate: 150 | discarded_count += 1 151 | continue 152 | if min_snr is not None: 153 | if abs(snr) < min_snr: 154 | discarded_count += 1 155 | continue 156 | # Include this one. 157 | tophit_count += 1 158 | tophit_line = '%06d\t' % (tophit_count) # Top Hit number 159 | tophit_line += '%10.6f\t' % df[1][jj] # Drift Rate 160 | tophit_line += '%10.6f\t' % df[2][jj] # SNR 161 | tophit_line += '%14.6f\t' % df[3][jj] # Uncorrected Frequency 162 | tophit_line += '%14.6f\t' % df[4][jj] # Corrected Frequency 163 | tophit_line += '%d\t' % df[5][jj] # Index 164 | tophit_line += '%14.6f\t' % df[6][jj] # freq_start 165 | tophit_line += '%14.6f\t' % df[7][jj] # freq_end 166 | tophit_line += '%s\t' % df[8][jj] # SEFD 167 | tophit_line += '%14.6f\t' % df[9][jj] # SEFD_mid_freq 168 | tophit_line += '%i\t' % df[10][jj] # fine channel index 169 | tophit_line += '%i\t' % df[11][jj] # total candidates 170 | out_file.write(tophit_line + "\n") 171 | if DEBUGGING: 172 | print(tophit_line) 173 | 174 | print(f"dat_filter: Processed {input_count} top hits.") 175 | print(f"dat_filter: Retained {tophit_count}.") 176 | print(f"dat_filter: Discarded {discarded_count}.") 177 | print(f"dat_filter: Updated file {dat_file}.") 178 | print(f"dat_filter: The original dat contents are in file {dat_file_original}.") 179 | 180 | return 0 181 | 182 | 183 | if __name__ == "__main__": 184 | # Start the show! 185 | main() 186 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/UCBerkeleySETI/turbo_seti.svg?branch=master)](https://travis-ci.org/UCBerkeleySETI/turbo_seti) 2 | [![Documentation Status](https://readthedocs.org/projects/turbo-seti/badge/?version=latest)](https://turbo-seti.readthedocs.io/en/latest/?badge=latest) 3 | [![codecov](https://codecov.io/gh/UCBerkeleySETI/turbo_seti/branch/master/graph/badge.svg)](https://codecov.io/gh/UCBerkeleySETI/turbo_seti) 4 | 5 | 6 | 7 | TURBO_SETI 8 | ============================= 9 | 10 | ***turbo*SETI** is an analysis tool for the search of narrow band drifting signals in filterbank data (frequency vs. time). 11 | The main purpose of the code is to hopefully one day find signals of extraterrestrial origin!! 12 | It can search the data for hundreds of drift rates (in Hz/sec). It can handle either .fil or .h5 file formats. 13 | 14 | **NOTE**: 15 | This code is stable, but new features are currently under development. 
16 | 17 | Some details for the expert eye: 18 | 19 | - Python based, with taylor tree in Numba for improved performance. 20 | - Pre-calculated `drift index arrays`. 21 | - Output plain text file with information on each hit. 22 | - Including output reader into a pandas DataFrame. 23 | 24 | It was originally based on `dedoppler` [dedoppler](http://github.com/cs150bf/gbt_seti/); which is based on `rawdopplersearch.c` [`gbt_seti/src/rawdopplersearch.c`](https://github.com/UCBerkeleySETI/gbt_seti/tree/master/src/rawdopplersearch.c)) 25 | 26 |   27 | 28 | ------------------- 29 | 30 | ### Dependencies 31 | 32 | - Python 3.7+ 33 | - astropy 34 | - numpy 35 | - blimpy 2.0.34+ (Breakthrough Listen I/O Methods for Python : https://github.com/UCBerkeleySETI/blimpy) 36 | - pandas 37 | - toolz 38 | - fsspec 39 | - dask 40 | - dask[bag] 41 | - numba 42 | - cloudpickle 43 | - cupy (NVIDIA GPU mode only) 44 | 45 |   46 | 47 | -------------------------- 48 | 49 | ## Installation 50 | 51 | If you have not yet installed blimpy, do so in this manner: 52 | 53 | `python3 -m pip install --user -U blimpy` 54 | 55 | Then, install turbo_seti: 56 | 57 | `python3 -m pip install --user -U turbo_seti` 58 | 59 | ## NVIDIA GPU Users 60 | 61 | Already included is NUMBA Just-in-Time (JIT) CPU performance enhancements. However, if you have NVIDIA GPU hardware on the computer where turbo_seti is going to execute, you can get significant additional performance improvement. Enable GPU enhanced processing with these steps: 62 | 63 | 1. Install pypi package "cupy": `python3 -m pip install cupy` 64 | 2. Run the executable this way: `turboSETI -g y [OTHER OPTIONS]` 65 | 66 | ## Usage 67 | 68 | ### Expected Input File Format 69 | 70 | At the moment, the `turboSETI` command line and the `FindDoppler` object expect a Filterbank HDF5 file (.h5) or a Filterbank SIGPROC file (.fil). If a SIGPROC file is supplied, it will automatically be converted to an HDF5 file which resides in the same directory as the SIGPROC file. 71 | 72 | ### Usage as a Command Line 73 | 74 | Run with data: `turboSETI [OPTIONS]` 75 | 76 | For an explanation of the program parameters: `turboSETI -h` 77 | 78 | ### Usage as a Python Package 79 | 80 | ``` 81 | from turbo_seti.find_doppler.find_doppler import FindDoppler 82 | fdop = FindDoppler(datafile=my_HDF5_file, ...) 83 | fdop.search(...) 84 | ``` 85 | 86 | ### Example Usage as a Python Package 87 | 88 | ``` 89 | import time 90 | from blimpy import Waterfall 91 | from turbo_seti.find_doppler.find_doppler import FindDoppler 92 | 93 | H5DIR = "/path_to_seti_data/voyager/" 94 | H5PATH = H5DIR + "Voyager1.single_coarse.fine_res.h5" 95 | OUT_DIR = "/path_to_output_directory" 96 | 97 | print("\nUsing HDF5 file: {}\nHeader and data shape:".format(H5PATH)) 98 | # -- Get a report of header and data shape 99 | wf = Waterfall(H5PATH) 100 | wf.info() 101 | # -- Instantiate FindDoppler. 102 | print("\nInstantiating the FindDoppler object.") 103 | fdop = FindDoppler(datafile=H5PATH, max_drift=4, snr=25, out_dir=OUT_DIR) 104 | # -- Search for hits and report elapsed time. 105 | print("\nBegin doppler search. 
Please wait ...") 106 | t1 = time.time() 107 | fdop.search() 108 | elapsed_time = time.time() - t1 109 | print("\nFindDoppler.search() elapsed time = {} seconds".format(elapsed_time)) 110 | ``` 111 | 112 | 113 | ### Sample DAT File Output 114 | 115 | ``` 116 | # -------------------------- o -------------------------- 117 | # File ID: Voyager1.single_coarse.fine_res.h5 118 | # -------------------------- o -------------------------- 119 | # Source:Voyager1 120 | # MJD: 57650.782094907408 RA: 17h10m03.984s DEC: 12d10m58.8s 121 | # DELTAT: 18.253611 DELTAF(Hz): -2.793968 122 | # -------------------------- 123 | # Top_Hit_# Drift_Rate SNR Uncorrected_Frequency Corrected_Frequency Index freq_start freq_end SEFD SEFD_freq Coarse_Channel_Number Full_number_of_hits 124 | # -------------------------- 125 | 001 -0.392226 30.612128 8419.319368 8419.319368 739933 8419.321003 8419.317740 0.0 0.000000 0 858 126 | 002 -0.373093 245.707984 8419.297028 8419.297028 747929 8419.298662 8419.295399 0.0 0.000000 0 858 127 | 003 -0.392226 31.220652 8419.274374 8419.274374 756037 8419.276009 8419.272745 0.0 0.000000 0 858 128 | ``` 129 | 130 | 131 | ### Sample Console Logging (level=INFO) Output 132 | Note that the coarse channel number appears as a suffix of the logger name. For example, "find_doppler.8" depicts logging for find_doppler.py in coarse channel number 8 (relative to 0). 133 | ``` 134 | Using HDF5 file: /seti_data/voyager/Voyager1.single_coarse.fine_res.h5 135 | Header and data shape: 136 | 137 | --- File Info --- 138 | DIMENSION_LABELS : ['frequency' 'feed_id' 'time'] 139 | az_start : 0.0 140 | data_type : 1 141 | fch1 : 8421.386717353016 MHz 142 | foff : -2.7939677238464355e-06 MHz 143 | ibeam : 1 144 | machine_id : 20 145 | nbeams : 1 146 | nbits : 32 147 | nchans : 1048576 148 | nifs : 1 149 | rawdatafile : guppi_57650_67573_Voyager1_0002.0000.raw 150 | source_name : Voyager1 151 | src_dej : 12:10:58.8 152 | src_raj : 17:10:03.984 153 | telescope_id : 6 154 | tsamp : 18.253611008 155 | tstart (ISOT) : 2016-09-19T18:46:13.000 156 | tstart (MJD) : 57650.78209490741 157 | za_start : 0.0 158 | 159 | Num ints in file : 16 160 | File shape : (16, 1, 1048576) 161 | --- Selection Info --- 162 | Data selection shape : (16, 1, 1048576) 163 | Minimum freq (MHz) : 8418.457032646984 164 | Maximum freq (MHz) : 8421.386717353016 165 | 166 | Instantiating the FindDoppler object. 167 | find_doppler.0 INFO {'DIMENSION_LABELS': array(['frequency', 'feed_id', 'time'], dtype=object), 'az_start': 0.0, 'data_type': 1, 'fch1': 8421.386717353016, 'foff': -2.7939677238464355e-06, 'ibeam': 1, 'machine_id': 20, 'nbeams': 1, 'nbits': 32, 'nchans': 1048576, 'nifs': 1, 'rawdatafile': 'guppi_57650_67573_Voyager1_0002.0000.raw', 'source_name': 'Voyager1', 'src_dej': , 'src_raj': , 'telescope_id': 6, 'tsamp': 18.253611008, 'tstart': 57650.78209490741, 'za_start': 0.0} 168 | 169 | Begin doppler search. Please wait ... 
170 | find_doppler.0 INFO File: /seti_data/voyager/Voyager1.single_coarse.fine_res.h5 171 | drift rates (min, max): (0.000000, 4.000000) 172 | SNR: 25.000000 173 | 174 | Starting ET search using /seti_data/voyager/Voyager1.single_coarse.fine_res.h5 175 | find_doppler.0 INFO Parameters: datafile=/seti_data/voyager/Voyager1.single_coarse.fine_res.h5, max_drift=4, min_drift=0.0, snr=25, out_dir=/seti_data/voyager/, coarse_chans=None, flagging=False, n_coarse_chan=None, kernels=None, gpu_backend=False, precision=2, append_output=False, log_level_int=20, obs_info={'pulsar': 0, 'pulsar_found': 0, 'pulsar_dm': 0.0, 'pulsar_snr': 0.0, 'pulsar_stats': array([0., 0., 0., 0., 0., 0.]), 'RFI_level': 0.0, 'Mean_SEFD': 0.0, 'psrflux_Sens': 0.0, 'SEFDs_val': [0.0], 'SEFDs_freq': [0.0], 'SEFDs_freq_up': [0.0]} 176 | find_doppler.0 INFO Top hit found! SNR 30.612128, Drift Rate -0.392226, index 739933 177 | find_doppler.0 INFO Top hit found! SNR 245.707984, Drift Rate -0.373093, index 747929 178 | find_doppler.0 INFO Top hit found! SNR 31.220652, Drift Rate -0.392226, index 756037 179 | 180 | FindDoppler.search() elapsed time = 9.972093105316162 seconds 181 | ``` 182 | 183 |   184 | -------------------------- 185 | -------------------------------------------------------------------------------- /turbo_seti/find_doppler/file_writers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | from .helper_functions import chan_freq 5 | 6 | 7 | class GeneralWriter: 8 | r""" 9 | Wrapper class for file operations. 10 | 11 | """ 12 | def __init__(self, filename='', mode='a'): 13 | r""" 14 | Initializes GeneralWriter object. Opens given file with given mode, sets new object's filehandle to the file 15 | object, sets the new object's filename to the file's name, then closes the file. 16 | 17 | Parameters 18 | ---------- 19 | filename : str 20 | Name of file on which we would like to perform operations. 21 | mode : str {'a', 'r', 'w', 'x'}, optional 22 | Mode which we want to use to open file, same modes as the built-in python 23 | built-in open function: read (`r`), append (`a`), write (`w`), or create (`x`). 24 | 25 | """ 26 | with open(filename, mode) as myfile: 27 | self.filehandle = myfile 28 | self.filename = filename 29 | 30 | def close(self): 31 | r""" 32 | Closes file object if it is open. 33 | 34 | """ 35 | if self.filehandle.closed: 36 | pass 37 | else: 38 | self.filehandle.close() 39 | 40 | def open(self, mode='a'): 41 | r""" 42 | Opens the file with the inputted mode, then closes it. Does not actually leave the file opened, only used for 43 | changing mode. 44 | 45 | Parameters 46 | ---------- 47 | mode : str {'a', 'r', 'w', 'x'}, optional 48 | Mode which we want to assign to this file, same modes as the built-in python 49 | built-in open function: read (`r`), append (`a`), write (`w`), or create (`x`). 50 | 51 | """ 52 | if self.filehandle.closed: 53 | with open(self.filename, mode) as myfile: 54 | self.filehandle = myfile 55 | elif self.filehandle.mode == mode: 56 | return 57 | else: 58 | self.close() 59 | with open(self.filename, mode) as myfile: 60 | self.filehandle = myfile 61 | 62 | def is_open(self): 63 | r""" 64 | Checks if file is open. 65 | 66 | Returns 67 | ------- 68 | : boolean 69 | True if file is open, False otherwise. 70 | 71 | """ 72 | return not self.filehandle.closed 73 | 74 | def writable(self): 75 | r""" 76 | Checks if file is open, and if it is, checks that mode is either write or append. 
77 | 78 | Returns 79 | ------- 80 | : boolean 81 | True if file is open and writeable, False otherwise. 82 | 83 | """ 84 | return self.is_open() and (('w' in self.filehandle.mode) or ('a' in self.filehandle.mode)) 85 | 86 | def write(self, info_str, mode='a'): 87 | r""" 88 | Sets file mode to a writeable mode and opens it if it is not already open in a writeable mode, writes info_str 89 | to it, and then closes it. If the file was not previously open when this is called, the file is closed after 90 | writing in order to maintain the state the filewriter was in before. 91 | 92 | Parameters 93 | ---------- 94 | info_str : str 95 | Data to be written to file. 96 | mode : str {'a', 'w'}, optional 97 | Mode for file. If it is not a writeable mode, it will be set to a writeable mode. 98 | 99 | """ 100 | if mode not in ('a', 'w'): 101 | mode = 'a' 102 | if not self.writable(): 103 | with open(self.filename, mode) as myfile: 104 | myfile.write(info_str) 105 | self.filehandle = myfile 106 | else: 107 | self.filehandle.write(info_str) 108 | 109 | 110 | class FileWriter(GeneralWriter): 111 | r""" 112 | Used to write information to turboSETI output files. 113 | 114 | """ 115 | def __init__(self, filename, header): 116 | r""" 117 | Initializes FileWriter object and writes its header. 118 | 119 | Parameters 120 | ---------- 121 | filename : str 122 | Name of file on which we would like to perform operations. 123 | header : dict 124 | Information to be written to header of file filename. 125 | 126 | """ 127 | GeneralWriter.__init__(self, filename) 128 | self.write('# -------------------------- o --------------------------\n') 129 | self.write('# File ID: %s \n'%(filename.split('/')[-1].replace('.dat','')+'.h5')) 130 | self.write('# -------------------------- o --------------------------\n') 131 | self.report_header(header) 132 | 133 | self.tophit_count = 0 134 | 135 | def report_header(self, header): 136 | r""" 137 | Write header information per given obs. 138 | 139 | Parameters 140 | ---------- 141 | header : dict 142 | Information to be written to file header. 143 | 144 | """ 145 | info_str = '# Source:{}\n# MJD: {:18.12f}\tRA: {}\tDEC: {}\n# DELTAT: {:10.6f}\tDELTAF(Hz): {:10.6f}\tmax_drift_rate: {:10.6f}\tobs_length: {:10.6f}\n' \ 146 | .format(header['SOURCE'],header['MJD'], header['RA'], header['DEC'], header['DELTAT'], header['DELTAF']*1e6, header['max_drift_rate'], header['obs_length']) 147 | 148 | self.write(info_str) 149 | self.write('# --------------------------\n') 150 | info_str = '# Top_Hit_# \t' 151 | info_str += 'Drift_Rate \t' 152 | info_str += 'SNR \t' 153 | info_str += 'Uncorrected_Frequency \t' 154 | info_str += 'Corrected_Frequency \t' 155 | info_str += 'Index \t' 156 | info_str += 'freq_start \t' 157 | info_str += 'freq_end \t' 158 | info_str += 'SEFD \t' 159 | info_str += 'SEFD_freq \t' 160 | info_str += 'Coarse_Channel_Number \t' 161 | info_str += 'Full_number_of_hits \t' 162 | info_str +='\n' 163 | self.write(info_str) 164 | self.write('# --------------------------\n') 165 | 166 | def report_tophit(self, max_val, ind, ind_tuple, tdwidth, fftlen, header, total_n_candi, obs_info=None): 167 | r""" 168 | This function looks into the top hit in a region, basically finds the local maximum and saves that. 169 | 170 | Parameters 171 | ---------- 172 | max_val : findopp 173 | ind : int 174 | Index at which top hit is located in max_val's maxdrift and maxsnr. 
175 | ind_tuple: tuple(int, int) (lbound, ubound) 176 | tdwidth : int 177 | fftlen : int 178 | Length of the fast fourier transform matrix. 179 | header : dict 180 | Contains info on coarse channel to be written to file. 181 | total_n_candi : int 182 | obs_info: dict, optional 183 | Used to hold info found on file, including info about pulsars, RFI, and SEFD. 184 | 185 | Returns 186 | ------- 187 | : FileWriter object that called this function. 188 | 189 | """ 190 | offset = int((tdwidth - fftlen)/2) 191 | tdwidth = len(max_val.maxsnr) 192 | 193 | self.tophit_count += 1 194 | freq_start = chan_freq(header, ind_tuple[0]-offset, tdwidth, 0) 195 | freq_end = chan_freq(header, ind_tuple[1]-1-offset, tdwidth, 0) 196 | 197 | uncorr_freq = chan_freq(header, ind-offset, tdwidth, 0) 198 | corr_freq = chan_freq(header, ind-offset, tdwidth, 1) 199 | 200 | #Choosing the index of given SEFD and freq. 201 | if obs_info['SEFDs_freq'][0] > 0.: 202 | this_one = np.arange(len(obs_info['SEFDs_freq']))[ (obs_info['SEFDs_freq_up']>uncorr_freq) ][0] 203 | else: 204 | this_one = 0 205 | 206 | info_str = '%06d\t'%(self.tophit_count) #Top Hit number 207 | info_str += '%10.6f\t'%max_val.maxdrift[ind] #Drift Rate 208 | info_str += '%10.6f\t'%max_val.maxsnr[ind] #SNR 209 | info_str += '%14.6f\t'%uncorr_freq #Uncorrected Frequency: 210 | info_str += '%14.6f\t'%corr_freq #Corrected Frequency: 211 | info_str += '%d\t'%(ind - offset) #Index: 212 | info_str += '%14.6f\t'%freq_start #freq_start: 213 | info_str += '%14.6f\t'%freq_end #freq_end: 214 | info_str += '%s\t'%obs_info['SEFDs_val'][this_one] #SEFD: 215 | info_str += '%14.6f\t'%obs_info['SEFDs_freq'][this_one] #SEFD_mid_freq: 216 | info_str += '%i\t'%header['cchan_id'] 217 | info_str += '%i\t'%total_n_candi # 218 | info_str +='\n' 219 | self.write(info_str) 220 | 221 | return self 222 | 223 | class LogWriter(GeneralWriter): 224 | r""" 225 | Used to write data to log. 226 | 227 | """ 228 | def info(self, info_str): 229 | r""" 230 | Writes info_str to file. 231 | 232 | Parameters 233 | ---------- 234 | info_str : str 235 | String to be written to file. 
236 | 237 | """ 238 | self.write(info_str + '\n') 239 | -------------------------------------------------------------------------------- /test/test_pipelines_1.py: -------------------------------------------------------------------------------- 1 | r''' 2 | test_pipelines_1.py 3 | 4 | Using the 0000.h5 Voyager 2020 set of HDF5 files 5 | from http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/ 6 | test the following: 7 | * FindDoppler seaarch 8 | * find_event_pipeline 9 | * plot_event_pipeline 10 | ''' 11 | 12 | from time import time 13 | from shutil import rmtree 14 | from pathlib import Path 15 | from os import mkdir, listdir 16 | from tempfile import gettempdir 17 | import sys 18 | from urllib.error import HTTPError 19 | from argparse import ArgumentParser 20 | import imghdr 21 | import wget 22 | import pytest 23 | 24 | from turbo_seti.find_doppler.find_doppler import FindDoppler 25 | from turbo_seti.find_event.find_event_pipeline import find_event_pipeline 26 | from turbo_seti.find_event.plot_event_pipeline import plot_event_pipeline 27 | import pipelines_util as utl 28 | 29 | TESTDIR = gettempdir() + '/pipeline_testing/' 30 | PATH_DAT_LIST_FILE = TESTDIR + 'dat_files.lst' 31 | PATH_H5_LIST_FILE = TESTDIR + 'h5_files.lst' 32 | PATH_CSVF = TESTDIR + 'found_event_table.csv' 33 | FILTER_THRESHOLD = 3 34 | N_EVENTS = 2 35 | 36 | URL_DIR = 'http://blpd14.ssl.berkeley.edu/voyager_2020/single_coarse_channel/' 37 | H5_FILE_LIST = ['single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5', 38 | 'single_coarse_guppi_59046_80354_DIAG_VOYAGER-1_0012.rawspec.0000.h5', 39 | 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.h5', 40 | 'single_coarse_guppi_59046_80989_DIAG_VOYAGER-1_0014.rawspec.0000.h5', 41 | 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.h5', 42 | 'single_coarse_guppi_59046_81628_DIAG_VOYAGER-1_0016.rawspec.0000.h5'] 43 | MAX_DRIFT = 2.0 44 | MIN_SNR = 10.0 45 | 46 | 47 | def oops(arg_text): 48 | ''' 49 | Log the bad news and exit to the O/S with a non-zero exit code. 50 | ''' 51 | print('\n*** Oops, ' + arg_text) 52 | sys.exit(86) 53 | 54 | 55 | def initialize(): 56 | r''' 57 | Recreate working directory, TESTDIR. 58 | ''' 59 | rmtree(TESTDIR, ignore_errors=True) 60 | mkdir(TESTDIR) 61 | print('test_pipelines_1: Initialized') 62 | 63 | 64 | def wgetter(arg_h5_name): 65 | r''' 66 | wget an HDF5 file from the Internet repository. 67 | arg_h5_name: HDF5 file name 68 | ''' 69 | url_h5 = URL_DIR + arg_h5_name 70 | path_h5 = TESTDIR + arg_h5_name 71 | print('test_pipelines_1: Begin wget {} -> {} .....'.format(url_h5, path_h5)) 72 | time_start = time() 73 | try: 74 | wget.download(url_h5, path_h5, bar=False) 75 | except HTTPError as ex: 76 | oops('test_pipelines_1: wget {}, failed: {}'.format(url_h5, repr(ex))) 77 | time_stop = time() 78 | print('test_pipelines_1: End wget ({}), et = {:.1f} seconds' 79 | .format(arg_h5_name, time_stop - time_start)) 80 | 81 | 82 | def make_one_dat_file(arg_h5_name): 83 | r''' 84 | Make a single DAT file: 85 | * Instantiate the FindDoppler class object. 86 | * With the object, search the H5, creating the DAT file 87 | and a LOG file (not used). 
88 | 89 | Note that a max drift of 1 assumes a drift rate of +/- 1 90 | SNR threshold = 25 91 | ''' 92 | print('make_one_dat_file: Begin FindDoppler({}) .....'.format(arg_h5_name)) 93 | h5_path = TESTDIR + arg_h5_name 94 | time_start = time() 95 | doppler = FindDoppler(h5_path, 96 | max_drift=MAX_DRIFT, 97 | snr=MIN_SNR, 98 | out_dir=TESTDIR) 99 | time_stop = time() 100 | print('make_one_dat_file: End FindDoppler({}), et = {:.1f} seconds' 101 | .format(arg_h5_name, time_stop - time_start)) 102 | 103 | print('make_one_dat_file: Begin Doppler search({}) .....' 104 | .format(arg_h5_name)) 105 | 106 | # ---------------------------------------------------------------------------- 107 | # No more than 1 execution of this program because of dask methodology! 108 | # To do multiple dask partitions would cause initialization & cleanup chaos. 109 | time_start = time() 110 | doppler.search(n_partitions=1) 111 | time_stop = time() 112 | # ---------------------------------------------------------------------------- 113 | 114 | print('make_one_dat_file: End Doppler search({}), et = {:.1f} seconds' 115 | .format(arg_h5_name, time_stop - time_start)) 116 | 117 | 118 | def make_all_dat_files(): 119 | r''' 120 | For each HDF5 file name, 121 | * Make one DAT file. 122 | * Add its name to the list of DAT files. 123 | ''' 124 | with open(PATH_DAT_LIST_FILE, 'w') as file_handle: 125 | for filename_h5 in H5_FILE_LIST: 126 | make_one_dat_file(filename_h5) 127 | filename_dat = filename_h5.replace('.h5', '.dat') 128 | file_handle.write('{}\n'.format(TESTDIR + filename_dat)) 129 | 130 | 131 | def find_plot_pipelines(need_init=True, filter_threshold=FILTER_THRESHOLD): 132 | r''' 133 | Exercise find_event_pipeline() and plot_event_pipeline() 134 | ''' 135 | 136 | main_time_start = time() 137 | 138 | # If configured to do so, initialize temp directory 139 | # and fetch all of the HDF5 files from the Internet. 140 | if need_init: 141 | initialize() 142 | for filename_h5 in H5_FILE_LIST: 143 | wgetter(filename_h5) 144 | # Make all of the DAT files. 145 | 146 | # For each h5, make a dat file. 147 | make_all_dat_files() 148 | 149 | print('find_plot_pipelines: Filter threshold = ', filter_threshold) 150 | number_in_cadence = len(H5_FILE_LIST) 151 | print('find_plot_pipelines: Cadence length = ', number_in_cadence) 152 | print('find_plot_pipelines: find_event_pipeline({}) ...' 153 | .format(PATH_DAT_LIST_FILE)) 154 | 155 | # With the list of DAT files, do find_event_pipeline() 156 | df_event = find_event_pipeline(PATH_DAT_LIST_FILE, 157 | filter_threshold=filter_threshold, 158 | number_in_cadence=number_in_cadence, 159 | user_validation=False, 160 | saving=True, 161 | csv_name=PATH_CSVF) 162 | 163 | # CSV file created? 164 | if not Path(PATH_CSVF).exists(): 165 | raise ValueError('find_plot_pipelines: No CSV of events created') 166 | 167 | # An event CSV was created. 168 | # Validate the hit table file. 169 | utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipe_lines', N_EVENTS) 170 | 171 | # Make a list of the HDF5 files. 172 | print('find_plot_pipelines: making a list of HDF5 files in ({}) ...' 173 | .format(PATH_DAT_LIST_FILE)) 174 | with open(PATH_H5_LIST_FILE, 'w') as file_handle: 175 | for filename_h5 in H5_FILE_LIST: 176 | file_handle.write('{}\n'.format(TESTDIR + filename_h5)) 177 | 178 | # Do the plots for all of the HDF5/DAT file pairs. 179 | print('find_plot_pipelines: plot_event_pipeline({}, {}) ...' 
180 | .format(PATH_CSVF, PATH_H5_LIST_FILE)) 181 | plot_event_pipeline(PATH_CSVF, 182 | PATH_H5_LIST_FILE, 183 | filter_spec='f{}'.format(filter_threshold), 184 | user_validation=False) 185 | 186 | # Check that the right number of PNG files were created. 187 | outdir_list = listdir(TESTDIR) 188 | npngs = 0 189 | for cur_file in outdir_list: 190 | if cur_file.split('.')[-1] == 'png': 191 | if imghdr.what(TESTDIR + cur_file) != 'png': 192 | raise ValueError('find_plot_pipelines: File {} is not a PNG file' 193 | .format(cur_file)) 194 | npngs += 1 195 | if npngs != N_EVENTS: 196 | raise ValueError('find_plot_pipelines: Expected to find {} PNG files but observed {}' 197 | .format(N_EVENTS, npngs)) 198 | 199 | # Stop the clock - we're done. 200 | main_time_stop = time() 201 | 202 | print('find_plot_pipelines: End, et = {:.1f} seconds' 203 | .format(main_time_stop - main_time_start)) 204 | 205 | 206 | @pytest.mark.order(1) 207 | def test_pipelines(need_init=True): 208 | r''' 209 | This is the pytest entry point. 210 | Test filter threshold 3 in find_plot_pipelines(). 211 | By default (pytest): 212 | * Initialization is done only once. 213 | ''' 214 | print('\n===== test_pipelines_1: BEGIN =====') 215 | 216 | find_plot_pipelines(need_init=need_init, filter_threshold=FILTER_THRESHOLD) 217 | 218 | print('\n===== test_pipelines_1: END =====') 219 | 220 | 221 | def main(args=None): 222 | r'''Main Function Entry Point''' 223 | pobj = ArgumentParser(description='Test find|plot pipelines.') 224 | pobj.add_argument('-i', '--initialize', dest='flag_init', type=str, 225 | default="y", 226 | help='Initialize/download? (y/n). Default: y') 227 | 228 | if args is None: 229 | args = pobj.parse_args() 230 | else: 231 | args = pobj.parse_args(args) 232 | 233 | test_pipelines(need_init=(args.flag_init == 'y')) 234 | 235 | 236 | if __name__ == '__main__': 237 | main() 238 | -------------------------------------------------------------------------------- /VERSION-HISTORY.md: -------------------------------------------------------------------------------- 1 | This file is a version history of turbo_seti amendments, beginning with version 2.0.0. Entries appear in version descending order (newest first, oldest last). 2 |
3 |
4 | 5 | | `YYYY_MM_DD` | `Version` | `Contents` | 6 | | :--: | :--: | :-- | 7 | | 2022-05-16 | 2.3.0 | New utility (dat_diff) to show differences between two dat files (issue #311). | 8 | | 2022-05-05 | 2.2.4 | Bug fix in the logic for removing duplicate hits. | 9 | | 2022-05-05 | 2.2.3 | Bug fix in drift rate calculation. | 10 | | 2022-04-04 | 2.2.2 | Performance improvement in GPU mode: Use a cupy RawKernel for the 'flt' function. | 11 | | 2022-03-30 | 2.2.1 | Expose --plot_offset in plotSETI (issue #305). | 12 | | | | Fix plot offset output in plot_event.py overlay_drift function (issue #305). | 13 | | | | Get rid of Waterfall max_load parameters and object deletes in plot_event.py make_waterfall_plots function. | 14 | | 2022-03-24 | 2.2.0 | Introduced dat_filter utility (issue #303). | 15 | | | | Enhance event analysis (plotSETI et al) to filter by SNR and drift rate (issue #303). | 16 | | 2022-02-23 | 2.1.25 | Fix to the wrong drift rates when the number of time integrations is not a power of 2 (issue #302). | 17 | | 2022-02-07 | 2.1.24 | Print versions of hdf5plugin and the HDF5 library (issue #299). | 18 | | | | Enable the display of HDF5 library error messages which are inhibited by default (issue #299). | 19 | | 2022-01-23 | 2.1.23 | Stop mangled path file names in data_handler.py & find_event_pipeline.py (issue #297). | 20 | | | | Dependent on blimpy version >= 2.0.34 | 21 | | 2021-12-05 | 2.1.22 | Add the ability to entertain .h5 files and .dat files in separate directories - part 2 (issue #294) | 22 | | 2021-12-04 | 2.1.21 | Add the ability to entertain .h5 files and .dat files in separate directories - part 1 (issue #291) | 23 | | 2021-12-01 | 2.1.20 | Add source code reference to the Read the Docs documentation. | 24 | | 2021-11-29 | 2.1.19 | Fix to find_doppler.py for potentially lost signals in hitsearch. (issue #290) | 25 | | 2021-11-19 | 2.1.18 | Fix to data_handler.py for handling of the NFPC header field from the new rawspec. (issue #285) | 26 | | 2021-11-10 | 2.1.17 | Fix to find_event.py which was generating too many events & plots. (issue #283) | 27 | | 2021-10-28 | 2.1.16 | Print a 2x3 data postage stamp when loading data for coarse channel 0 only. (issue #280, part 2) | 28 | | 2021-10-23 | 2.1.15 | Print a 2x3 data postage stamp when loading data. (issue #280) | 29 | | 2021-10-22 | 2.1.14 | Support new metadata field, NFPC. (issue #278). | 30 | | 2021-09-13 | 2.1.13 | Make find_doppler easier to read and amend. (issue #274). | 31 | | 2021-08-17 | 2.1.12 | Fix "AttributeError: module 'cupy' has no attribute '_core'". (issue #272). | 32 | | 2021-08-12 | 2.1.11 | Specific MeerKAT files cause erratic behaviour in GPU mode (issue #270). | 33 | | 2021-07-22 | 2.1.10 | The data_handler crashed during conversion of a 59 GiB filterbank file (issue #267). | 34 | | 2021-07-22 | 2.1.9 | Performance improvement in gpu mode: default to single-precision (32-bit). | 35 | | 2021-07-20 | 2.1.8 | Performance improvements and fix min_drift to prevent near-min-drift hits. | 36 | | 2021-07-18 | 2.1.7 | Create a turbo_seti clone of blank_dc that is optional and uses a different strategy (issue #262). | 37 | | 2021-07-15 | 2.1.6 | Calculate normalized value inside hitsearch kernel on GPU-mode. | 38 | | 2021-07-16 | 2.1.5 | Failed to pass the gpu_id from find_doppler.py to data_handler.py (issue #254). | 39 | | 2021-07-15 | 2.1.4 | Add GPU device selection with cli argument gpu_id. (issue #254). 
| 40 | | 2021-07-15 | 2.1.3 | Diagnose out of range time steps with correct messages (issue #256). | 41 | | | | Also, stop catching exceptions in seti_event.py which causes a cascade in tracebacks. | 42 | | 2021-07-10 | 2.1.2 | Diagnose non-cadence sets of files in find_event_pipeline (issue #250). | 43 | | 2021-07-09 | 2.1.1 | New turbo_seti utility: plotSETI. | 44 | | 2021-07-04 | 2.1.0 | The function calc_freq_range uses hardcoded parameter values. These should instead be derived from the data. | 45 | | | | See issue #231 for the full description and the resolution approach. | 46 | | 2021-06-26 | 2.0.23 | Make data_handler.py provide useful info during exceptions (issue #243). 47 | | | | Cleared up median vs mean confusion (issue #244). 48 | | | | Stop using a Python3 reserved word for a function name (issue #245). 49 | | 2021-06-14 | 2.0.22 | Pre-delete HDF5 file when input is a Filterbank file (.fil) (issue #241). 50 | | 2021-06-11 | 2.0.21 | Log n_coarse_chan value when calculated by blimpy (issue #238). 51 | | 2021-06-06 | 2.0.20 | Log drift_rate_resolution value (issue #236). 52 | | 2021-04-21 | 2.0.19 | Change min_drift default to disallow near-zero drift. 53 | | 2021-04-13 | 2.0.18 | Add GPU enabled Docker image build. 54 | | 2021-04-07 | 2.0.17 | Fixed issue #230 - Added turbo_seti/find_event/plot_dat.py which makes a plot similar to the one produced by plot_candidate_events, but also includes the hits detected, in addition to the candidate signal. | 55 | | 2021-04-03 | 2.0.16 | Fixed issue #225 - Ensure proper order of regression test execution. | 56 | | | | Fixed issue #226 - Apparently useless plot_event.py code became a bug source in latest matplotlib. | 57 | | | | Fixed issue #227 - Allow color & alpha selection in plot_event.py overlay_drift function. | 58 | | | | Fixed issue #228 - test_pipelines_1 fails SNR comparison on MacOS. | 59 | | 2021-03-20 | 2.0.15 | Fixed issue #205 - Reverse-engineered the original drift index files. | 60 | | | | Fixed issue #218 - Replaced drift index file 8 (broken). | 61 | | | | Fixed issue #94 - removed unused code from plot_event_pipeline.py and plot_event.py | 62 | | 2021-03-10 | 2.0.14 | Fixed issue #213 - Doppler search dies when using GPU (string format issue). | 63 | | | | Fixed issue #214 - Need some testing for plot_dir parameter of plot_event_pipeline. | 64 | | 2021-03-09 | PR #212 | Support specification of an output directory for plotting at multiple levels in plot_event_pipeline.py and plot_event.py. | 65 | | 2021-03-05 | 2.0.13 | Support very large data arrays. See blimpy issue #180. | 66 | | 2021-03-03 | 2.0.12 | Fixed issue #207 - flexible DAT line scanning in find_event.py read_dat(). | 67 | | 2021-03-02 | 2.0.11 | Fixed issue #89 - min_drift & max_drift in find_doppler.py. | 68 | | | | Fixed issue #162 - Announce turbo_seti and blimpy versions in use at start of Doppler search (find_doppler.py). | 69 | | | | Fixed issue #200 - Cleanup/speedup of test_turbo_seti.py. | 70 | | | | Fixed issue #201 - Created test_drift_rates for testing find_doppler.py min and max drift rates. | 71 | | | | Fixed issue #202 - Amend plot_event.py to compute the blimpy Waterfall max_load parameter value for data arrays exceeding 1 GB in size. | 72 | | | | Fixed issue #203 - Show turbo_seti version as part of turboSETI --help. Add a -v/--version parameter. | 73 | | | | Fixed issue #204 - Stop the pipelines from loading data when they are only interested in Waterfall header fields. 
| 74 | | 2021-02-25 | 2.0.10 | Addressed issue #197 - Added file path ordering by header.tstart in {find,plot}_event_pipeline.py. | 75 | | 2021-02-25 | 2.0.9 | Fixed issue #195 - Stop find_event_pipeline() from crashing when there are no complex cadence matches. | 76 | | | | Fixed issue #194 - Implemented complex cadence testing. | 77 | | | | **Still outstanding**: Issue #89 (min_drift parameter is broken). | 78 | | 2021-02-23 | 2.0.8.2 | Fixed issue #190 - Stop find_events() from crashing when a complex cadence has been specified. | 79 | | 2021-02-20 | 2.0.8.1 | Address issue #188 - Enhance plot_event.py to handle both interactive and noninteractive matplotlib backends. | 80 | | | | Removed references to numpy from setup.py. | 81 | | 2021-01-30 | 2.0.8 | Added test/{test_fb_cases.py, fb_*} to implement checking against known valid DAT file results. | 82 | | | | Added a --min_drift parameter to turboSETI (issue #178). **Alas, issue #89 has been re-opened.** | 83 | | | | Fixed the DAT file formatting and top hit numbering when multipartitioning with dask (issue #179). | 84 | | 2021-01-20 | 2.0.7 | Fixed issues #135 & #150 (confirmed) by making the code structure in find_doppler.py search_coarse_channel() more like version 1.3.0. | 85 | | 2021-01-19 | 2.0.6.6 | Fixed issue #169 - Fixed find_event_pipeline IndexError crash. | 86 | | 2021-01-18 | 2.0.6.5 | Fixed issue #167 - LogWriter support for unattended testing on data centre compute nodes was enhanced to provide feedback concerning success/failure. | 87 | | 2021-01-16 | 2.0.6.4 | Fixed issue #164 - Progress bar in dask partitioning is now OFF by default. | 88 | | 2021-01-13 | 2.0.6.3 | Fixed issue #159 - Removed invalid bash script generation in setup.py. | 89 | | 2021-01-12 | 2.0.6.2 | Fixed issue #157 - Logging enhancement in find_doppler.py. | 90 | | 2021-01-09 | 2.0.6.1 | Fixed issue #154 - Enhanced test/test_pipelines.py. | 91 | | 2021-01-05 | 2.0.6 | Fixed issue #152 - plot_event() by yanking the PR #82 code. Repercussions for Parkes data? | 92 | | 2021-01-04 | 2.0.5 | Rolling back previous fix to #141 (left open), hoping to fix issue #150 (related to issue #135). | 93 | | 2020-12-31 | 2.0.4.1 | Fixed issue #141 which prevented searching in one of the drift block ranges. | 94 | | 2020-12-24 | 2.0.4 | Added GitHub Actions Workflows for CI instead of Travis CI. | 95 | | 2020-12-22 | 2.0.3 | Reverted changes made by PRs #121 and #113, hoping to fix issue #135. | 96 | | 2020-12-21 | 2.0.2 | Amended `find_doppler.py`, `seti_event.py`, and `plot_event.py` to resurrect logging. See issue \#134. | 97 | | 2020-12-20 | 2.0.1 | Amended `plot_event_pipeline.py` to accept a new filter_spec parameter. See issue \#127. | 98 | | | | Amended `plot_event.py` to stop generating "RuntimeWarning: More than 20 figures have been opened". | 99 | | | | Default write-mode for DAT & LOG files is changed to "w" (replace). Append requires new optional `-a y` parameter. | 100 | | | | GPU-mode performance improvements. | 101 | | 2020-11-17 | 2.0.0 | Support NUMBA JIT compilation (CPU) and CUPY (NVIDIA GPU). | 102 | | | | Made `turboSETI -n ...` work (set the number of coarse channels). | 103 | | | | No longer keeping Voyager test data in this repository. | 104 | | | | Fixed several data-dependent crash bugs. |
105 | 106 | -------------------------------------------------------------------------------- /test/fb_cases_util.py: -------------------------------------------------------------------------------- 1 | r''' 2 | Utility functions for test_fb_cases.py 3 | ''' 4 | 5 | from os import mkdir, remove 6 | from os.path import dirname 7 | from shutil import rmtree 8 | import logging 9 | import pandas as pd 10 | import numpy as np 11 | import setigen as stg 12 | from turbo_seti.find_doppler.find_doppler import FindDoppler 13 | from fb_cases_def import HERE, DEBUGGING, RTOL_DIFF, TestResultRecord, SetigenParms 14 | 15 | DF_REFERENCE = HERE + '/fb_dat_reference.txt' 16 | SEP = r'\s+' 17 | 18 | 19 | def using_gpu(): 20 | r""" 21 | Using GPU acceleration? 22 | 23 | Returns 24 | ------- 25 | using_gpu : bool 26 | True : use GPU 27 | False : use CPU 28 | 29 | """ 30 | try: 31 | import cupy 32 | cupy.__version__ 33 | except: 34 | return False 35 | return True 36 | 37 | 38 | def initialize(arg_dir): 39 | r''' 40 | Recreate working directory, TESTDIR. 41 | Load result reference tables (2). 42 | ''' 43 | rmtree(arg_dir, ignore_errors=True) 44 | mkdir(arg_dir) 45 | df = pd.read_csv(DF_REFERENCE, sep=SEP, engine='python', comment='#') 46 | nrows = len(df) 47 | if nrows < 1: 48 | raise ValueError('initialize: Empty reference table') 49 | if nrows % 2 != 0: 50 | raise ValueError('initialize: Reference table row count ({}) is not divisible by 2' 51 | .format(nrows)) 52 | if DEBUGGING: 53 | print('initialize: Test case reference results: \n', df) 54 | ref_tophit_1 = [] 55 | ref_tophit_2 = [] 56 | jj = 0 57 | while jj < nrows: 58 | record = TestResultRecord() 59 | record.fdir = int(df['fdir'][jj]) 60 | record.drsign = int(df['drsign'][jj]) 61 | record.tophit_id = int(df['tophit'][jj]) 62 | record.drate = float(df['drate'][jj]) 63 | record.snr = float(df['snr'][jj]) 64 | record.freq = float(df['freq'][jj]) 65 | record.index = int(df['index'][jj]) 66 | ref_tophit_1.append(record) 67 | if DEBUGGING: 68 | print('initialize: appended for hit_1:\n', record.to_string() ) 69 | jj += 1 70 | del record 71 | record = TestResultRecord() 72 | record.fdir = int(df['fdir'][jj]) 73 | record.drsign = int(df['drsign'][jj]) 74 | record.tophit_id = int(df['tophit'][jj]) 75 | record.drate = float(df['drate'][jj]) 76 | record.snr = float(df['snr'][jj]) 77 | record.freq = float(df['freq'][jj]) 78 | record.index = int(df['index'][jj]) 79 | ref_tophit_2.append(record) 80 | if DEBUGGING: 81 | print('initialize: appended for hit_2:\n', record.to_string() ) 82 | jj += 1 83 | if DEBUGGING: 84 | print('initialize: {} test cases loaded.'.format(len(ref_tophit_1))) 85 | return ref_tophit_1, ref_tophit_2 86 | 87 | 88 | def generate_fil_file(outpath, flag_fascending, flag_sign_drift_rate): 89 | r''' 90 | Using setigen, generate a filterbank file. 91 | 92 | Parameters: 93 | outpath - full path of where to store the resultant filterbank file. 
94 | flag_fascending - use an ascending (+1) or descending (-1) sequence of frequencies 95 | flag_sign_drift_rate - use a positive (+1) or negative (-1) drift rate 96 | ''' 97 | if DEBUGGING: 98 | print('generate_fil_file: flag_fascending={}, flag_sign_drift_rate={}' 99 | .format(flag_fascending, flag_sign_drift_rate)) 100 | 101 | # Set up setigen parameters 102 | stg_parms = SetigenParms() 103 | if flag_sign_drift_rate < 0: 104 | stg_parms.drift_rate_1 = -stg_parms.drift_rate_1 105 | stg_parms.drift_rate_2 = -stg_parms.drift_rate_2 106 | stg_parms.drift_rate_3 = -stg_parms.drift_rate_3 107 | stg_parms.drift_rate_4 = -stg_parms.drift_rate_4 108 | stg_parms.drift_rate_5 = -stg_parms.drift_rate_5 109 | 110 | # Instantiate a setigen Frame object 111 | frame = stg.Frame(fchans=stg_parms.fchans, 112 | tchans=stg_parms.tchans, 113 | df=stg_parms.df, 114 | dt=stg_parms.dt, 115 | fch1=stg_parms.fch1, 116 | ascending=(flag_fascending > 0)) 117 | 118 | # Add Gaussian noise to the setigen Frame object. 119 | frame.add_noise(x_mean=0, x_std=stg_parms.noise_std, noise_type='gaussian') 120 | 121 | # Signal 1 will be detected. 122 | signal_1_intensity = frame.get_intensity(snr=stg_parms.snr_1) 123 | frame.add_constant_signal(f_start=frame.get_frequency(stg_parms.signal_start_1), 124 | drift_rate=stg_parms.drift_rate_1, 125 | level=signal_1_intensity, 126 | width=stg_parms.width_1, 127 | f_profile_type='gaussian') 128 | 129 | # Signal 2 will be detected. 130 | signal_2_intensity = frame.get_intensity(snr=stg_parms.snr_2) 131 | frame.add_constant_signal(f_start=frame.get_frequency(stg_parms.signal_start_2), 132 | drift_rate=stg_parms.drift_rate_2, 133 | level=signal_2_intensity, 134 | width=stg_parms.width_2, 135 | f_profile_type='gaussian') 136 | 137 | # Signal 3 is a symmetric signal with three Gaussians 138 | # that will fall below the SNR requirements. 139 | signal_3_intensity = frame.get_intensity(snr=stg_parms.snr_3) 140 | frame.add_signal(stg.constant_path(f_start=frame.get_frequency(stg_parms.signal_start_3), 141 | drift_rate=stg_parms.drift_rate_3), 142 | stg.constant_t_profile(level=1), 143 | stg.multiple_gaussian_f_profile(width=stg_parms.width_3), 144 | stg.constant_bp_profile(level=signal_3_intensity)) 145 | 146 | # Signal 4 is a symmetric signal with three Gaussians 147 | # that will be drifting too quickly. 148 | signal_4_intensity = frame.get_intensity(snr=stg_parms.snr_4) 149 | frame.add_signal(stg.constant_path(f_start=frame.get_frequency(stg_parms.signal_start_4), 150 | drift_rate=stg_parms.drift_rate_4), 151 | stg.constant_t_profile(level=1), 152 | stg.multiple_gaussian_f_profile(width=stg_parms.width_4), 153 | stg.constant_bp_profile(level=signal_4_intensity)) 154 | 155 | # Signal 5 is similar to signal 4 but drifting in the opposite direction. 156 | signal_5_intensity = frame.get_intensity(snr=stg_parms.snr_5) 157 | frame.add_signal(stg.constant_path(f_start=frame.get_frequency(stg_parms.signal_start_5), 158 | drift_rate=stg_parms.drift_rate_5), 159 | stg.constant_t_profile(level=1), 160 | stg.multiple_gaussian_f_profile(width=stg_parms.width_5), 161 | stg.constant_bp_profile(level=signal_5_intensity)) 162 | 163 | # Save the frame as a filterbank file. 164 | frame.save_fil(filename=outpath) 165 | print("generate_fil_file: generated {}".format(outpath)) 166 | del frame 167 | 168 | 169 | def make_one_dat_file(arg_path_fil, min_drift=0.0, max_drift=4.0, min_snr=25.0, remove_h5=True): 170 | r''' 171 | Make a single DAT file: 172 | * Instantiate the FindDoppler class object.
173 | * With the object, search the H5, creating the DAT file 174 | and a LOG file (not used). 175 | ''' 176 | if max_drift is None: 177 | raise ValueError('make_one_dat_file: max_drift not set') 178 | woutdir = dirname(arg_path_fil) 179 | fdop = FindDoppler(datafile=arg_path_fil, 180 | min_drift=min_drift, 181 | max_drift=max_drift, 182 | snr=min_snr, 183 | log_level_int=logging.WARNING, 184 | gpu_backend = using_gpu(), 185 | out_dir=woutdir) 186 | fdop.search() 187 | path_h5_file = arg_path_fil.replace('.fil', '.h5') 188 | if remove_h5: 189 | remove(path_h5_file) 190 | 191 | 192 | def get_case_results(arg_path_dat): 193 | r'''From the DAT file, extract the data for all top hits.''' 194 | df = pd.read_csv(arg_path_dat, header=None, sep=SEP, engine='python', comment='#') 195 | nrows = len(df) 196 | if nrows != 2: 197 | raise ValueError('get_case_results: Expected 2 rows in DAT but observed {} rows' 198 | .format(nrows)) 199 | 200 | obs_tophit_1 = TestResultRecord() 201 | obs_tophit_1.tophit_id = int(df[0][0]) # 1st col, 1st row 202 | obs_tophit_1.drate = float(df[1][0]) 203 | obs_tophit_1.snr = float(df[2][0]) 204 | obs_tophit_1.freq = float(df[4][0]) 205 | obs_tophit_1.index = int(df[5][0]) 206 | 207 | obs_tophit_2 = TestResultRecord() 208 | obs_tophit_2.tophit_id = int(df[0][1]) # 1st col, 2nd row 209 | obs_tophit_2.drate = float(df[1][1]) 210 | obs_tophit_2.snr = float(df[2][1]) 211 | obs_tophit_2.freq = float(df[4][1]) 212 | obs_tophit_2.index = int(df[5][1]) 213 | 214 | return obs_tophit_1, obs_tophit_2 215 | 216 | 217 | def case_comparison(obs_tophit, ref_tophit, max_drift): 218 | r'''Compare DAT file observations to the reference.''' 219 | if obs_tophit is None: 220 | if ref_tophit is None: 221 | return # success, both None 222 | # ref_tophit defined, obs_tophit is None 223 | raise ValueError('case_comparison: FAILED, max_drift={}\nobs_tophit is None\nref_tophit:::{}' 224 | .format(max_drift, ref_tophit.to_string())) 225 | if ref_tophit is None: # obs_tophit defined, ref_tophit is None 226 | raise ValueError('case_comparison: FAILED, max_drift={}\nref_tophit is None\nobs_tophit:::{}' 227 | .format(max_drift, obs_tophit.to_string())) 228 | 229 | if obs_tophit.tophit_id == ref_tophit.tophit_id \ 230 | and np.isclose(obs_tophit.drate, ref_tophit.drate, rtol=RTOL_DIFF) \ 231 | and np.isclose(obs_tophit.snr, ref_tophit.snr, rtol=RTOL_DIFF) \ 232 | and np.isclose(obs_tophit.freq, ref_tophit.freq, rtol=RTOL_DIFF) \ 233 | and obs_tophit.index == ref_tophit.index: 234 | return # success 235 | 236 | # Some field(s) did not compare correctly. 237 | raise ValueError('case_comparison: FAILED, max_drift={}\nobs_tophit:::{}\nref_tophit:::{}' 238 | .format(max_drift, obs_tophit.to_string(), ref_tophit.to_string())) 239 | 240 | if __name__ == '__main__': 241 | # __main__ is a developer unit test, not normally to be executed. 242 | from fb_cases_def import TESTDIR, PATH_FIL_FILE, MIN_SNR 243 | rmtree(TESTDIR, ignore_errors=True) 244 | mkdir(TESTDIR) 245 | generate_fil_file(PATH_FIL_FILE, -1, -1) 246 | make_one_dat_file(PATH_FIL_FILE, max_drift=5, min_snr=MIN_SNR) 247 | -------------------------------------------------------------------------------- /test/test_pipelines_3.py: -------------------------------------------------------------------------------- 1 | r''' test_pipelines_3.py 2 | 3 | Complex Cadence tests: 4 | * test_pipeline_h5_dat_colocated: .h5 and .dat files are in the same directory. 5 | * test_pipeline_h5_dat_separated: .h5 and .dat files are in separate directories. 
6 | * test_pipeline_same_source: result is identical to test_pipeline. 7 | * test_pipeline_wrong_source: nil hit table result. 8 | * test_pipeline_mixed: try all of the DAT_MIXED lists. 9 | ''' 10 | 11 | import os 12 | from shutil import rmtree 13 | from tempfile import gettempdir 14 | from pathlib import Path 15 | from turbo_seti.find_event.find_event_pipeline import find_event_pipeline 16 | from turbo_seti.find_event.plot_event_pipeline import plot_event_pipeline 17 | from fb_cases_util import generate_fil_file, make_one_dat_file 18 | import pipelines_util as utl 19 | 20 | N_EVENTS = 2 21 | TESTDIR = gettempdir() + '/pipeline_testing/' 22 | ALT_DAT_DIR = TESTDIR + 'dat_dir/' 23 | PATH_DAT_LIST_FILE = TESTDIR + 'dat_files_5.lst' 24 | PATH_H5_LIST_FILE = TESTDIR + 'h5_files_5.lst' 25 | PATH_CSVF = TESTDIR + 'found_event_table_3.csv' 26 | CSV_DELIM = ',' 27 | DAT_LIST_ONS = [TESTDIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat', 28 | TESTDIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.dat', 29 | TESTDIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.dat' 30 | ] 31 | ALT_DAT_LIST_ONS = [ALT_DAT_DIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat', 32 | ALT_DAT_DIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.dat', 33 | ALT_DAT_DIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.dat' 34 | ] 35 | H5_LIST_ONS = [TESTDIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.h5', 36 | TESTDIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.h5', 37 | TESTDIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.h5' 38 | ] 39 | PATH_IRRELEVANT_FIL = TESTDIR + 'abc.fil' 40 | FILE_IRRELEVANT_DAT = 'abc.dat' 41 | 42 | DAT_LIST_MIXED_1 = [ 43 | TESTDIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat', 44 | TESTDIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.dat', 45 | TESTDIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.dat', 46 | TESTDIR + FILE_IRRELEVANT_DAT, 47 | TESTDIR + FILE_IRRELEVANT_DAT, 48 | TESTDIR + FILE_IRRELEVANT_DAT, 49 | TESTDIR + FILE_IRRELEVANT_DAT, 50 | TESTDIR + FILE_IRRELEVANT_DAT, 51 | TESTDIR + FILE_IRRELEVANT_DAT, 52 | ] 53 | 54 | DAT_LIST_MIXED_2 = [ 55 | TESTDIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat', 56 | TESTDIR + FILE_IRRELEVANT_DAT, 57 | TESTDIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.dat', 58 | TESTDIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.dat', 59 | TESTDIR + FILE_IRRELEVANT_DAT, 60 | ] 61 | 62 | DAT_LIST_MIXED_3 = [ 63 | TESTDIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat', 64 | TESTDIR + FILE_IRRELEVANT_DAT, 65 | TESTDIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.dat', 66 | TESTDIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.dat', 67 | TESTDIR + FILE_IRRELEVANT_DAT, 68 | TESTDIR + FILE_IRRELEVANT_DAT 69 | ] 70 | 71 | DAT_LIST_MIXED_4 = [ 72 | TESTDIR + FILE_IRRELEVANT_DAT, 73 | TESTDIR + FILE_IRRELEVANT_DAT, 74 | TESTDIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat', 75 | TESTDIR + FILE_IRRELEVANT_DAT, 76 | TESTDIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.dat', 77 | TESTDIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.dat', 78 | TESTDIR + FILE_IRRELEVANT_DAT, 79 | TESTDIR + 
FILE_IRRELEVANT_DAT 80 | ] 81 | 82 | DAT_LIST_MIXED_5 = [ 83 | TESTDIR + FILE_IRRELEVANT_DAT, 84 | TESTDIR + FILE_IRRELEVANT_DAT, 85 | TESTDIR + FILE_IRRELEVANT_DAT, 86 | TESTDIR + FILE_IRRELEVANT_DAT, 87 | TESTDIR + FILE_IRRELEVANT_DAT, 88 | TESTDIR + FILE_IRRELEVANT_DAT, 89 | TESTDIR + 'single_coarse_guppi_59046_80036_DIAG_VOYAGER-1_0011.rawspec.0000.dat', 90 | TESTDIR + 'single_coarse_guppi_59046_80672_DIAG_VOYAGER-1_0013.rawspec.0000.dat', 91 | TESTDIR + 'single_coarse_guppi_59046_81310_DIAG_VOYAGER-1_0015.rawspec.0000.dat', 92 | ] 93 | 94 | def test_pipeline_h5_dat_colocated(): 95 | print('\n===== test_pipeline_h5_dat_colocated: BEGIN =====') 96 | 97 | # Make the dat list. 98 | with open(PATH_DAT_LIST_FILE, 'w') as fh: 99 | for path_dat in DAT_LIST_ONS: 100 | fh.write('{}\n'.format(path_dat)) 101 | 102 | # With the list of DAT files, do find_event_pipeline() 103 | df_event = find_event_pipeline(PATH_DAT_LIST_FILE, 104 | filter_threshold=3, 105 | number_in_cadence=3, 106 | user_validation=False, 107 | saving=True, 108 | on_source_complex_cadence='VOYAGER-1', 109 | csv_name=PATH_CSVF) 110 | 111 | # df_event should not be nil. 112 | if df_event is None: 113 | raise ValueError('test_pipeline_same_source: returned pandas df is None!') 114 | 115 | # CSV file created? 116 | if not Path(PATH_CSVF).exists(): 117 | raise ValueError('test_pipeline_same_source: No CSV of events created') 118 | 119 | # An event CSV was created. 120 | # Validate the hit table file. 121 | utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipeline_same_source', N_EVENTS) 122 | print('\n===== test_pipeline_h5_dat_colocated: END =====') 123 | 124 | def test_pipeline_h5_dat_separated(): 125 | print('\n===== test_pipeline_h5_dat_separated: BEGIN =====') 126 | 127 | # If there is an old ALT_DAT_DIR, recreate it empty. 128 | rmtree(ALT_DAT_DIR, ignore_errors=True) 129 | os.mkdir(ALT_DAT_DIR) 130 | 131 | # Copy the .dat files to ALT_DAT_DIR. 132 | for path_dat in DAT_LIST_ONS: 133 | cmd = 'cp ' + path_dat + " " + ALT_DAT_DIR 134 | os.system(cmd) 135 | 136 | # Make the dat list relative to ALT_DAT_DIR. 137 | with open(PATH_DAT_LIST_FILE, 'w') as fh: 138 | for path_dat in ALT_DAT_LIST_ONS: 139 | fh.write('{}\n'.format(path_dat)) 140 | 141 | # Make the h5 list. 142 | with open(PATH_H5_LIST_FILE, 'w') as fh: 143 | for path_dat in H5_LIST_ONS: 144 | fh.write('{}\n'.format(path_dat)) 145 | 146 | # With the list of separated .dat and .h5 files, do find_event_pipeline() 147 | df_event = find_event_pipeline(PATH_DAT_LIST_FILE, 148 | PATH_H5_LIST_FILE, 149 | filter_threshold=3, 150 | number_in_cadence=3, 151 | user_validation=False, 152 | saving=True, 153 | on_source_complex_cadence='VOYAGER-1', 154 | csv_name=PATH_CSVF) 155 | 156 | # df_event should not be nil. 157 | if df_event is None: 158 | raise ValueError('test_pipeline_same_source: returned pandas df is None!') 159 | 160 | # CSV file created? 161 | if not Path(PATH_CSVF).exists(): 162 | raise ValueError('test_pipeline_same_source: No CSV of events created') 163 | 164 | # An event CSV was created. 165 | # Validate the hit table file. 166 | utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipeline_same_source', N_EVENTS) 167 | 168 | # Plots! 169 | plot_event_pipeline(PATH_CSVF, 170 | PATH_H5_LIST_FILE, 171 | filter_spec=3, 172 | user_validation=False) 173 | 174 | print('\n===== test_pipeline_h5_dat_separated: END =====') 175 | 176 | def test_pipeline_wrong_source(): 177 | print('\n===== test_pipeline_wrong_source: BEGIN =====') 178 | 179 | # Make the dat list. 
180 | with open(PATH_DAT_LIST_FILE, 'w') as fh: 181 | for path_dat in DAT_LIST_ONS: 182 | fh.write('{}\n'.format(path_dat)) 183 | 184 | # With the list of DAT files, do find_event_pipeline() 185 | df_event = find_event_pipeline(PATH_DAT_LIST_FILE, 186 | filter_threshold=3, 187 | number_in_cadence=3, 188 | user_validation=False, 189 | saving=True, 190 | on_source_complex_cadence='VOYAGER-42', 191 | csv_name=PATH_CSVF) 192 | 193 | # df_event should be nil 194 | if not df_event is None: 195 | raise ValueError('test_pipeline_wrong_source: returned pandas df has entries but should be nil!') 196 | 197 | print('\n===== test_pipeline_wrong_source: END =====') 198 | 199 | 200 | def try_mixed(arg_list, init_needed=True): 201 | print('\n===== try_mixed: BEGIN =====') 202 | 203 | # If init needed, make the off-cadence file and one DAT file. 204 | if init_needed: 205 | generate_fil_file(PATH_IRRELEVANT_FIL, -1, -1) 206 | make_one_dat_file(PATH_IRRELEVANT_FIL, max_drift=10.0, min_snr=20.0, remove_h5=False) 207 | 208 | # Make the dat list. 209 | with open(PATH_DAT_LIST_FILE, 'w') as fh: 210 | for path_dat in arg_list: 211 | fh.write('{}\n'.format(path_dat)) 212 | 213 | # With the list of DAT files, do find_event_pipeline() 214 | df_event = find_event_pipeline(PATH_DAT_LIST_FILE, 215 | sortby_tstart=False, 216 | filter_threshold=3, 217 | number_in_cadence=len(arg_list), 218 | user_validation=False, 219 | saving=True, 220 | on_source_complex_cadence='VOYAGER-1', 221 | csv_name=PATH_CSVF) 222 | 223 | # df_event should not be nil. 224 | if df_event is None: 225 | raise ValueError('try_mixed: returned pandas df is None!') 226 | 227 | # CSV file created? 228 | if not Path(PATH_CSVF).exists(): 229 | raise ValueError('try_mixed: No CSV of events created') 230 | 231 | # An event CSV was created. 232 | # Validate the hit table file. 233 | utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipeline_mixed', N_EVENTS) 234 | 235 | print('\n===== try_mixed: END =====') 236 | 237 | 238 | def test_pipeline_mixed(init_needed=True): 239 | print('\n===== test_pipeline_mixed: BEGIN =====') 240 | try_mixed(DAT_LIST_MIXED_1, init_needed=init_needed) 241 | try_mixed(DAT_LIST_MIXED_2, init_needed=False) 242 | try_mixed(DAT_LIST_MIXED_3, init_needed=False) 243 | try_mixed(DAT_LIST_MIXED_4, init_needed=False) 244 | try_mixed(DAT_LIST_MIXED_5, init_needed=False) 245 | print('\n===== test_pipeline_mixed: END =====') 246 | 247 | 248 | if __name__ == '__main__': 249 | test_pipeline_h5_dat_colocated() 250 | test_pipeline_h5_dat_separated() 251 | test_pipeline_wrong_source() 252 | test_pipeline_mixed(init_needed=True) 253 | --------------------------------------------------------------------------------
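The regression tests above exercise the full turbo_seti workflow in three stages: a FindDoppler search of each HDF5 file writes a DAT file of hits, find_event_pipeline() filters a cadence of DAT files down to an event CSV, and plot_event_pipeline() renders one PNG per surviving event. The sketch below strings the same three calls together outside of pytest; the directory and file names are placeholders, and only keyword arguments already used by these tests are shown.

```python
from turbo_seti.find_doppler.find_doppler import FindDoppler
from turbo_seti.find_event.find_event_pipeline import find_event_pipeline
from turbo_seti.find_event.plot_event_pipeline import plot_event_pipeline

# Placeholder paths: substitute a real ON/OFF cadence of .h5 files.
DATA_DIR = '/datax/my_cadence/'
H5_LIST = [DATA_DIR + 'scan_{:04d}.rawspec.0000.h5'.format(i) for i in range(11, 17)]
DAT_LIST_FILE = DATA_DIR + 'dat_files.lst'
H5_LIST_FILE = DATA_DIR + 'h5_files.lst'
CSV_FILE = DATA_DIR + 'found_event_table.csv'

# 1. Doppler-search each HDF5 file; each search writes one DAT file to out_dir.
for h5 in H5_LIST:
    FindDoppler(h5, max_drift=2.0, snr=10.0, out_dir=DATA_DIR).search(n_partitions=1)

# 2. Record the DAT and H5 paths in list files, one path per line.
with open(DAT_LIST_FILE, 'w') as fh:
    fh.writelines(h5.replace('.h5', '.dat') + '\n' for h5 in H5_LIST)
with open(H5_LIST_FILE, 'w') as fh:
    fh.writelines(h5 + '\n' for h5 in H5_LIST)

# 3. Reduce the cadence of DAT files to an event CSV ...
df_event = find_event_pipeline(DAT_LIST_FILE,
                               filter_threshold=3,
                               number_in_cadence=len(H5_LIST),
                               user_validation=False,
                               saving=True,
                               csv_name=CSV_FILE)

# 4. ... and plot each surviving event against the HDF5 cadence.
if df_event is not None:
    plot_event_pipeline(CSV_FILE, H5_LIST_FILE,
                        filter_spec='f3',
                        user_validation=False)
```

As in test_pipelines_1.py, filter_threshold=3 with saving=True writes the event table to csv_name, and plot_event_pipeline() then produces one PNG per event, which is exactly what the PNG-count check in find_plot_pipelines() verifies.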