├── .coveragerc ├── .gitattributes ├── .github ├── scripts │ ├── adjust-coverage-config │ └── find-gnu-tar └── workflows │ └── ci-unittest.yml ├── .gitignore ├── .mailmap ├── .readthedocs.yaml ├── CITATION.cff ├── Example_Jupyter_Notebook.ipynb ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── docs ├── CITATION.cff ├── LICENSE.md ├── code_of_conduct.md ├── environment.yml ├── index.md ├── jlitebadge.svg ├── pdr_api.md ├── pdr_joss_paper.pdf ├── pdr_pdart_proposal_roses20.pdf ├── supported_datasets.md └── version_history.md ├── environment.yml ├── minimal_environment.yml ├── mkdocs.yml ├── pdr ├── __init__.py ├── _scaling.py ├── bit_handling.py ├── browsify.py ├── datatypes.py ├── errors.py ├── formats │ ├── __init__.py │ ├── cassini.py │ ├── checkers.py │ ├── clementine.py │ ├── dawn.py │ ├── diviner.py │ ├── epoxi.py │ ├── galileo.py │ ├── ground.py │ ├── ihw.py │ ├── iue.py │ ├── juno.py │ ├── lp.py │ ├── lro.py │ ├── lroc.py │ ├── mariner.py │ ├── mer.py │ ├── mex.py │ ├── mgn.py │ ├── mgs.py │ ├── mro.py │ ├── msl_apxs.py │ ├── msl_ccam.py │ ├── msl_cmn.py │ ├── msl_places.py │ ├── msl_rems.py │ ├── msx.py │ ├── nh.py │ ├── odyssey.py │ ├── phoenix.py │ ├── pvo.py │ ├── rosetta.py │ ├── saturn_rpx.py │ ├── themis.py │ ├── ulysses.py │ ├── vega.py │ ├── viking.py │ └── voyager.py ├── func.py ├── loaders │ ├── __init__.py │ ├── _helpers.py │ ├── astrowrap.py │ ├── datawrap.py │ ├── dispatch.py │ ├── handlers.py │ ├── image.py │ ├── queries.py │ ├── table.py │ ├── text.py │ └── utility.py ├── np_utils.py ├── parselabel │ ├── __init__.py │ ├── pds3.py │ ├── pds4.py │ └── utils.py ├── pd_utils.py ├── pdr.py ├── pdrtypes.py ├── pds4_tools │ ├── CREDITS │ ├── LICENSES │ ├── __about__.py │ ├── __init__.py │ ├── extern │ │ ├── __init__.py │ │ ├── appdirs.py │ │ ├── argparse.py │ │ ├── cached_property.py │ │ ├── ordered_dict.py │ │ ├── six.py │ │ └── zscale.py │ ├── reader │ │ ├── __init__.py │ │ ├── array_objects.py │ │ ├── core.py │ │ ├── data.py │ │ ├── data_types.py │ │ ├── general_objects.py │ │ ├── header_objects.py │ │ ├── label_objects.py │ │ ├── read_arrays.py │ │ ├── read_headers.py │ │ ├── read_label.py │ │ ├── read_tables.py │ │ └── table_objects.py │ └── utils │ │ ├── __init__.py │ │ ├── compat.py │ │ ├── constants.py │ │ ├── data_access.py │ │ ├── deprecation.py │ │ ├── exceptions.py │ │ ├── helpers.py │ │ └── logging.py ├── pil_utils.py ├── pvl_utils.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ │ ├── F187B51_cycle_3.gif │ │ ├── Simple_Animated_Clock.webp │ │ ├── catseye_1.png │ │ ├── concert.jpeg │ │ ├── kings_river_canyon.tiff │ │ ├── squirrel.jpg │ │ └── weather.bmp │ ├── objects.py │ ├── test_bit_handling.py │ ├── test_browsify.py │ ├── test_data.py │ ├── test_datatypes.py │ ├── test_func.py │ ├── test_image.py │ ├── test_import.py │ ├── test_loader_helpers.py │ ├── test_metadata.py │ ├── test_np_utils.py │ ├── test_parselabel_pds3.py │ ├── test_parselabel_pds4.py │ ├── test_primary_desktop_image.py │ ├── test_primary_fits.py │ ├── test_queries.py │ ├── test_scaling.py │ └── test_table.py └── utils.py ├── pyproject.toml └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source_pkgs = pdr 3 | omit = 4 | */formats/* 5 | */pds4_tools/extern/* 6 | */pvl_utils.py 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-language=Python 2 | 
-------------------------------------------------------------------------------- /.github/scripts/adjust-coverage-config: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | """ 4 | Read a .coveragerc from stdin, adjust it for use in a CI build, and 5 | write it back out to stdout. 6 | 7 | If files are listed on the command line, they are assumed to be 8 | coverage databases, and a [paths] section is added to the .coveragerc 9 | (replacing any existing [paths] section) that instructs coverage.py 10 | to treat the common path prefix of each coverage database's files 11 | as equivalent. When used this way, coverage.py must be importable. 12 | """ 13 | 14 | import sys 15 | 16 | from argparse import ArgumentParser 17 | from configparser import ConfigParser 18 | from pathlib import Path 19 | 20 | 21 | DATABASE_NAME = "coverage.dat" 22 | 23 | 24 | def remap_paths_for_databases(cfg, databases): 25 | """ 26 | Compute a set of path remapping rules that will render all of 27 | the databases in DATABASES mergeable, by stripping out the common 28 | path prefix found in each database. 29 | """ 30 | from collections import defaultdict 31 | from coverage import CoverageData 32 | from os.path import commonprefix 33 | from pathlib import PurePosixPath, PureWindowsPath 34 | 35 | prefixes = set() 36 | for db_fname in databases: 37 | db = CoverageData(basename=db_fname) 38 | db.read() 39 | prefixes.add(commonprefix(list(db.measured_files()))) 40 | 41 | packages = defaultdict(set) 42 | for p in prefixes: 43 | if '\\' in p or (len(p) >= 2 and p[0].isalpha() and p[1] == ':'): 44 | name = PureWindowsPath(p).name 45 | else: 46 | name = PurePosixPath(p).name 47 | packages[name].add(p) 48 | 49 | pkg_names = sorted(packages.keys()) 50 | 51 | cfg["run"]["relative_files"] = "true" 52 | cfg["run"]["source_pkgs"] = " ".join(pkg_names) 53 | 54 | cfg["paths"] = {} 55 | for pkg in pkg_names: 56 | pkg_paths = ['', pkg + '/'] 57 | pkg_paths.extend(sorted(packages[pkg])) 58 | cfg["paths"]["src_" + pkg] = "\n".join(pkg_paths) 59 | 60 | 61 | def adjust_omit(cfg): 62 | """ 63 | Adjust the "omit" setting to be more appropriate for use in CI; 64 | the stock .coveragerc is tailored for interactive use. 65 | """ 66 | GLOBS_TO_DROP = ( 67 | "*/formats/*", 68 | "*/pvl_utils.py", 69 | ) 70 | 71 | run_section = cfg["run"] 72 | pruned_omit_globs = [] 73 | for glob in run_section.get("omit", "").splitlines(): 74 | glob = glob.strip() 75 | if glob not in GLOBS_TO_DROP: 76 | pruned_omit_globs.append(glob) 77 | 78 | if ( 79 | len(pruned_omit_globs) == 0 80 | or len(pruned_omit_globs) == 1 and pruned_omit_globs[0] == "" 81 | ): 82 | del run_section["omit"] 83 | else: 84 | run_section["omit"] = "\n".join(pruned_omit_globs) 85 | 86 | 87 | def change_database_name(cfg): 88 | """ 89 | Give the coverage database a more convenient name for use in 90 | cross-platform CI. 
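    For example, with the stock configuration this resolves to something
    like $PWD/coverage.dat (an illustrative rendering of the
    Path.cwd() / DATABASE_NAME expression below; the actual prefix depends
    on the runner's working directory).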
91 | """ 92 | cfg["run"]["data_file"] = str(Path.cwd() / DATABASE_NAME) 93 | 94 | 95 | def main(): 96 | ap = ArgumentParser(description=__doc__) 97 | ap.add_argument("databases", nargs="*", 98 | help="Coverage databases to be merged") 99 | args = ap.parse_args() 100 | 101 | # this must match how coverage.py initializes ConfigParser 102 | cfg = ConfigParser(interpolation=None) 103 | 104 | with sys.stdin as ifp: 105 | cfg.read_file(ifp, source="") 106 | 107 | if args.databases: 108 | remap_paths_for_databases(cfg, args.databases) 109 | 110 | adjust_omit(cfg) 111 | change_database_name(cfg) 112 | 113 | with sys.stdout as ofp: 114 | cfg.write(ofp) 115 | 116 | 117 | main() 118 | -------------------------------------------------------------------------------- /.github/scripts/find-gnu-tar: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | """ 4 | Find GNU tar, whose pathname transformation options we need, and which 5 | is named 'tar' on Github's Linux and Windows CI runners but 'gtar' on 6 | their MacOS runners. 7 | """ 8 | 9 | import os 10 | import stat 11 | import sys 12 | 13 | from argparse import ArgumentParser 14 | from pathlib import Path 15 | 16 | 17 | if os.name == "nt": 18 | EXE_SUFFIX = ".exe" 19 | def is_executable_mode(mode): 20 | return True 21 | else: 22 | EXE_SUFFIX = "" 23 | def is_executable_mode(mode): 24 | return (stat.S_IMODE(mode) & 0o111) != 0 25 | 26 | 27 | def is_executable_file(path, debug): 28 | if debug: 29 | sys.stderr.write(f" {path}: ") 30 | try: 31 | st = os.stat(path) 32 | except FileNotFoundError: 33 | if debug: 34 | sys.stderr.write("not found\n") 35 | return False 36 | 37 | if not stat.S_ISREG(st.st_mode): 38 | if debug: 39 | sys.stderr.write("not a regular file (mode={})\n" 40 | .format(stat.filemode(st.st_mode))) 41 | return False 42 | 43 | if not is_executable_mode(st.st_mode): 44 | if debug: 45 | sys.stderr.write("not executable (mode={}, os={})\n" 46 | .format(stat.filemode(st.st_mode, os.name))) 47 | return False 48 | 49 | if debug: 50 | sys.stderr.write(" ok\n") 51 | return True 52 | 53 | 54 | 55 | def find_gnu_tar(debug=False): 56 | GTAR_CMD = "gtar" + EXE_SUFFIX 57 | TAR_CMD = "tar" + EXE_SUFFIX 58 | candidate = None 59 | for d in os.get_exec_path(): 60 | # Resolve symlinks in the directory components of the path, 61 | # but *not* the command name, because changing the command 62 | # name might alter the behavior of the command. 
63 |         p = Path(d).resolve()
64 |         if debug:
65 |             sys.stderr.write(f"checking {p}\n")
66 |         gtar = p / GTAR_CMD
67 |         tar = p / TAR_CMD
68 |         if is_executable_file(gtar, debug):
69 |             # gtar is preferred
70 |             return gtar
71 |         if is_executable_file(tar, debug):
72 |             # use tar only if we don't find a gtar later in the path
73 |             candidate = tar
74 |     if candidate is not None:
75 |         return candidate
76 |     sys.stderr.write(f"neither {GTAR_CMD} nor {TAR_CMD} found in PATH\n")
77 |     sys.exit(1)
78 | 
79 | 
80 | def main():
81 |     ap = ArgumentParser(description=__doc__)
82 |     ap.add_argument("--debug", action="store_true",
83 |                     help="Print debugging information during the search")
84 |     args = ap.parse_args()
85 | 
86 |     sys.stdout.write(str(find_gnu_tar(args.debug)) + "\n")
87 |     sys.exit(0)
88 | 
89 | 
90 | main()
91 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *ipynb*
2 | *pycache*
3 | /test_files
4 | /output_files
5 | /input_files
6 | /pdr.egg-info/
7 | /.idea/
8 | /pdrtestsuite/
9 | *scratch.py
10 | *scratch.ipynb
11 | /pdr/tests/data/
12 | /pdr/oldtests/
13 | **pdrtests.log**
14 | /pdr/tests/reference/temp/
15 | *.dot
16 | *.profile
17 | data/**
18 | build/**
19 | dist/**
20 | git
21 | meta.yaml
22 | *.DS_store
23 | */.tracker_logs
24 | /.coverage
25 | /.pytest_cache/
26 | /htmlcov/
27 | six_old.py
--------------------------------------------------------------------------------
/.mailmap:
--------------------------------------------------------------------------------
1 | Chase Million <1483210+cmillion@users.noreply.github.com>
2 | Chase Million cmillion
3 | Chase Million cmillion <>
4 | 
5 | Michael Aye
6 | 
7 | Michael St. Clair
8 | Michael St. Clair M. St. Clair <64057573+m-stclair@users.noreply.github.com>
9 | Michael St. Clair m-stclair <64057573+m-stclair@users.noreply.github.com>
10 | Michael St. Clair michael <64057573+m-stclair@users.noreply.github.com>
11 | Michael St. Clair michael mstclair@millionconcepts.com
12 | 
13 | Sabrina Curtis curtiss9 <98858647+curtiss9@users.noreply.github.com>
14 | 
15 | Sierra Brown <88336748+Sierra-MC@users.noreply.github.com>
16 | Sierra Brown Sierra V. Kaufman
17 | Sierra Brown Sierra-MC
18 | 
19 | Zack Weinberg
20 | Zack Weinberg
21 | 
22 | GitHub
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file for MkDocs projects
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 | 
4 | # Required
5 | version: 2
6 | 
7 | # Set the version of Python and other tools you might need
8 | build:
9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "mambaforge-22.9"
12 | 
13 | mkdocs:
14 |   configuration: mkdocs.yml
15 | 
16 | # Optionally declare the Python requirements required to build your docs
17 | conda:
18 |   environment: docs/environment.yml
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | If you use pdr for any published work, please cite it using the reference below:
2 | 
3 | @article{Brown2024,
4 |     author = {{Brown}, Sierra V. and {St. Clair}, Michael and {Million}, Chase
5 |               and {Curtis}, Sabrina and {Aye}, K.-Michael and {Weinberg}, Zack},
6 |     year = {2024},
7 |     journal = {Journal of Open Source Software},
8 |     title = {PDR: The Planetary Data Reader},
9 |     url = {https://doi.org/10.21105/joss.07256},
10 |     publisher = {The Open Journal},
11 |     volume = {9},
12 |     number = {102},
13 |     pages = {7256},
14 |     doi = {10.21105/joss.07256}
15 | }
16 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # No, it isn't possible to do this in pyproject.toml :-(
2 | include .coveragerc
3 | include environment.yml
4 | include minimal_environment.yml
5 | include docs/version_history.md
6 | include pdr/pds4_tools/CREDITS
7 | include pdr/pds4_tools/LICENSES
8 | include pdr/tests/data/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## The Planetary Data Reader (pdr)
2 | 
3 | This tool provides a single command---`read('/path/to/file')`---for ingesting
4 | _all_ common planetary data types. It reads almost all "primary observational
5 | data" products currently archived in the PDS (under PDS3 or PDS4), and the
6 | fraction of products it does not read is continuously shrinking.
7 | [Currently-supported datasets are listed here.](docs/supported_datasets.md)
8 | 
9 | If the software fails while attempting to read from datasets that we have
10 | listed as supported, please submit an issue with a link to the file and
11 | information about the error (if applicable). There might also be datasets that
12 | work but are not listed. We would like to hear about those too. If a dataset
13 | is not yet supported that you would like us to consider prioritizing,
14 | [please fill out this request form](https://docs.google.com/forms/d/1JHyMDzC9LlXY4MOMcHqV5fbseSB096_PsLshAMqMWBw/viewform).
15 | 
16 | ### Attribution
17 | If you use _pdr_ in your work, please cite us using our [JOSS Paper](docs/pdr_joss_paper.pdf): [![DOI](https://joss.theoj.org/papers/10.21105/joss.07256/status.svg)](https://doi.org/10.21105/joss.07256).
18 | A BibTeX-style citation is available in [CITATION.cff](CITATION.cff).
19 | 
20 | ### Installation
21 | _pdr_ is now on `conda` and `pip`. We recommend (and only officially support)
22 | installation into a `conda` environment. You can do this like so:
23 | 
24 | ```
25 | conda create --name pdrenv
26 | conda activate pdrenv
27 | conda install -c conda-forge pdr
28 | ```
29 | The minimum supported version of Python is _3.9_.
30 | 
31 | Using the conda install will install some optional dependencies in the environment.yml
32 | file for pdr, including `astropy` and `pillow`. If you'd prefer to forego those
33 | optional dependencies, please use minimal_environment.yml in your
34 | installation. This is not supported through a direct conda install as
35 | described above and will require additional steps. Optional dependencies
36 | and the added functionality they support are listed below:
37 | 
38 | - `pvl`: allows `Data.load("LABEL", as_pvl=True)`, which will load PDS3
39 |   labels as `pvl` objects rather than plain text
40 | - `astropy`: adds support for FITS files
41 | - `jupyter`: allows usage of the Example Jupyter Notebook (and other jupyter
42 |   notebooks you create)
43 | - `pillow`: adds support for reading a variety of 'desktop' image formats
44 |   (TIFF, JPEG, etc.) and for browse image rendering
45 | - `Levenshtein`: allows use of `metaget_fuzzy`, a fuzzy-matching metadata
46 |   parsing function
47 | 
48 | For pip users, no optional dependencies will be packaged with pdr. The extras
49 | tags are:
50 | - `pvl`: installs `pvl`
51 | - `fits`: installs `astropy`
52 | - `notebooks`: installs `jupyter`
53 | - `pillow`: installs `pillow`
54 | - `fuzzy`: installs `Levenshtein`
55 | 
56 | Example syntax for using pip to install pdr with the `astropy` and `pillow` optional
57 | dependencies:
58 | ```
59 | pip install pdr[fits, pillow]
60 | ```
61 | 
62 | #### NOTE: `pdr` is not currently compatible with Python 3.13 when installed with `pip`; it can be used with Python 3.13 through `conda`
63 | 
64 | ### Usage
65 | 
66 | You can check out our example Notebook on a JupyterLite server for a
67 | quick interactive demo of functionality:
68 | [![JupyterLite](docs/jlitebadge.svg)](https://millionconcepts.github.io/jlite-pdr-demo/)
69 | 
70 | Additional information on usage including examples, output data types, notes
71 | and caveats, tests, etc. can now be accessed in our documentation on
72 | readthedocs at: https://pdr.readthedocs.io [![Documentation Status](https://readthedocs.org/projects/pdr/badge/?version=latest)](https://pdr.readthedocs.io/en/latest/?badge=latest)
73 | 
74 | 
75 | ### Contributing
76 | 
77 | Thank you for wanting to contribute to `pdr` and improve efforts to make
78 | planetary science data accessible. Please review our code of conduct before
79 | contributing. [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](docs/code_of_conduct.md)
80 | 
81 | If you have found a bug, a dataset that we claim to support that's not opening
82 | properly, or you have a feature request, please file an issue. We will also
83 | review pull requests, but would probably prefer you start the conversation with
84 | us first, so we can expect your contributions and make sure they will be within
85 | scope.
86 | 
87 | If you need general support you can find us on [OpenPlanetary Slack](https://app.slack.com/client/T04CWPQL9/C04CWPQM5)
88 | (available to [OpenPlanetary members](https://www.openplanetary.org/join))
89 | or feel free to [email](mailto:sierra@millionconcepts.com) the team.
90 | 
91 | ---
92 | This work is supported by NASA grant No. 80NSSC21K0885.
93 | 
--------------------------------------------------------------------------------
/docs/CITATION.cff:
--------------------------------------------------------------------------------
1 | If you use pdr for any published work, please cite it using the reference below:
2 | 
3 | @article{Brown2024,
4 |     author = {{Brown}, Sierra V. and {St. Clair}, Michael and {Million}, Chase
5 |               and {Curtis}, Sabrina and {Aye}, K.-Michael and {Weinberg}, Zack},
6 |     year = {2024},
7 |     journal = {Journal of Open Source Software},
8 |     title = {PDR: The Planetary Data Reader},
9 |     url = {https://doi.org/10.21105/joss.07256},
10 |     publisher = {The Open Journal},
11 |     volume = {9},
12 |     number = {102},
13 |     pages = {7256},
14 |     doi = {10.21105/joss.07256}
15 | }
16 | 
--------------------------------------------------------------------------------
/docs/code_of_conduct.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, caste, color, religion, or sexual
10 | identity and orientation.
11 | 
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 | 
15 | ## Our Standards
16 | 
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 | 
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 |   and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 |   community
27 | 
28 | Examples of unacceptable behavior can be found [here](https://www.contributor-covenant.org/version/2/1/code_of_conduct/#:~:text=Examples%20of%20unacceptable,a%20professional%20setting).
29 | 
30 | ## Enforcement Responsibilities
31 | 
32 | Community leaders are responsible for clarifying and enforcing our standards of
33 | acceptable behavior and will take appropriate and fair corrective action in
34 | response to any behavior that they deem inappropriate, threatening, offensive,
35 | or harmful.
36 | 
37 | Community leaders have the right and responsibility to remove, edit, or reject
38 | comments, commits, code, wiki edits, issues, and other contributions that are
39 | not aligned to this Code of Conduct, and will communicate reasons for moderation
40 | decisions when appropriate.
41 | 
42 | ## Scope
43 | 
44 | This Code of Conduct applies within all community spaces, and also applies when
45 | an individual is officially representing the community in public spaces.
46 | Examples of representing our community include using an official e-mail address,
47 | posting via an official social media account, or acting as an appointed
48 | representative at an online or offline event.
49 | 
50 | ## Enforcement
51 | 
52 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
53 | reported to [Sierra Brown](mailto:sierra@millionconcepts.com).
54 | All complaints will be reviewed and investigated promptly and fairly.
55 | 
56 | All community leaders are obligated to respect the privacy and security of the
57 | reporter of any incident.
58 | 
59 | ## Enforcement Guidelines
60 | 
61 | Community leaders will follow these Community Impact Guidelines in determining
62 | the consequences for any action they deem in violation of this Code of Conduct:
63 | 
64 | ### 1.
Correction 65 | 66 | **Community Impact**: Use of inappropriate language or other behavior deemed 67 | unprofessional or unwelcome in the community. 68 | 69 | **Consequence**: A private, written warning from community leaders, providing 70 | clarity around the nature of the violation and an explanation of why the 71 | behavior was inappropriate. A public apology may be requested. 72 | 73 | ### 2. Warning 74 | 75 | **Community Impact**: A violation through a single incident or series of 76 | actions. 77 | 78 | **Consequence**: A warning with consequences for continued behavior. No 79 | interaction with the people involved, including unsolicited interaction with 80 | those enforcing the Code of Conduct, for a specified period of time. This 81 | includes avoiding interactions in community spaces as well as external channels 82 | like social media. Violating these terms may lead to a temporary or permanent 83 | ban. 84 | 85 | ### 3. Temporary Ban 86 | 87 | **Community Impact**: A serious violation of community standards, including 88 | sustained inappropriate behavior. 89 | 90 | **Consequence**: A temporary ban from any sort of interaction or public 91 | communication with the community for a specified period of time. No public or 92 | private interaction with the people involved, including unsolicited interaction 93 | with those enforcing the Code of Conduct, is allowed during this period. 94 | Violating these terms may lead to a permanent ban. 95 | 96 | ### 4. Permanent Ban 97 | 98 | **Community Impact**: Demonstrating a pattern of violation of community 99 | standards, including sustained inappropriate behavior, harassment of an 100 | individual, or aggression toward or disparagement of classes of individuals. 101 | 102 | **Consequence**: A permanent ban from any sort of public interaction within the 103 | community. 104 | 105 | ## Attribution 106 | 107 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 108 | version 2.1, available at 109 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 110 | 111 | Community Impact Guidelines were inspired by 112 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 113 | 114 | For answers to common questions about this code of conduct, see the FAQ at 115 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 116 | [https://www.contributor-covenant.org/translations][translations]. 
117 | 118 | [homepage]: https://www.contributor-covenant.org 119 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 120 | [Mozilla CoC]: https://github.com/mozilla/diversity 121 | [FAQ]: https://www.contributor-covenant.org/faq 122 | [translations]: https://www.contributor-covenant.org/translations 123 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: pdr 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.11 7 | - mkdocs 8 | - mkdocstrings 9 | - mkdocstrings-python 10 | - mkdocs-material -------------------------------------------------------------------------------- /docs/pdr_api.md: -------------------------------------------------------------------------------- 1 | # pdr 2 | 3 | ::: pdr 4 | options: 5 | heading_level: 2 6 | 7 | ## _scaling 8 | 9 | ::: pdr._scaling 10 | options: 11 | heading_level: 3 12 | 13 | ## bit_handling 14 | 15 | ::: pdr.bit_handling 16 | options: 17 | heading_level: 3 18 | 19 | ## browsify 20 | 21 | ::: pdr.browsify 22 | options: 23 | heading_level: 3 24 | 25 | ## datatypes 26 | 27 | ::: pdr.datatypes 28 | options: 29 | heading_level: 3 30 | 31 | ## errors 32 | 33 | ::: pdr.errors 34 | options: 35 | heading_level: 3 36 | 37 | ## formats 38 | 39 | ::: pdr.formats 40 | options: 41 | heading_level: 3 42 | 43 | ### formats.cassini 44 | 45 | ::: pdr.formats.cassini 46 | options: 47 | heading_level: 4 48 | 49 | ### formats.checkers 50 | 51 | ::: pdr.formats.checkers 52 | options: 53 | heading_level: 4 54 | 55 | ### formats.clementine 56 | 57 | ::: pdr.formats.clementine 58 | options: 59 | heading_level: 4 60 | 61 | ### formats.dawn 62 | 63 | ::: pdr.formats.dawn 64 | options: 65 | heading_level: 4 66 | 67 | ### formats.diviner 68 | 69 | ::: pdr.formats.diviner 70 | options: 71 | heading_level: 4 72 | 73 | ### formats.epoxi 74 | 75 | ::: pdr.formats.epoxi 76 | options: 77 | heading_level: 4 78 | 79 | ### formats.galileo 80 | 81 | ::: pdr.formats.galileo 82 | options: 83 | heading_level: 4 84 | 85 | ### formats.ground 86 | 87 | ::: pdr.formats.ground 88 | options: 89 | heading_level: 4 90 | 91 | ### formats.ihw 92 | 93 | ::: pdr.formats.ihw 94 | options: 95 | heading_level: 4 96 | 97 | ### formats.juno 98 | 99 | ::: pdr.formats.juno 100 | options: 101 | heading_level: 4 102 | 103 | ### formats.lro 104 | 105 | ::: pdr.formats.lro 106 | options: 107 | heading_level: 4 108 | 109 | ### formats.lroc 110 | 111 | ::: pdr.formats.lroc 112 | options: 113 | heading_level: 4 114 | 115 | ### formats.mariner 116 | 117 | ::: pdr.formats.mariner 118 | options: 119 | heading_level: 4 120 | 121 | ### formats.mer 122 | 123 | ::: pdr.formats.mer 124 | options: 125 | heading_level: 4 126 | 127 | ### formats.mex 128 | 129 | ::: pdr.formats.mex 130 | options: 131 | heading_level: 4 132 | 133 | ### formats.mgn 134 | 135 | ::: pdr.formats.mgn 136 | options: 137 | heading_level: 4 138 | 139 | ### formats.mgs 140 | 141 | ::: pdr.formats.mgs 142 | options: 143 | heading_level: 4 144 | 145 | ### formats.mro 146 | 147 | ::: pdr.formats.mro 148 | options: 149 | heading_level: 4 150 | 151 | ### formats.msl_apxs 152 | 153 | ::: pdr.formats.msl_apxs 154 | options: 155 | heading_level: 4 156 | 157 | ### formats.msl_ccam 158 | 159 | ::: pdr.formats.msl_ccam 160 | options: 161 | heading_level: 4 162 | 163 | ### formats.msl_cmn 164 | 165 | ::: pdr.formats.msl_cmn 166 | options: 167 | heading_level: 4 168 | 
169 | ### formats.msl_places 170 | ::: pdr.formats.msl_places 171 | options: 172 | heading_level: 4 173 | 174 | ### formats.msl_rems 175 | ::: pdr.formats.msl_rems 176 | options: 177 | heading_level: 4 178 | 179 | ### formats.nh 180 | 181 | ::: pdr.formats.nh 182 | options: 183 | heading_level: 4 184 | 185 | ### formats.odyssey 186 | 187 | ::: pdr.formats.odyssey 188 | options: 189 | heading_level: 4 190 | 191 | ### formats.phoenix 192 | 193 | ::: pdr.formats.phoenix 194 | options: 195 | heading_level: 4 196 | 197 | ### formats.pvo 198 | 199 | ::: pdr.formats.pvo 200 | options: 201 | heading_level: 4 202 | 203 | ### formats.rosetta 204 | 205 | ::: pdr.formats.rosetta 206 | options: 207 | heading_level: 4 208 | 209 | ### formats.saturn_rpx 210 | 211 | ::: pdr.formats.saturn_rpx 212 | options: 213 | heading_level: 4 214 | 215 | ### formats.themis 216 | 217 | ::: pdr.formats.themis 218 | options: 219 | heading_level: 4 220 | 221 | ### formats.ulysses 222 | 223 | ::: pdr.formats.ulysses 224 | options: 225 | heading_level: 4 226 | 227 | ### formats.vega 228 | 229 | ::: pdr.formats.vega 230 | options: 231 | heading_level: 4 232 | 233 | ### formats.viking 234 | 235 | ::: pdr.formats.viking 236 | options: 237 | heading_level: 4 238 | 239 | ### formats.voyager 240 | 241 | ::: pdr.formats.voyager 242 | options: 243 | heading_level: 4 244 | 245 | ## func 246 | 247 | ::: pdr.func 248 | options: 249 | heading_level: 3 250 | 251 | ## loaders 252 | 253 | ::: pdr.loaders 254 | options: 255 | heading_level: 3 256 | 257 | ### loaders._helpers 258 | 259 | ::: pdr.loaders._helpers 260 | options: 261 | heading_level: 4 262 | 263 | ### loaders.astrowrap 264 | ::: pdr.loaders.astrowrap 265 | options: 266 | heading_level: 4 267 | 268 | ### loaders.datawrap 269 | 270 | ::: pdr.loaders.datawrap 271 | options: 272 | heading_level: 4 273 | 274 | ### loaders.dispatch 275 | 276 | ::: pdr.loaders.dispatch 277 | options: 278 | heading_level: 4 279 | 280 | ### loaders.handlers 281 | 282 | ::: pdr.loaders.handlers 283 | options: 284 | heading_level: 4 285 | 286 | ### loaders.image 287 | 288 | ::: pdr.loaders.image 289 | options: 290 | heading_level: 4 291 | 292 | ### loaders.queries 293 | 294 | ::: pdr.loaders.queries 295 | options: 296 | heading_level: 4 297 | 298 | ### loaders.table 299 | 300 | ::: pdr.loaders.table 301 | options: 302 | heading_level: 4 303 | 304 | ### loaders.text 305 | 306 | ::: pdr.loaders.text 307 | options: 308 | heading_level: 4 309 | 310 | ### loaders.utility 311 | 312 | ::: pdr.loaders.utility 313 | options: 314 | heading_level: 4 315 | 316 | ## np_utils 317 | 318 | ::: pdr.np_utils 319 | options: 320 | heading_level: 3 321 | 322 | ## parselabel 323 | 324 | ::: pdr.parselabel 325 | options: 326 | heading_level: 3 327 | 328 | ### parselabel.pds3 329 | 330 | ::: pdr.parselabel.pds3 331 | options: 332 | heading_level: 4 333 | 334 | ### parselabel.pds4 335 | 336 | ::: pdr.parselabel.pds4 337 | options: 338 | heading_level: 4 339 | 340 | ### parselabel.utils 341 | 342 | ::: pdr.parselabel.utils 343 | options: 344 | heading_level: 4 345 | 346 | ## pd_utils 347 | 348 | ::: pdr.pd_utils 349 | options: 350 | heading_level: 3 351 | 352 | ## pdr 353 | 354 | ::: pdr.pdr 355 | options: 356 | heading_level: 3 357 | 358 | ## pdrtypes 359 | 360 | ::: pdr.pdrtypes 361 | options: 362 | heading_level: 3 363 | 364 | ### pil_utils 365 | 366 | ::: pdr.pil_utils 367 | options: 368 | heading_level: 3 369 | 370 | ## pvl_utils 371 | 372 | ::: pdr.pvl_utils 373 | options: 374 | heading_level: 3 375 | 376 | ## utils 377 | 
378 | ::: pdr.utils
379 |     options:
380 |       heading_level: 3
381 | 
382 | 
383 | 
--------------------------------------------------------------------------------
/docs/pdr_joss_paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/docs/pdr_joss_paper.pdf
--------------------------------------------------------------------------------
/docs/pdr_pdart_proposal_roses20.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/docs/pdr_pdart_proposal_roses20.pdf
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: pdr
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - python>=3.9
6 |   - cytoolz
7 |   - numpy
8 |   - pandas>=2.0.0
9 |   - git
10 |   - dustgoggles
11 |   - pip
12 |   - multidict
13 |   - more-itertools
14 |   - rms-vax
15 |   # optional dependencies (use minimal_environment.yml if you'd prefer not to install)
16 |   - pvl
17 |   - astropy
18 |   - jupyter
19 |   - pillow
20 |   - pytest
21 |   - Levenshtein
--------------------------------------------------------------------------------
/minimal_environment.yml:
--------------------------------------------------------------------------------
1 | name: pdr
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - python>=3.9
6 |   - cytoolz
7 |   - numpy
8 |   - pandas>=2.0.0
9 |   - git
10 |   - pip
11 |   - multidict
12 |   - more-itertools
13 |   - dustgoggles
14 |   - rms-vax
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: Planetary Data Reader
2 | site_description: one tool to read them all; https://github.com/MillionConcepts/pdr
3 | theme:
4 |   name: material
5 | plugins:
6 |   - search
7 |   - mkdocstrings:
8 |       handlers:
9 |         python:
10 |           options:
11 |             filters: []
12 |             show_signature_annotations: true
13 | 
14 | nav:
15 |   - 'index.md'
16 |   - 'supported_datasets.md'
17 |   - 'version_history.md'
18 |   - 'code_of_conduct.md'
19 |   - Joss Publication: "https://joss.theoj.org/papers/10.21105/joss.07256"
20 |   - License: 'LICENSE.md'
21 |   - API Reference: 'pdr_api.md'
--------------------------------------------------------------------------------
/pdr/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | import os.path as _osp
3 | import sys
4 | from typing import Collection, Optional, TYPE_CHECKING, Union
5 | 
6 | from pdr.pdr import Data, Metadata
7 | 
8 | if TYPE_CHECKING:
9 |     from pathlib import Path
10 | 
11 | __version__ = "1.4.0"
12 | 
13 | pkg_dir = _osp.abspath(_osp.dirname(__file__))
14 | 
15 | 
16 | def read(
17 |     fp: Union[str, Path],
18 |     debug: bool = False,
19 |     label_fn: Optional[Union[Path, str]] = None,
20 |     search_paths: Union[Collection[str], str] = (),
21 |     skip_existence_check: bool = False,
22 |     **kwargs
23 | ) -> Data:
24 |     """
25 |     Read a data product with PDR. `fp` can be any file associated with the
26 |     product, preferably a detached label file if it exists. Returns a Data
27 |     object that provides an interface to the data and metadata in all available
28 |     files associated with the product.
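    A minimal illustrative example (the path is hypothetical; any file
    belonging to a product works):

        >>> import pdr
        >>> data = pdr.read("/archive/product.lbl")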
29 | """ 30 | return Data( 31 | fp, 32 | debug=debug, 33 | label_fn=label_fn, 34 | search_paths=search_paths, 35 | skip_existence_check=skip_existence_check, 36 | **kwargs 37 | ) 38 | 39 | 40 | def fastread( 41 | fp: Union[str, Path], 42 | debug: bool = False, 43 | search_paths: Union[Collection[str], str] = (), 44 | **kwargs 45 | ) -> Data: 46 | """ 47 | Read a file with PDR, with the assumption that the label is either 48 | attached to `fp` or that `fp` is itself a detached label file, and ignoring 49 | the usual double-check for `fp`'s actual existence in the filesystem. 50 | Intended for cases when you want access to a product's metadata very 51 | quickly and you know exactly where its label is. 52 | """ 53 | return read(fp, debug, fp, search_paths, True, **kwargs) 54 | 55 | 56 | # pdr.open() is an alias for pdr.read() 57 | setattr(sys.modules[__name__], 'open', read) 58 | -------------------------------------------------------------------------------- /pdr/_scaling.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from functools import wraps 3 | from itertools import product 4 | from numbers import Integral, Number, Real 5 | from typing import Optional, Sequence, Union 6 | 7 | import numpy as np 8 | 9 | from pdr.formats.checkers import specialblock 10 | from pdr.datatypes import PDS3_CONSTANT_NAMES, IMPLICIT_PDS3_CONSTANTS 11 | from pdr.np_utils import casting_to_float 12 | from pdr.pdrtypes import PDRLike 13 | 14 | 15 | def find_special_constants( 16 | data: PDRLike, obj: np.ndarray, name: str 17 | ) -> dict[str, Number]: 18 | """ 19 | attempts to find special constants in an ndarray associated with a PDS3 20 | object by referencing the label and "standard" special constant values. 21 | """ 22 | # NOTE: doesn't do anything for PDS4 products at present, although this 23 | # may not be important; usually pds4_tools handles it. 
24 | 
25 |     block = specialblock(data, name)
26 |     # check for explicitly-defined special constants
27 |     specials = {
28 |         name: block[name]
29 |         for name in PDS3_CONSTANT_NAMES
30 |         if (name in block.keys()) and not (block[name] == "N/A")
31 |     }
32 |     for k in specials.keys():
33 |         if isinstance(specials[k], Sequence):
34 |             specials[k] = specials[k][0]
35 |     # ignore uint8 implicit constants (0, 255) for now -- too problematic
36 |     # TODO: maybe add an override
37 |     if obj.dtype.name == "uint8":
38 |         return specials
39 |     # check for implicit constants appropriate to the sample type
40 |     implicit_possibilities = IMPLICIT_PDS3_CONSTANTS[obj.dtype.name]
41 |     # can't check for nans with "in" because it's an equality check, so
42 |     # we don't intend this to be used, just want to make the key and put
43 |     # in a value that won't conflict later
44 |     if np.any(~np.isfinite(obj.data)):
45 |         specials["INVALIDS"] = np.nan
46 |     return specials | {
47 |         possibility: constant
48 |         for possibility, constant in implicit_possibilities.items()
49 |         if constant in obj
50 |     }
51 | 
52 | 
53 | def mask_specials(obj, specials):
54 |     """Return `obj` as a masked array with special-constant values masked."""
55 |     obj = np.ma.masked_array(obj)
56 |     if np.nan in specials:
57 |         # masks infs and nans as well
58 |         obj.mask = np.ma.mask_or(np.isin(obj.data, specials),
59 |                                  ~np.isfinite(obj.data))
60 |     else:
61 |         obj.mask = np.isin(obj.data, specials)
62 |     return obj
63 | 
64 | 
65 | def fit_to_scale(
66 |     arr: np.ndarray,
67 |     scale: Union[Integral, Real],
68 |     offset: Union[Integral, Real]
69 | ) -> np.ndarray:
70 |     """
71 |     Return a version of `arr` cast to the minimum dtype that will hold its
72 |     range of values after multiplying by `scale` and adding `offset`.
73 | 
74 |     Supports:
75 | 
76 |     float32, float64, uint8, int8, uint16, int16, uint32, int32, uint64, int64.
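    For example (reasoning from the ranges above, not a documented case): an
    int8 array with scale=100 and offset=0 is returned as int16, or as uint16
    if all of its values are non-negative, since unsigned types are tried
    first.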
77 | """ 78 | if arr.dtype.char not in 'bBhHiIlLqQnNpPf': 79 | raise TypeError(f"This function does not support {arr.dtype.name}") 80 | if arr.dtype.char in 'fd' or int(scale + offset) != scale + offset: 81 | bases, widths, infofunc = ('f',), (4, 8), np.finfo 82 | else: 83 | bases, widths, infofunc = ('u', 'i'), (1, 2, 4, 8), np.iinfo 84 | amin, amax = map(int, (arr.min(), arr.max())) 85 | smin, smax = amin * scale + offset, amax * scale + offset 86 | for base, width in product(bases, widths): 87 | candidate = np.dtype(f'{base}{width}') 88 | cinfo = infofunc(candidate) 89 | if smin >= cinfo.min and smax <= cinfo.max: 90 | return arr.astype(candidate) 91 | raise TypeError("Unable to find a suitable data type for scaling.") 92 | 93 | 94 | def overflow_wrap(array_func): 95 | @wraps(array_func) 96 | def with_upcasting(arr, scale, offset, *args, **kwargs): 97 | with warnings.catch_warnings(): 98 | warnings.filterwarnings("error", message=".*overflow enc.*") 99 | try: 100 | return array_func(arr, scale, offset, *args, **kwargs) 101 | except (OverflowError, RuntimeWarning): 102 | arr = fit_to_scale(arr, scale, offset) 103 | return array_func(arr, scale, offset, *args, **kwargs) 104 | 105 | return with_upcasting 106 | 107 | 108 | def _copy_scale(obj, offset, scale): 109 | try: 110 | # TODO: we should also be doing this per-plane scaling in inplace case 111 | if len(obj) == len(scale) == len(offset) > 1: 112 | planes = [ 113 | obj[ix] * scale[ix] + offset[ix] for ix in range(len(scale)) 114 | ] 115 | stacked = np.rollaxis(np.ma.dstack(planes), 2) 116 | return stacked 117 | except TypeError: 118 | pass # len() is not usable on a float object 119 | return obj * scale + offset 120 | 121 | 122 | def _inplace_scale(obj, offset, scale): 123 | if len(obj) == len(scale) == len(offset) > 1: 124 | for ix, _ in enumerate(scale): 125 | obj[ix] = obj[ix] * scale[ix] + offset[ix] 126 | else: 127 | obj *= scale 128 | obj += offset 129 | return obj 130 | 131 | 132 | def scale_array( 133 | meta: PDRLike, 134 | obj: np.ndarray, 135 | object_name: str, 136 | inplace: bool = False, 137 | float_dtype: Optional["np.dtype"] = None, 138 | ): 139 | """""" 140 | from pdr.formats.checkers import specialblock 141 | 142 | block = specialblock(meta, object_name) 143 | scale, offset = 1, 0 144 | if "SCALING_FACTOR" in block.keys(): 145 | scale = block["SCALING_FACTOR"] 146 | if isinstance(scale, dict): 147 | scale = scale["value"] 148 | if "OFFSET" in block.keys(): 149 | offset = block["OFFSET"] 150 | if isinstance(offset, dict): 151 | offset = offset["value"] 152 | # meaningfully better for enormous unscaled arrays 153 | if (scale == 1) and (offset == 0): 154 | return obj 155 | # try to perform the operation in-place if requested, although if 156 | # we're casting to float, we can't 157 | # TODO: detect rollover cases, etc. 
158 | if inplace is True and not casting_to_float(obj, scale, offset): 159 | return overflow_wrap(_inplace_scale)(obj, offset, scale) 160 | # if we're casting to float, permit specification of dtype 161 | # prior to operation (float64 is numpy's default and often excessive) 162 | if casting_to_float(obj, scale, offset): 163 | if float_dtype is not None: 164 | obj = obj.astype(float_dtype) 165 | return overflow_wrap(_copy_scale)(obj, offset, scale) 166 | 167 | 168 | # TODO: shake this out much more vigorously 169 | # noinspection PyUnresolvedReferences 170 | def scale_pds4_tools_struct(struct: object) -> np.ndarray: 171 | """see pds4_tools.reader.read_arrays.new_array""" 172 | # TODO: apply bit_mask 173 | from pdr.pds4_tools.reader.data_types import apply_scaling_and_value_offset 174 | 175 | array = struct.data 176 | element_array = struct.meta_data["Element_Array"] 177 | scale_kwargs = { 178 | "scaling_factor": element_array.get("scaling_factor"), 179 | "value_offset": element_array.get("value_offset"), 180 | } 181 | # TODO: is this important? 182 | # dtype = pds_to_numpy_type(struct.meta_data.data_type(), 183 | # data=array, **scale_kwargs) 184 | special_constants = struct.meta_data.get("Special_Constants") 185 | array = apply_scaling_and_value_offset( 186 | array, special_constants=special_constants, **scale_kwargs 187 | ) 188 | if hasattr(array, "mask"): 189 | return np.ma.masked_array(np.asarray(array.data), array.mask) 190 | return np.asarray(array) 191 | -------------------------------------------------------------------------------- /pdr/errors.py: -------------------------------------------------------------------------------- 1 | class AlreadyLoadedError(Exception): 2 | """ 3 | We already loaded this object and haven't been instructed to reload it. 4 | """ 5 | pass 6 | 7 | 8 | class DuplicateKeyWarning(UserWarning): 9 | """This product has duplicate object names; we're renaming them.""" 10 | pass 11 | -------------------------------------------------------------------------------- /pdr/formats/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a wide variety of special-case behaviors for 3 | nonconforming or malformatted data products. It implements these behaviors as 4 | functions in distinct submodules organized by 'dataset' (mission, instrument, 5 | etc.); the `checkers` submodule contains dispatch functions that preempt 6 | generic behaviors and redirect them to functions from one of the dataset 7 | submodules. See the documentation for `checkers` for details on this behavior. 
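For example, a checker may recognize a product as belonging to a dataset with
a known label defect and route its load through a function in the matching
submodule (say, `formats.mex`) rather than through the generic loader.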
8 | """ 9 | 10 | from .checkers import * 11 | import pdr.formats.cassini as cassini 12 | import pdr.formats.clementine as clementine 13 | import pdr.formats.dawn as dawn 14 | import pdr.formats.diviner as diviner 15 | import pdr.formats.epoxi as epoxi 16 | import pdr.formats.galileo as galileo 17 | import pdr.formats.ground as ground 18 | import pdr.formats.ihw as ihw 19 | import pdr.formats.iue as iue 20 | import pdr.formats.juno as juno 21 | import pdr.formats.lp as lp 22 | import pdr.formats.lroc as lroc 23 | import pdr.formats.lro as lro 24 | import pdr.formats.mariner as mariner 25 | import pdr.formats.mer as mer 26 | import pdr.formats.mex as mex 27 | import pdr.formats.mgn as mgn 28 | import pdr.formats.mgs as mgs 29 | import pdr.formats.mro as mro 30 | import pdr.formats.msl_apxs as msl_apxs 31 | import pdr.formats.msl_cmn as msl_cmn 32 | import pdr.formats.msl_ccam as msl_ccam 33 | import pdr.formats.msl_places as msl_places 34 | import pdr.formats.msl_rems as msl_rems 35 | import pdr.formats.msx as msx 36 | import pdr.formats.nh as nh 37 | import pdr.formats.odyssey as odyssey 38 | import pdr.formats.phoenix as phoenix 39 | import pdr.formats.pvo as pvo 40 | import pdr.formats.rosetta as rosetta 41 | import pdr.formats.saturn_rpx as saturn_rpx 42 | import pdr.formats.themis as themis 43 | import pdr.formats.ulysses as ulysses 44 | import pdr.formats.vega as vega 45 | import pdr.formats.viking as viking 46 | import pdr.formats.voyager as voyager 47 | -------------------------------------------------------------------------------- /pdr/formats/clementine.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pdr.loaders.queries 4 | 5 | 6 | def get_offset(data, pointer): 7 | """ 8 | HITS 9 | * clem_GEO 10 | * bsr_rdr_data 11 | """ 12 | start_row = int(re.split(r",|[(|)]", data.metaget(f"^{pointer}"))[2]) 13 | return True, (start_row - 1) * data.metaget("RECORD_BYTES") 14 | 15 | 16 | def get_fn(data, object_name): 17 | """ 18 | HITS 19 | * clem_GEO 20 | * bsr_rdr_data 21 | """ 22 | target = re.split(r",|[(|)]", data.metaget(f"^{object_name}"))[1] 23 | return True, target 24 | 25 | 26 | def get_structure(block, name, filename, data, identifiers): 27 | """ 28 | HITS: 29 | * clem_GEO 30 | * bsr_rdr_data 31 | """ 32 | fmtdef = pdr.loaders.queries.read_table_structure( 33 | block, name, filename, data, identifiers 34 | ) 35 | import numpy as np 36 | import pandas as pd 37 | 38 | fmtdef = pd.concat([fmtdef, fmtdef], ignore_index=True) 39 | fmtdef["NAME"] = fmtdef["NAME"].str.split("_", expand=True)[0] 40 | fmtdef["NAME"] = fmtdef["NAME"].str.cat(map(str, fmtdef.index), sep="_") 41 | fmtdef.ITEM_OFFSET = 8 42 | fmtdef.ITEM_BYTES = 8 43 | from pdr.loaders.queries import _fill_empty_byte_rows 44 | from pdr.pd_utils import insert_sample_types_into_df 45 | 46 | fmtdef['BYTES'] = np.nan 47 | fmtdef = _fill_empty_byte_rows(fmtdef) 48 | fmtdef, dt = insert_sample_types_into_df(fmtdef, identifiers) 49 | return fmtdef, dt 50 | -------------------------------------------------------------------------------- /pdr/formats/dawn.py: -------------------------------------------------------------------------------- 1 | class DoesNotExistError(Exception): 2 | """""" 3 | pass 4 | 5 | 6 | def dawn_history_hdu_exception(): 7 | """ 8 | filter out spurious HISTORY pointer 9 | 10 | HITS 11 | * dawn 12 | * fc_edr_fit 13 | * fc_rdr_fit 14 | """ 15 | raise DoesNotExistError( 16 | "Dawn FITS HISTORY extensions do not actually exist." 
17 |     )
18 | 
--------------------------------------------------------------------------------
/pdr/formats/diviner.py:
--------------------------------------------------------------------------------
1 | def diviner_l4_table_loader(fmtdef_dt, filename):
2 |     """
3 |     Because these can contain the value "NaN", combined with the fact that they
4 |     are space-padded, pd.read_csv sometimes casts some columns to object,
5 |     turning some of their values into strings and some into float, throwing
6 |     warnings and making it obnoxious to work with them (users will randomly not
7 |     be able to, e.g., add two columns together without a data cleaning step).
8 | 
9 |     HITS
10 |     * diviner
11 |     * l4
12 |     """
13 |     import numpy as np
14 |     import pandas as pd
15 | 
16 |     table = pd.DataFrame(
17 |         np.loadtxt(filename, delimiter=",", skiprows=1),
18 |         columns=[c for c in fmtdef_dt[0]["NAME"] if "PLACEHOLDER" not in c],
19 |     )
20 |     return table
21 | 
--------------------------------------------------------------------------------
/pdr/formats/epoxi.py:
--------------------------------------------------------------------------------
1 | from pdr.loaders.queries import table_position
2 | 
3 | 
4 | def cart_model_get_position(identifiers, block, target, name, start_byte):
5 |     """
6 |     The cartesian shape model's RECORD_BYTES and all three of the tables'
7 |     ROW_BYTES should be 79 but the label lists them as 80.
8 | 
9 |     HITS
10 |     * epoxi
11 |     * shape
12 |     """
13 |     table_props = table_position(identifiers, block, target, name, start_byte)
14 |     row_bytes = 79
15 |     table_props["start"] = row_bytes * (target[1] - 1)
16 |     table_props["length"] = row_bytes * block["ROWS"]
17 |     return table_props
18 | 
19 | 
20 | def hriv_deconv_mask_start_byte(name, hdulist):
21 |     """
22 |     The EPOXI HRIV deconvolved radiance files have incorrect start byte
23 |     specifications for the MASK HDU.
24 | 
25 |     HITS
26 |     * epoxi
27 |     * hriv_deconvolved
28 |     """
29 |     if 'HEADER' in name:
30 |         return hdulist.fileinfo('MASK')['hdrLoc']
31 |     return hdulist.fileinfo('MASK')['datLoc']
32 | 
--------------------------------------------------------------------------------
/pdr/formats/ground.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | def mssso_cal_start_byte(name, hdulist):
4 |     """
5 |     A small subset of MSSSO CASPIR calibration images have the wrong start byte
6 |     for the IMAGE pointer in their PDS3 labels.
7 | 
8 |     HITS
9 |     * sl9_jupiter_impact
10 |     * mssso_cal
11 |     """
12 |     if 'HEADER' in name:
13 |         return 0
14 |     return hdulist.fileinfo(0)['datLoc']
15 | 
16 | 
17 | def wff_atm_special_block(data, name):
18 |     """
19 |     One WFF/ATM DEM image opens fine (BBMESA2X2), the other two (SCHOONER2X2
20 |     and SEDAN2X2) have their LINES and LINE_SAMPLES values backwards.
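    This special case swaps the two values back before the image is read.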
21 | 
22 |     HITS
23 |     * wff_atm
24 |     * dem_img
25 |     """
26 |     block = data.metablock_(name)
27 | 
28 |     if data.metaget_("PRODUCT_ID").startswith("S"):
29 |         real_line_samples = block["LINES"]
30 |         real_lines = block["LINE_SAMPLES"]
31 | 
32 |         block["LINES"] = real_lines
33 |         block["LINE_SAMPLES"] = real_line_samples
34 |         return True, block
35 | 
36 |     return False, block
37 | 
38 | def ebrocc_geom_get_position(identifiers, block, target, name, start_byte):
39 |     """
40 |     ROW_BYTES = 45 in the labels, but it should be 47.
41 | 
42 |     HITS
43 |     * ground_based
44 |     * ring_occ_1989_geometry
45 |     """
46 |     from pdr.loaders.queries import table_position
47 | 
48 |     table_props = table_position(identifiers, block, target, name, start_byte)
49 |     n_rows = block["ROWS"]
50 |     row_bytes = block["ROW_BYTES"] + 2
51 |     table_props["length"] = n_rows * row_bytes
52 |     return table_props
53 | 
54 | def trivial_header_loader():
55 |     """
56 |     The HEADER pointer is just the SPREADSHEET table's header row, and it does
57 |     not open because "BYTES = UNK".
58 | 
59 |     HITS
60 |     * apollo
61 |     * BUG
62 |     """
63 |     warnings.warn(
64 |         "This product's HEADER pointer is not currently supported."
65 |     )
66 |     return True
67 | 
--------------------------------------------------------------------------------
/pdr/formats/ihw.py:
--------------------------------------------------------------------------------
1 | def curve_table_loader(filename, fmtdef_dt):
2 |     """
3 |     The labels do not always count column bytes correctly.
4 | 
5 |     HITS
6 |     * ihw_isrn
7 |     * curve
8 |     """
9 |     import pandas as pd
10 |     names = [c for c in fmtdef_dt[0].NAME if "PLACEHOLDER" not in c]
11 |     table = pd.read_csv(filename, header=None, sep=r"\s+")
12 |     assert len(table.columns) == len(names), "mismatched column count"
13 |     table.columns = names
14 |     return table
15 | 
16 | 
17 | def add_newlines_table_loader(fmtdef_dt, block, filename, start_byte):
18 |     """
19 |     Some Halley V1.0 tables (MSN, PPN, and IRSN datasets) are missing
20 |     newline characters between rows. (Also applies to some ICE ephemeris tables)
21 | 
22 |     HITS
23 |     * ihw
24 |     * ms_radar
25 |     * ms_vis
26 |     * ice
27 |     * ephem_tbl
28 |     """
29 |     from io import StringIO
30 |     import pandas as pd
31 |     from pdr.utils import head_file
32 | 
33 |     with head_file(filename) as f:
34 |         f.read(start_byte)
35 |         newlines_added = bytearray()
36 |         for row in range(0, block["ROWS"]):
37 |             bytes_ = f.read(block["ROW_BYTES"])
38 |             newlines_added += bytes_ + b"\n"  # Add a newline to each row
39 |     string_buffer = StringIO(newlines_added.decode())
40 | 
41 |     # Adapted from _interpret_as_ascii()
42 |     fmtdef, dt = fmtdef_dt
43 |     colspecs = []
44 |     for record in fmtdef.to_dict("records"):
45 |         col_length = int(record["BYTES"])
46 |         colspecs.append((record["SB_OFFSET"], record["SB_OFFSET"] + col_length))
47 |     string_buffer.seek(0)
48 |     table = pd.read_fwf(string_buffer, header=None, colspecs=colspecs)
49 |     string_buffer.close()
50 |     table.columns = fmtdef.NAME.tolist()
51 |     table = table.drop([k for k in table.keys() if "PLACEHOLDER" in k], axis=1)
52 |     return table
53 | 
54 | 
55 | def get_special_block(data, name):
56 |     """
57 |     A handful of MSN Radar tables have column names that were not reading
58 |     correctly and were ending up as "NaN", which also caused an
59 |     AttributeError when running ix check.
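    This special case fills in the missing NAME fields (">=1SEC" and
    ">=8SEC"), keyed on each column's START_BYTE.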
60 | 
61 |     HITS
62 |     * ihw
63 |     * ms_radar
64 |     """
65 |     block = data.metablock_(name)
66 |     for item in iter(block.items()):
67 |         if "COLUMN" in item:
68 |             if item[1]["START_BYTE"] == 17 and "NAME" not in item[1]:
69 |                 item[1].add("NAME", ">=1SEC")
70 |             if item[1]["START_BYTE"] == 21 and "NAME" not in item[1]:
71 |                 item[1].add("NAME", ">=8SEC")
72 |     return block
73 | 
74 | 
75 | def get_structure(block, name, filename, data, identifiers):
76 |     """
77 |     SSN products with a SPECTRUM pointer were opening with an incorrect
78 |     column name.
79 | 
80 |     HITS
81 |     * ihw
82 |     * spec_hal_cal
83 |     """
84 |     from pdr.loaders.queries import read_table_structure
85 |     from pdr.pd_utils import insert_sample_types_into_df
86 | 
87 |     fmtdef = read_table_structure(
88 |         block, name, filename, data, identifiers
89 |     )
90 |     fmtdef.at[0, "NAME"] = fmtdef.at[0, "COLUMN_NAME"]
91 | 
92 |     fmtdef, dt = insert_sample_types_into_df(fmtdef, identifiers)
93 |     return fmtdef, dt
94 | 
--------------------------------------------------------------------------------
/pdr/formats/iue.py:
--------------------------------------------------------------------------------
1 | def get_special_block(data, name):
2 |     """
3 |     A subset of the IUE resampled SSI/LSI comet images have a typo in their
4 |     labels: the QUALITY_IMAGE pointer name does not match its OBJECT name.
5 | 
6 |     HITS
7 |     * iue
8 |     * comet_image
9 |     """
10 |     if data.metablock_(name) is not None:
11 |         return False, None
12 |     return True, data.metablock_("QUALITY_QUALITY_IMAGE")
13 | 
--------------------------------------------------------------------------------
/pdr/formats/juno.py:
--------------------------------------------------------------------------------
1 | def jiram_rdr_sample_type():
2 |     """
3 |     JIRAM RDRs, both images and tables, are labeled as MSB but
4 |     are actually LSB.
5 | 
6 |     HITS
7 |     * juno_jiram
8 |     * IMG_RDR
9 |     * SPE_RDR
10 |     """
11 |     return "B"
12 | 
--------------------------------------------------------------------------------
/pdr/formats/mariner.py:
--------------------------------------------------------------------------------
1 | def get_special_block(data, name):
2 |     """
3 |     Mariner 9 IRIS tables have 316 ROW_PREFIX_BYTES followed by 1 column
4 |     with 1500 ITEMS. The column's START_BYTE = 317, but it should be 1.
5 | 
6 |     HITS
7 |     * mariner
8 |     * iris
9 |     """
10 |     block = data.metablock_(name)
11 |     block["COLUMN"]["START_BYTE"] = 1
12 |     return block
13 | 
--------------------------------------------------------------------------------
/pdr/formats/mer.py:
--------------------------------------------------------------------------------
1 | def rss_spreadsheet_loader(filename, fmtdef_dt):
2 |     """
3 |     The RSS UHFD labels have the wrong ROWS value for most products.
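    Loading the file with pd.read_csv and checking the column count against
    the format definition sidesteps the bad ROWS value.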
 4 | 
 5 |     HITS
 6 |     * mer_rss
 7 |     * uhfd
 8 |     """
 9 |     import pandas as pd
10 | 
11 |     fmtdef, dt = fmtdef_dt
12 |     table = pd.read_csv(filename, header=None, sep=",")
13 |     assert len(table.columns) == len(fmtdef.NAME.tolist())
14 |     table.columns = fmtdef.NAME.tolist()
15 |     return table
16 | 
--------------------------------------------------------------------------------
/pdr/formats/mex.py:
--------------------------------------------------------------------------------
 1 | from pdr.loaders.queries import table_position
 2 | 
 3 | 
 4 | def marsis_get_position(identifiers, block, target, name, start_byte):
 5 |     """
 6 |     HITS
 7 |     * mex_marsis
 8 |     * TEC_EDR
 9 |     """
10 |     table_props = table_position(identifiers, block, target, name, start_byte)
11 |     n_records = identifiers["FILE_RECORDS"]
12 |     record_bytes = 143
13 |     table_props["length"] = n_records * record_bytes
14 |     return table_props
15 | 
16 | 
17 | def aspera_table_loader(filename, fmtdef_dt):
18 |     """
19 |     The ASPERA IMA EDRs are ASCII CSV tables containing 2 data types: SENSOR
20 |     and MODE. The VALUES column is repeated and has 96 items total. In the MODE
21 |     rows only the first VALUES item contains data, and should be followed by 95
22 |     'missing' items.
23 |     In reality these rows have 96 empty/missing items because of an extra
24 |     comma. This special case cuts off the extra column during the pd.read_csv()
25 |     call.
26 | 
27 |     HITS
28 |     * mex_aspera
29 |     * ima
30 |     """
31 |     import pandas as pd
32 | 
33 |     fmtdef, dt = fmtdef_dt
34 |     table = pd.read_csv(
35 |         filename, header=None, usecols=range(len(fmtdef.NAME.tolist()))
36 |     )
37 |     assert len(table.columns) == len(fmtdef.NAME.tolist())
38 |     table.columns = fmtdef.NAME.tolist()
39 |     return table
40 | 
41 | 
42 | def aspera_ima_ddr_structure(block, name, filename, data, identifiers):
43 |     """
44 |     The ASPERA IMA DDR table opens correctly as written in its label, but
45 |     the BYTES values for columns 3 and 4 are wrong.
46 | 
47 |     HITS
48 |     * mex_aspera
49 |     * ima_ddr
50 |     """
51 |     from pdr.loaders.queries import read_table_structure
52 | 
53 |     fmtdef = read_table_structure(
54 |         block, name, filename, data, identifiers
55 |     )
56 |     fmtdef.at[2, "BYTES"] = 12
57 |     fmtdef.at[3, "BYTES"] = 12
58 |     return fmtdef, None
59 | 
60 | 
61 | def pfs_edr_special_block(data, name):
62 |     """
63 |     The PFS EDRs have a few errors in their labels prior to orbit 8945, after
64 |     which they are corrected.
65 | 
66 |     HITS
67 |     * mex_pfs
68 |     * raw_lwc
69 |     * raw_swc
70 |     * cal_lwc
71 |     * cal_swc
72 |     * hk_early_mission
73 |     * orb001_lwc
74 |     * orb001_swc
75 |     """
76 |     block = data.metablock_(name)
77 |     orbit_number = data.metaget_("ORBIT_NUMBER")
78 | 
79 |     if orbit_number == "N/A" or int(orbit_number) < 8945:
80 |         # Fixes the number of rows in the table by replacing ROWS with
81 |         # FILE_RECORDS.
82 |         block["ROWS"] = data.metaget_("FILE_RECORDS")
83 |         # Replaces the time columns' DATA_TYPEs with the correct type based on
84 |         # products created later in the mission.
85 |         for item in iter(block.items()):
86 |             if "COLUMN" in item:
87 |                 if item[1]["NAME"] == "OBT OBSERVATION TIME":
88 |                     item[1]["DATA_TYPE"] = "PC_REAL"
89 |                 if item[1]["NAME"] == "SCET OBSERVATION TIME":
90 |                     item[1]["DATA_TYPE"] = "PC_UNSIGNED_INTEGER"
91 |         return True, block
92 |     return False, block
93 | 
94 | 
95 | def mrs_ddr_atmo_position(identifiers, block, target, name, start_byte):
96 |     """
97 |     The MRS derived atmosphere profiles were opening with data cut off at the
98 |     ends of the tables.
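    (Hypothetical illustration: if each of 1,000 rows is really 2 bytes
    wider than the label claims, the computed table extent falls
    1000 * 2 = 2,000 bytes short, truncating the final rows.)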
Recalculating the table length with ROW_BYTES = 278 99 | instead of 276 fixes it. 100 | 101 | HITS 102 | * mex_mrs 103 | * occ_atmo 104 | """ 105 | table_props = table_position(identifiers, block, target, name, start_byte) 106 | row_bytes = 278 107 | table_props["length"] = row_bytes * block["ROWS"] 108 | return table_props 109 | 110 | 111 | def mrs_get_position(identifiers, block, target, name, start_byte): 112 | """ 113 | MRS ICL level 1b DOPPLER_TABLEs and ODF level 2 RANGING_TABLEs undercount 114 | ROW_BYTES by 1. 115 | 116 | HITS 117 | * mex_mrs 118 | * lvl_1b_icl (partial) 119 | * lvl_2_odf (partial) 120 | """ 121 | table_props = table_position(identifiers, block, target, name, start_byte) 122 | row_bytes = block["ROW_BYTES"] + 1 123 | table_props["length"] = row_bytes * block["ROWS"] 124 | return table_props 125 | 126 | 127 | def mrs_l1b_odf_table_loader(filename, fmtdef_dt): 128 | """ 129 | MRS level 1b ODF labels have variable and sometimes incorrect ROW_BYTES 130 | values. 131 | 132 | HITS 133 | * mex_mrs 134 | * lvl_1b_odf 135 | """ 136 | import pandas as pd 137 | 138 | fmtdef, dt = fmtdef_dt 139 | table = pd.read_csv(filename, header=None, sep=r"\s+") 140 | table.columns = [ 141 | f for f in fmtdef['NAME'] if not f.startswith('PLACEHOLDER') 142 | ] 143 | return table 144 | 145 | 146 | def mrs_l1b_odf_rmp_redirect(data): 147 | """ 148 | RMP tables are a subset of MRS level 1b ODFs that were not opening because 149 | their pointer and object names do not match. 150 | 151 | HITS: 152 | * mex_mrs 153 | * lvl_1b_odf (partial) 154 | """ 155 | object_name = "RAMP_TABLE" 156 | block = data.metablock_(object_name) 157 | return block 158 | 159 | def vmc_rdr_hdu_selection(name, hdulist): 160 | """ 161 | The VMC RDRs have 1 IMAGE pointer and 2 IMAGE objects. From the volume's 162 | readme: "The first layer includes the calibrated values, and the second 163 | layer includes the raw values." It is unclear whether or not the 'second 164 | layer' is a copy of the EDR image or if intermediate calibration steps 165 | have been applied to it. 166 | Assuming the single band image is akin to the EDRs, this special case 167 | returns the multiband calibrated image. 168 | 169 | HITS 170 | * mex_vmc 171 | * rdr 172 | """ 173 | 174 | return hdulist.fileinfo(1)['datLoc'] 175 | -------------------------------------------------------------------------------- /pdr/formats/mgn.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | 3 | from pdr.utils import head_file 4 | 5 | 6 | def geom_table_loader(filename, fmtdef_dt): 7 | """ 8 | The Magellan radar system geometry tables include null bytes between rows. 
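    (A minimal sketch of the cleanup performed below, assuming the file fits
    comfortably in memory and using a hypothetical filename:

        from pathlib import Path

        raw = Path("geom_table.tab").read_bytes()
        text = raw.replace(b"\x00", b"").decode()

    after which the rows parse as ordinary CSV.)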
9 | 10 | HITS 11 | * gal_nims 12 | * impact 13 | * mgn_image 14 | * midr_tables 15 | """ 16 | import pandas as pd 17 | from pdr.utils import head_file 18 | 19 | fmtdef, dt = fmtdef_dt 20 | with head_file(filename) as buf: 21 | bytes_ = buf.read().replace(b"\x00", b"") 22 | string_buffer = StringIO(bytes_.decode()) 23 | string_buffer.seek(0) 24 | table = pd.read_csv(string_buffer, header=None) 25 | names = [n for n in fmtdef['NAME'] if 'PLACEHOLDER' not in n] 26 | assert len(table.columns) == len(names), 'column name mismatch' 27 | string_buffer.close() 28 | table.columns = names 29 | return table 30 | 31 | 32 | def orbit_table_in_img_loader(): 33 | """ 34 | HITS 35 | * mgn_post_mission 36 | * fmap 37 | * fmap_browse 38 | """ 39 | return True 40 | 41 | 42 | def get_fn(data): 43 | """ 44 | HITS 45 | * mgn_post_mission 46 | * fmap 47 | * fmap_browse 48 | """ 49 | target = data.filename 50 | return True, target 51 | 52 | 53 | def occultation_loader(identifiers, fmtdef_dt, block, filename): 54 | """ 55 | Checks end of each row for newline character. If missing, removes 56 | extraneous newline from middle of the row and adjusts for the extra byte. 57 | Adapted from _interpret_as_ascii() 58 | 59 | HITS 60 | * mgn_occult 61 | * ddr 62 | """ 63 | import pandas as pd 64 | 65 | fmtdef, dt = fmtdef_dt 66 | record_length = block["ROW_BYTES"] 67 | 68 | # Checks end of each row for newline character. If missing, removes extraneous 69 | # newline from middle of the row and adjusts for the extra byte. 70 | with head_file(filename) as f: 71 | processed = bytearray() 72 | for row in range(0, identifiers["FILE_RECORDS"]): 73 | bytes_ = f.read(record_length) 74 | if not bytes_.endswith(b"\n"): 75 | new_bytes_ = bytes_.replace(b"\n", b"") + f.read(1) 76 | processed += new_bytes_ 77 | else: 78 | processed += bytes_ 79 | string_buffer = StringIO(processed.decode()) 80 | # adapted from _interpret_as_ascii() 81 | colspecs = [] 82 | position_records = fmtdef.to_dict("records") 83 | for record in position_records: 84 | col_length = record["BYTES"] 85 | colspecs.append((record["SB_OFFSET"], record["SB_OFFSET"] + col_length)) 86 | string_buffer.seek(0) 87 | table = pd.read_fwf(string_buffer, header=None, colspecs=colspecs) 88 | string_buffer.close() 89 | 90 | table.columns = fmtdef.NAME.tolist() 91 | return table.drop("PLACEHOLDER_0", axis=1) 92 | 93 | 94 | def gvanf_sample_type(): 95 | return ">B" 96 | -------------------------------------------------------------------------------- /pdr/formats/mgs.py: -------------------------------------------------------------------------------- 1 | from pdr.loaders.queries import read_table_structure 2 | 3 | 4 | def get_odf_structure(block, name, filename, data, identifiers): 5 | """""" 6 | from pdr.pd_utils import insert_sample_types_into_df 7 | fmtdef = read_table_structure( 8 | block, name, filename, data, identifiers 9 | ) 10 | fmtdef.at[7, "BYTES"] = 2 11 | fmtdef[f"ROW_BYTES"] = block.get(f"ROW_BYTES") 12 | 13 | fmtdef, dt = insert_sample_types_into_df(fmtdef, identifiers) 14 | return fmtdef, dt 15 | 16 | 17 | def get_ecs_structure(block, name, filename, data, identifiers): 18 | """ 19 | HITS 20 | * mgs_rss_raw 21 | * ecs 22 | """ 23 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 24 | fmtdef = read_table_structure( 25 | block, name, filename, data, identifiers 26 | ) 27 | fmtdef.at[5, "START_BYTE"] = 80 28 | fmtdef[f"ROW_BYTES"] = block.get(f"ROW_BYTES") 29 | 30 | fmtdef = compute_offsets(fmtdef) 31 | fmtdef, dt = insert_sample_types_into_df(fmtdef, 
identifiers) 32 | return fmtdef, dt 33 | 34 | 35 | def mola_pedr_special_block(data, name, identifiers): 36 | """ 37 | Fix for FILE_RECORDS = "UNK" and ROWS = "UNK" in the MOLA PEDR labels. 38 | This special case calculates ROWS using the count_from_bottom_of_file() 39 | logic in reverse. 40 | 41 | HITS 42 | * mgs_mola 43 | * pedr 44 | * mgs_sampler 45 | * pedr 46 | """ 47 | import os 48 | from pathlib import Path 49 | from pdr.loaders.queries import data_start_byte 50 | 51 | block = data.metablock_(name) 52 | target = data.metaget_("^"+name) 53 | start_byte = data_start_byte(identifiers, block, target, data.filename) 54 | 55 | table_bytes = os.path.getsize(Path(data.filename)) - start_byte 56 | block["ROWS"] = int(table_bytes / block["ROW_BYTES"]) 57 | 58 | return block 59 | -------------------------------------------------------------------------------- /pdr/formats/mro.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | 3 | from pdr.loaders.queries import read_table_structure 4 | from pdr.utils import head_file 5 | 6 | 7 | def get_structure(block, name, filename, data, identifiers): 8 | """ 9 | The first column in the MCS (EDR/RDR/DDR) format files are just named "1" 10 | which is being read as 'int'. This was causing problems in read_table 11 | during the table.drop call 12 | 13 | HITS 14 | * mro 15 | * mcs_edr 16 | * mcs_rdr 17 | """ 18 | fmtdef = read_table_structure( 19 | block, name, filename, data, identifiers 20 | ) 21 | fmtdef["NAME"] = fmtdef["NAME"].values.astype(str) 22 | return fmtdef, None 23 | 24 | 25 | def mcs_ddr_table_loader(fmtdef_dt, block, filename, start_byte): 26 | """Reads each row of the table and removes extra newline characters. 27 | Adapted from _interpret_as_ascii().""" 28 | with head_file(filename) as f: 29 | f.read(start_byte) 30 | newlines_removed = bytearray() 31 | for row in range(0, block["ROWS"]): 32 | bytes_ = f.read(block["ROW_BYTES"]) 33 | newlines_removed += bytes_.replace(b"\n", b"") + b"\n" 34 | string_buffer = StringIO(newlines_removed.decode()) 35 | import pandas as pd 36 | from pdr.pd_utils import compute_offsets 37 | 38 | # Adapted from _interpret_as_ascii() 39 | fmtdef, dt = fmtdef_dt 40 | colspecs = [] 41 | position_records = compute_offsets(fmtdef).to_dict("records") 42 | for record in position_records: 43 | col_length = record["BYTES"] 44 | colspecs.append((record["SB_OFFSET"], record["SB_OFFSET"] + col_length)) 45 | string_buffer.seek(0) 46 | table = pd.read_fwf(string_buffer, header=None, colspecs=colspecs) 47 | string_buffer.close() 48 | 49 | table.columns = fmtdef.NAME.tolist() 50 | return table 51 | 52 | def crism_mrdr_ancill_position(identifiers, block, target, name, start_byte): 53 | """ 54 | ROW_BYTES = 14 in the labels, but it should be 16 (the RECORD_BYTES) 55 | 56 | HITS 57 | * crism 58 | * ancil_mrdr 59 | """ 60 | from pdr.loaders.queries import table_position 61 | 62 | table_props = table_position(identifiers, block, target, name, start_byte) 63 | n_rows = block["ROWS"] 64 | row_bytes = identifiers["RECORD_BYTES"] 65 | table_props["length"] = n_rows * row_bytes 66 | return table_props 67 | 68 | -------------------------------------------------------------------------------- /pdr/formats/msl_apxs.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def table_loader(pointer): 5 | """ 6 | we don't support these right now, or maybe ever 7 | 8 | HITS 9 | * msl_apxs 10 | * APXS_SCIENCE_EDR 11 | """ 12 | 
warnings.warn( 13 | f"The MSL APXS {pointer} tables are not currently supported." 14 | ) 15 | return True 16 | 17 | def trivial_header_loader(): 18 | """ 19 | The HEADER pointer is just the SPREADSHEET table's header row, and it does 20 | not open because "BYTES = UNK" 21 | 22 | HITS 23 | * msl_apxs 24 | * APXS_OXIDE_RDR 25 | * APXS_SPECTRUM_RDR 26 | """ 27 | warnings.warn( 28 | f"The MSL APXS RDR HEADER pointers are not currently supported." 29 | ) 30 | return True 31 | -------------------------------------------------------------------------------- /pdr/formats/msl_ccam.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def image_reply_table_loader(): 5 | """ 6 | HITS 7 | * msl_ccam 8 | * CCAM_RMI_EDR 9 | """ 10 | warnings.warn( 11 | "MSL ChemCam IMAGE_REPLY binary tables are not supported " 12 | "due to a formatting error in label files." 13 | ) 14 | return True 15 | -------------------------------------------------------------------------------- /pdr/formats/msl_cmn.py: -------------------------------------------------------------------------------- 1 | def spreadsheet_loader(filename): 2 | """ 3 | HITS 4 | * msl_cmn 5 | * DIFFRACTION_ALL_RDR 6 | * ENERGY_SINGLE_RDR 7 | * MINERAL_TABLES 8 | * msl_sam 9 | * l0_qms 10 | * l1a_qms 11 | * l1b_qms 12 | """ 13 | import pandas as pd 14 | return pd.read_csv(filename) 15 | 16 | 17 | def trivial_header_loader(): 18 | """ 19 | HITS 20 | * msl_cmn 21 | * DIFFRACTION_ALL_RDR 22 | * ENERGY_SINGLE_RDR 23 | * MINERAL_TABLES 24 | * msl_sam 25 | * l0_hk 26 | * l0_qms 27 | * l0_gc 28 | * l0_tls 29 | * l1a_hk 30 | * l1a_qms 31 | * l1a_gc 32 | * l1a_tls 33 | * l1b_qms 34 | * l1b_gc 35 | * l2_qms 36 | * l2_gc 37 | * l2_tls 38 | """ 39 | return True 40 | 41 | 42 | def fix_mangled_name(data): 43 | """ 44 | HITS 45 | * msl_cmn 46 | * HOUSEKEEPING 47 | """ 48 | object_name = "CHMN_HSKN_HEADER_TABLE" 49 | block = data.metablock_(object_name) 50 | return block 51 | 52 | 53 | def get_offset(object_name): 54 | """ 55 | incorrectly specifies object length rather than start byte 56 | 57 | HITS 58 | * msl_cmn 59 | * DIFFRACTION_ALL_RDR 60 | * ENERGY_SINGLE_RDR 61 | * MINERAL_TABLES 62 | * CCD_FRAME 63 | * DIFFRACTION_SINGLE 64 | * DIFFRACTION_SPLIT 65 | * DIFFRACTION_ALL 66 | * ENERGY_ALL 67 | * ENERGY_SINGLE 68 | * ENERGY_SPLIT 69 | * HOUSKEEPING 70 | * TRANSMIT_RAW 71 | """ 72 | if object_name == "HISTOGRAM": 73 | return True, 300 74 | if object_name == "CHMN_HSK_HEADER_TABLE": 75 | return True, 0 76 | return False, None 77 | -------------------------------------------------------------------------------- /pdr/formats/msl_places.py: -------------------------------------------------------------------------------- 1 | def spreadsheet_loader(filename, fmtdef_dt): 2 | """ 3 | HITS 4 | * msl_places 5 | * localizations 6 | """ 7 | import pandas as pd 8 | 9 | fmtdef, dt = fmtdef_dt 10 | table = pd.read_csv(filename, sep=",") 11 | assert len(table.columns) == len(fmtdef.NAME.tolist()) 12 | table.columns = fmtdef.NAME.tolist() 13 | return table 14 | -------------------------------------------------------------------------------- /pdr/formats/msl_rems.py: -------------------------------------------------------------------------------- 1 | def edr_table_loader(filename, fmtdef_dt, block, start_byte): 2 | """ 3 | The ROW_SUFFIX_BYTES are either miscounted by a few bytes, or we don't 4 | handle them correctly. There appears to be a related issue with the tables' 5 | start bytes as well. 
This special case bypasses both issues. 6 | 7 | HITS 8 | * msl_rems 9 | * edr_SP 10 | """ 11 | import pandas as pd 12 | 13 | fmtdef, dt = fmtdef_dt 14 | 15 | # number of rows to skip (there are multiple table pointers per product) 16 | skips = int(start_byte / 399) 17 | table = pd.read_csv(filename, header=None, 18 | skiprows=skips, 19 | nrows=block["ROWS"]) 20 | 21 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 22 | assert len(table.columns) == len(col_names), "mismatched column count" 23 | table.columns = col_names 24 | return table 25 | 26 | 27 | def edr_offset(data, name): 28 | """ 29 | HITS: 30 | * msl_rems 31 | * edr_HSDEF 32 | # edr_HSREG 33 | """ 34 | start_byte = data.metaget_("^"+name)[1] - 1 35 | return True, start_byte 36 | 37 | 38 | def rdr_table_loader(filename, fmtdef_dt): 39 | """ 40 | Missing values are variations of "UNK" and "NULL", which cause mixed dtype 41 | warnings when using the default pd.read_csv() parameters. 42 | 43 | HITS 44 | * msl_rems 45 | * rdr_rmd 46 | * rdr_rnv 47 | * rdr_rtl 48 | """ 49 | import pandas as pd 50 | 51 | fmtdef, dt = fmtdef_dt 52 | 53 | missing_const = [' UNK', ' UNK', ' UNK', ' UNK', 54 | ' UNK', ' UNK', 55 | ' NULL', ' NULL'] 56 | table = pd.read_csv(filename, header=None, 57 | na_values=missing_const) 58 | 59 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 60 | assert len(table.columns) == len(col_names), "mismatched column count" 61 | table.columns = col_names 62 | return table 63 | -------------------------------------------------------------------------------- /pdr/formats/msx.py: -------------------------------------------------------------------------------- 1 | 2 | def cube_envi_header_position(identifiers, block, target, name, start_byte, fn): 3 | """ 4 | The ENVI_HEADER pointer's BYTES = "N/A" 5 | 6 | HITS 7 | * msx 8 | * cubes 9 | """ 10 | from pdr.loaders.queries import table_position 11 | import os 12 | from pathlib import Path 13 | 14 | table_props = table_position(identifiers, block, target, name, start_byte) 15 | table_props["length"] = os.path.getsize(Path(fn)) 16 | return table_props 17 | 18 | -------------------------------------------------------------------------------- /pdr/formats/nh.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | 4 | def get_fn(data): 5 | """ 6 | The PEPSSI DDRs have an extra space at the start of the SPREADSHEET 7 | pointer's filename that causes 'file not found' errors. 8 | 9 | HITS 10 | * nh_derived 11 | * atmos_comp 12 | * nh_pepssi 13 | * flux_resampled 14 | """ 15 | label = Path(data.labelname) 16 | return True, Path(label.parent, f"{label.stem}.csv") 17 | -------------------------------------------------------------------------------- /pdr/formats/odyssey.py: -------------------------------------------------------------------------------- 1 | def map_table_loader(filename, fmtdef_dt): 2 | """ 3 | A few products open fine from their labels, but most do not. Seems like 4 | a byte counting issue in the labels. 5 | 6 | HITS 7 | * mars_odyssey 8 | * maps 9 | """ 10 | import pandas as pd 11 | names = [c for c in fmtdef_dt[0]['NAME'] if 'PLACEHOLDER' not in c] 12 | # Some tables use tabs as column delimiters, others use spaces. 
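    # (Illustrative, assuming `from io import StringIO`.) sep=r"\s+"
    # treats any run of spaces or tabs as one delimiter, so both styles
    # parse identically:
    #     pd.read_csv(StringIO("1\t2  3"), sep=r"\s+", header=None)
    #     -> a single row with values [1, 2, 3]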
13 | table = pd.read_csv(filename, header=None, sep=r"\s+") 14 | assert len(table.columns) == len(names), "Mismatched column count" 15 | table.columns = names 16 | return table 17 | -------------------------------------------------------------------------------- /pdr/formats/phoenix.py: -------------------------------------------------------------------------------- 1 | def elec_em6_structure(block, name, filename, data, identifiers): 2 | """ 3 | ELEC EDR em6/TBL tables: All the START_BYTEs in TBL_0_STATE_DATA.FMT 4 | are off by 36 bytes. 5 | 6 | HITS 7 | * phoenix 8 | * elec_edr (partial) 9 | """ 10 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 11 | from pdr.loaders.queries import read_table_structure 12 | fmtdef = read_table_structure( 13 | block, name, filename, data, identifiers 14 | ) 15 | for line in range(0, len(fmtdef)): 16 | if fmtdef.at[line, "BLOCK_NAME"] == "TBL0 DATA": 17 | fmtdef.at[line, "START_BYTE"] -= 36 18 | fmtdef = compute_offsets(fmtdef) 19 | return insert_sample_types_into_df(fmtdef, identifiers) 20 | 21 | 22 | def afm_rdr_structure(block, name, filename, data, identifiers): 23 | """ 24 | AFM RDR header tables: Several columns' NAME fields start with lowercase 25 | letters, which is_an_assignment_line() in /parselabel/pds3.py evaluates as 26 | NOT an assignment statement. 27 | 28 | HITS 29 | * phoenix 30 | * afm_rdr 31 | """ 32 | from pdr.loaders.queries import read_table_structure 33 | fmtdef = read_table_structure(block, name, filename, data, identifiers) 34 | fmtdef.insert(1, 'NAME', fmtdef.pop('NAME')) 35 | for line in range(0, len(fmtdef)): 36 | col_number_text = fmtdef.at[line, "COLUMN_NUMBER"] 37 | if ( 38 | isinstance(col_number_text, str) 39 | and "NAME" in col_number_text 40 | ): 41 | fmtdef.at[ 42 | line, "COLUMN_NUMBER" 43 | ] = col_number_text.split("NAME = ")[0] 44 | fmtdef.at[line, "NAME"] = col_number_text.split("NAME = ")[1] 45 | return fmtdef, None 46 | 47 | 48 | def afm_table_loader(filename, fmtdef_dt, name): 49 | """ 50 | AFM RDR tables: Several labels miscount bytes somewhere in the tables 51 | 52 | HITS 53 | * phoenix 54 | * afm_rdr 55 | """ 56 | import pandas as pd 57 | 58 | if "HEADER_TABLE" in name: 59 | num_rows_skipped = 0 60 | num_rows = 4 61 | elif name == "AFM_F_ERROR_TABLE": 62 | num_rows_skipped = 4 63 | num_rows = 512 64 | elif name == "AFM_F_HEIGHT_TABLE": 65 | num_rows_skipped = 516 66 | num_rows = 512 67 | elif name == "AFM_B_ERROR_TABLE": 68 | num_rows_skipped = 1028 69 | num_rows = 512 70 | elif name == "AFM_B_HEIGHT_TABLE": 71 | num_rows_skipped = 1540 72 | num_rows = 512 73 | table = pd.read_csv( 74 | filename, 75 | header=None, 76 | sep=",", 77 | skiprows=num_rows_skipped, nrows=num_rows 78 | ) 79 | names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 80 | assert len(table.columns) == len(names), "mismatched column count" 81 | table.columns = names 82 | return table 83 | 84 | 85 | def phxao_header_position(identifiers, block, target, name, start_byte): 86 | """ 87 | PHXAO tables: Some table headers have lost trailing whitespace 88 | assumed to be present by the label. Treat as newline-delimited 89 | instead; the record count is correct. 
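    (As read here, returning {"as_rows": True, "start": 0, "length": n}
    asks the table reader to slice the header out by row count rather
    than byte range, which tolerates the missing padding; a sketch of the
    equivalent manual read, with a hypothetical filename and a record
    count of 3:

        with open("phxao_header.tab") as f:
            header = "".join(next(f) for _ in range(3))
    )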
90 | 91 | HITS 92 | * phoenix 93 | * atm_phxao 94 | """ 95 | from pdr.loaders.queries import _extract_table_records 96 | return { 97 | "as_rows": True, 98 | "start": 0, 99 | "length": _extract_table_records(block), 100 | } 101 | 102 | 103 | def phxao_table_offset(filename, identifiers): 104 | """ 105 | PHXAO tables: Some table headers have lost trailing whitespace 106 | assumed to be present by the label. Recalculate the table offset 107 | assuming that the table itself is still fixed-width. 108 | 109 | HITS 110 | * phoenix 111 | * atm_phxao 112 | """ 113 | from pdr.loaders._helpers import count_from_bottom_of_file 114 | rows = identifiers["ROWS"] 115 | row_bytes = identifiers["ROW_BYTES"] 116 | start_byte = count_from_bottom_of_file( 117 | filename, rows, row_bytes=row_bytes 118 | ) 119 | return True, start_byte 120 | 121 | 122 | def wcl_edr_special_block(data, name): 123 | """ 124 | WCL EDR ema/emb/emc tables: the START_BYTE for columns 13 and 14 are 125 | off by 1 and 2 bytes respectively. (The em8/em9/emf tables are fine.) 126 | 127 | HITS 128 | * phoenix 129 | * wcl_edr (partial) 130 | """ 131 | block = data.metablock_(name) 132 | 133 | for item in iter(block.items()): 134 | if "COLUMN" in item: 135 | if item[1]["COLUMN_NUMBER"] == 13: 136 | item[1]["START_BYTE"] -= 1 137 | if item[1]["COLUMN_NUMBER"] == 14: 138 | item[1]["START_BYTE"] -= 2 139 | return block 140 | 141 | 142 | def wcl_rdr_offset(data, name): 143 | """WCL RDR CP/CV tables: in the labels, each pointer's start byte is 144 | missing '' even though the units are bytes rather than file_records. 145 | This doesn't fix the header table though, they still need attention.""" 146 | target = data.metaget_("^"+name) 147 | start_byte = target[-1] - 1 148 | return True, start_byte 149 | 150 | 151 | def led_edr_structure(block, name, filename, data, identifiers): 152 | """ 153 | TEGA_LED.FMT: the CONTAINER's REPETITIONS should be 1000, not 1010 154 | 155 | HITS 156 | * phoenix 157 | * lededr 158 | """ 159 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 160 | from pdr.loaders.queries import read_table_structure 161 | 162 | fmtdef = read_table_structure( 163 | block, name, filename, data, identifiers 164 | ) 165 | real_repetitions = 1000 166 | real_fmtdef_len = 5 + (real_repetitions * 3) 167 | fmtdef = fmtdef.iloc[0:real_fmtdef_len, :] 168 | 169 | for line in range(0, len(fmtdef)): 170 | if fmtdef.at[line, "BLOCK_NAME"] == "LED_RECORDS": 171 | fmtdef.at[line, "BLOCK_REPETITIONS"] = 1000 172 | 173 | fmtdef = compute_offsets(fmtdef) 174 | return insert_sample_types_into_df(fmtdef, identifiers) 175 | 176 | 177 | def sc_rdr_structure(block, name, filename, data, identifiers): 178 | """ 179 | TEGA_SCRDR.FMT: most of the START_BYTEs are off by 4 because column 2 180 | ("TEGA_TIME") is actually 8 bytes, not 4 181 | 182 | HITS 183 | * phoenix 184 | * scrdr 185 | """ 186 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 187 | from pdr.loaders.queries import read_table_structure 188 | 189 | fmtdef = read_table_structure( 190 | block, name, filename, data, identifiers 191 | ) 192 | for line in range(0, len(fmtdef)): 193 | if fmtdef.at[line, "COLUMN_NUMBER"] == 2: 194 | fmtdef.at[line, "BYTES"] = 8 195 | if fmtdef.at[line, "COLUMN_NUMBER"] >= 3: 196 | fmtdef.at[line, "START_BYTE"] += 4 197 | 198 | fmtdef = compute_offsets(fmtdef) 199 | return insert_sample_types_into_df(fmtdef, identifiers) 200 | -------------------------------------------------------------------------------- /pdr/formats/pvo.py: 
-------------------------------------------------------------------------------- 1 | def orpa_low_res_loader(data, name): 2 | """ 3 | ORPA low resolution: labels for earlier orbits have the correct 4 | ROW_BYTES, but there is a typo introduced later that says 'ROW_BYTES = 5 | 241' instead of 243 6 | 7 | HITS 8 | * pvo 9 | * orpa_lowres 10 | """ 11 | block = data.metablock_(name) 12 | block["ROW_BYTES"] = 243 13 | return block 14 | 15 | 16 | def oims_12s_loader(data, name): 17 | """ 18 | OIMS 12 second averages: all labels say 'ROWS = 42' regardless of the 19 | data's actual length 20 | 21 | HITS 22 | * pvo 23 | * oims_12s 24 | """ 25 | block = data.metablock_(name) 26 | block["ROWS"] = data.metaget_("FILE_RECORDS") 27 | return block 28 | -------------------------------------------------------------------------------- /pdr/formats/rosetta.py: -------------------------------------------------------------------------------- 1 | def rosetta_table_loader(filename, fmtdef_dt): 2 | """ 3 | HITS 4 | * rosetta_rpc 5 | * RPCMIP 6 | """ 7 | import astropy.io.ascii 8 | 9 | table = astropy.io.ascii.read(filename).to_pandas() 10 | fmtdef, dt = fmtdef_dt 11 | table.columns = fmtdef["NAME"].to_list() 12 | return table 13 | 14 | 15 | def midas_rdr_sps_structure(block, name, filename, data, identifiers): 16 | """ 17 | SPS TIME_SERIES tables are made up of a repeated container with 4 columns 18 | followed by a non-repeated checksum column. In compute_offsets() the 19 | `block_names` list ends up out of order, so SB_OFFSET is not calculated 20 | correctly for columns in the repeated CONTAINER. 21 | 22 | TODO: This seems like a more general issue with how compute_offsets() 23 | handles a repeated container followed by a single column 24 | 25 | HITS 26 | * rosetta_dust 27 | * RDR_midas_sps 28 | """ 29 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 30 | from pdr.loaders.queries import read_table_structure 31 | import pandas as pd 32 | 33 | fmtdef = read_table_structure( 34 | block, name, filename, data, identifiers 35 | ) 36 | for end in ("_PREFIX", "_SUFFIX", ""): 37 | length = block.get(f"ROW{end}_BYTES") 38 | if length is not None: 39 | fmtdef[f"ROW{end}_BYTES"] = length 40 | 41 | # Add a placeholder row to the start of the fmtdef so that the 42 | # "block_names" list in compute_offsets() is in the right order and 43 | # SB_OFFSET is calculated correctly 44 | placeholder_row = { 45 | "NAME": "PLACEHOLDER_block", 46 | "DATA_TYPE": "VOID", 47 | "BYTES": 0, 48 | "START_BYTE": 1, 49 | "BLOCK_REPETITIONS": 1, 50 | "BLOCK_NAME": "CONTROL_DATA", # matches the checksum column's BLOCK_NAME 51 | "ROW_PREFIX_BYTES": 46, 52 | } 53 | fmtdef = pd.concat( 54 | [pd.DataFrame([placeholder_row]), fmtdef] 55 | ).reset_index(drop=True) 56 | 57 | fmtdef = compute_offsets(fmtdef) 58 | return insert_sample_types_into_df(fmtdef, identifiers) 59 | 60 | 61 | def fix_pad_length_structure(block, name, filename, data, identifiers): 62 | """ 63 | The MIDAS FSC tables and several CONSERT ptypes have ROW_PREFIX_BYTES, 64 | ROW_SUFFIX_BYTES, and a COLUMN with multiple ITEMS. compute_offsets() 65 | calculates the wrong end_byte and pad_length values from the BYTES and 66 | ROW_BYTES values in their labels. 
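    (Hypothetical illustration: for a COLUMN with ITEMS = 128 and
    ITEM_BYTES = 4, a per-item width of 4 and a total span of 512 differ
    by (128 - 1) * 4 = 508 bytes; if the label supplies one where the
    offset arithmetic expects the other, end_byte and pad_length both
    drift, which is what the reassignments below correct.)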
67 | 68 | HITS 69 | * rosetta_consert 70 | * l2_land 71 | * l2_orbit 72 | * l3_land 73 | * l3_land_fss 74 | * l3_orbit 75 | * l3_orbit_fss 76 | * l4_land 77 | * l4_orbit 78 | * l4_orbit_grnd 79 | * rosetta_dust 80 | * RDR_midas_fsc 81 | """ 82 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 83 | from pdr.loaders.queries import read_table_structure 84 | fmtdef = read_table_structure( 85 | block, name, filename, data, identifiers 86 | ) 87 | for end in ("_PREFIX", "_SUFFIX", ""): 88 | length = block.get(f"ROW{end}_BYTES") 89 | if length is not None: 90 | fmtdef[f"ROW{end}_BYTES"] = length 91 | 92 | # to calculate end_byte correctly in compute_offsets() 93 | fmtdef["BYTES"] = fmtdef["ITEM_BYTES"] 94 | # to calculate pad_length correctly in compute_offsets() 95 | fmtdef["ROW_BYTES"] = fmtdef["ROW_BYTES"] + fmtdef["ROW_PREFIX_BYTES"] 96 | 97 | fmtdef = compute_offsets(fmtdef) 98 | return insert_sample_types_into_df(fmtdef, identifiers) 99 | -------------------------------------------------------------------------------- /pdr/formats/saturn_rpx.py: -------------------------------------------------------------------------------- 1 | def rpx_img_hdu_start_byte(name, hdulist): 2 | """ 3 | The multiple *_IMAGE pointers in these files all point at the same FITS 4 | HDU (each pointer illegally represents one band of the image). 5 | 6 | HITS 7 | * saturn_rpx 8 | * hst_raw_img 9 | * hst_raw_mask 10 | * hst_cal_img 11 | * hst_cal_mask 12 | * hst_eng_data 13 | * hst_eng_mask 14 | """ 15 | if 'HEADER' in name: 16 | return 0 17 | return hdulist.fileinfo(0)['datLoc'] 18 | -------------------------------------------------------------------------------- /pdr/formats/themis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | from dustgoggles.structures import listify 5 | 6 | from pdr.parselabel.pds3 import pointerize 7 | 8 | 9 | def get_visgeo_qube_offset(data): 10 | """""" 11 | return True, data.metaget_("^QUBE")[1] - 1 12 | 13 | 14 | def trivial_themis_geo_loader(pointer): 15 | """ 16 | HITS 17 | * themis 18 | * ir_GEO_v2 19 | * vis_GEO_v2 20 | """ 21 | warnings.warn(f"THEMIS {pointer} objects are not currently supported.") 22 | return True 23 | 24 | 25 | def check_gzip_fn(data, object_name): 26 | """ 27 | Some THEMIS QUBEs are stored in gzipped formats. The labels do not always 28 | bother to mention this. 29 | 30 | HITS 31 | * themis 32 | * BTR 33 | * ABR 34 | * PBT_v1 35 | * PBT_v2 36 | * ALB_v2 37 | * ir_GEO_v2 38 | * vis_GEO_v2 39 | * ir_EDR 40 | * vis_EDR 41 | * vis_RDR 42 | """ 43 | target = data.metaget(pointerize(object_name)) 44 | if isinstance(target, (dict, int)): 45 | return False, None 46 | filename = listify(target)[0] 47 | if filename.endswith("gz"): 48 | return filename 49 | return True, [filename, f"{filename}.gz"] 50 | 51 | 52 | def get_qube_offset(data): 53 | """ 54 | some THEMIS QUBEs mis-specify file records. 55 | 56 | HITS 57 | * themis 58 | * ir_GEO_v2 59 | * vis_GEO_v2 60 | """ 61 | if ( 62 | data.metaget_("FILE_RECORDS") 63 | >= os.stat(data.file_mapping["QUBE"]).st_size 64 | ): 65 | return True, data.metaget_("^QUBE")[-1] - 1 66 | return False, None 67 | -------------------------------------------------------------------------------- /pdr/formats/ulysses.py: -------------------------------------------------------------------------------- 1 | def gas_table_loader(filename, fmtdef_dt): 2 | """ 3 | GASDATA.FMT has the wrong START_BYTE for columns in the container. 
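    (Hypothetical illustration: in a 16-byte-wide CONTAINER, a column with
    START_BYTE = 5 should land at absolute offsets 5, 21, 37, ... across
    repetitions; a START_BYTE that never increments reads the same bytes
    every time.)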
4 | After manually changing the labels during testing, START_BYTE was still 5 | not incrementing correctly with each repetition of the container. 6 | This fixes both issues with 1 special case. 7 | 8 | HITS 9 | * ulysses 10 | * gas 11 | """ 12 | import pandas as pd 13 | fmtdef, dt = fmtdef_dt 14 | # Some tables use tabs as column deliminators, others use spaces. 15 | table = pd.read_csv(filename, skiprows=17, sep=r"\s+", header=None) 16 | assert len(table.columns) == len(fmtdef.NAME.tolist()) 17 | table.columns = fmtdef.NAME.tolist() 18 | return table 19 | 20 | 21 | def get_sample_type(base_samp_info): 22 | """ 23 | The bit column's data_type is BIT_STRING, which throws errors. Guessing 24 | this should be MSB_BIT_STRING. The tables look correct when compared to 25 | their ASCII versions. 26 | 27 | HITS 28 | * ulysses 29 | * epac_pha_bin 30 | """ 31 | from pdr.datatypes import sample_types 32 | sample_type = base_samp_info["SAMPLE_TYPE"] 33 | sample_bytes = base_samp_info["BYTES_PER_PIXEL"] 34 | 35 | if "BIT_STRING" == sample_type: 36 | sample_type = "MSB_BIT_STRING" 37 | return True, sample_types( 38 | sample_type, int(sample_bytes), for_numpy=True 39 | ) 40 | return False, None 41 | 42 | 43 | def get_special_block(data, name, identifiers): 44 | """ 45 | START_BYTE is wrong for repeated columns within the container. ITEM_BYTES 46 | is also off by 1. 47 | 48 | HITS 49 | * ulysses 50 | * epac_all_chan 51 | * epac_omni_ele 52 | * epac_omni_pro 53 | * epac_pha_asc 54 | * epac_pha_bin 55 | * epac_prtl 56 | * epac_pstl 57 | """ 58 | block = data.metablock_(name) 59 | if "ULY-J-EPAC-4-SUMM-PSTL" in identifiers["DATA_SET_ID"]: 60 | block["CONTAINER"]["COLUMN"]["ITEM_BYTES"] = 13 61 | block["CONTAINER"]["COLUMN"]["START_BYTE"] = 1 62 | elif "ULY-J-EPAC-4-SUMM-ALL-CHAN" in identifiers["DATA_SET_ID"]: 63 | block.getall('CONTAINER')[0]['COLUMN']['START_BYTE'] = 1 64 | block.getall('CONTAINER')[1]['CONTAINER']['START_BYTE'] = 1 65 | block.getall('CONTAINER')[1]['CONTAINER']['COLUMN']['START_BYTE'] = 1 66 | return block 67 | -------------------------------------------------------------------------------- /pdr/formats/vega.py: -------------------------------------------------------------------------------- 1 | def get_structure(block, name, filename, data, identifiers): 2 | """ 3 | "Encounter data" tables miscount the last column's START_BYTE by 1 4 | 5 | HITS 6 | * vega 7 | * ducma 8 | """ 9 | from pdr.loaders.queries import read_table_structure 10 | fmtdef = read_table_structure( 11 | block, name, filename, data, identifiers 12 | ) 13 | 14 | if "encounter data" in block['DESCRIPTION']: 15 | fmtdef.at[10, "START_BYTE"] = 62 16 | return fmtdef, None 17 | 18 | 19 | def fix_array_structure(name, block, fn, data, identifiers): 20 | """ 21 | HITS 22 | 23 | * giotto 24 | * pia 25 | * vega 26 | * puma_mode 27 | """ 28 | from pdr.datatypes import sample_types 29 | from pdr.loaders.queries import read_table_structure, \ 30 | check_array_for_subobject 31 | 32 | if not block.get("INTERCHANGE_FORMAT") == "BINARY": 33 | return None, None 34 | has_sub = check_array_for_subobject(block) 35 | if not has_sub: 36 | dt = sample_types(block["DATA_TYPE"], block["BYTES"], True) 37 | return None, dt 38 | fmtdef = read_table_structure(block, name, fn, data, identifiers) 39 | specbytes = block.get("COLLECTION").get("BYTES") 40 | specstart = fmtdef.loc[ 41 | fmtdef['NAME'] == 'PLACEHOLDER_SPECTRUM', "START_BYTE" 42 | ].iloc[0] 43 | fmtdef.loc[fmtdef['NAME'] == 'PLACEHOLDER_SPECTRUM', "AXIS_ITEMS"] = ( 44 | (specbytes - 
specstart + 1) 45 | / len(fmtdef.loc[fmtdef['BLOCK_NAME'].str.contains('SPECTRUM')]) 46 | ) 47 | # Sometimes arrays define start_byte, sometimes their elements do 48 | if "START_BYTE" in fmtdef.columns: 49 | fmtdef['START_BYTE'] = fmtdef['START_BYTE'].fillna(1) 50 | from pdr.pd_utils import compute_offsets, insert_sample_types_into_df 51 | 52 | return insert_sample_types_into_df(compute_offsets(fmtdef), identifiers) 53 | -------------------------------------------------------------------------------- /pdr/formats/viking.py: -------------------------------------------------------------------------------- 1 | def seis_table_loader(filepath, fmtdef_dt): 2 | """ 3 | The Viking 2 seismometer tables have mangled labels. The raw data tables 4 | are variable length CSVs, and labels for the summary tables count column 5 | bytes wrong. Half the labels define columns that do not match the data. 6 | 7 | HITS 8 | * viking 9 | * seis_raw 10 | * seis_summary 11 | """ 12 | import pandas as pd 13 | 14 | col_names = [c for c in fmtdef_dt[0].NAME if "PLACEHOLDER" not in c] 15 | filename = filepath.split("/")[-1] 16 | # The summary tables have miscounted bytes in their labels. The columns are 17 | # separated by whitespace, so can be read by read_csv() instead. Also, both 18 | # labels define a SEISMIC_TIME_SOLS column that doesn't exist in the data. 19 | if "summary" in filename.lower(): 20 | table = pd.read_csv(filepath, header=None, sep=r"\s+") 21 | col_names.remove("SEISMIC_TIME_SOLS") 22 | if "event_wind_summary" in filename.lower(): 23 | # event_wind_summary.tab has a column not included in the label. It 24 | # is listed in: https://pds-geosciences.wustl.edu/viking/vl2-m-seis-5-rdr-v1/vl_9020/document/vpds_event_winds_format.txt 25 | col_names.insert(7, "ORIGINAL_LINES_COUNT") 26 | # The raw event tables are variable-length CSVs. Their labels include a 27 | # SEISMIC_SOL column that doesn't exist in the data. 28 | elif "event" in filename.lower(): 29 | table = pd.read_csv(filepath, header=None, sep=",") 30 | col_names.remove("SEISMIC_SOL") 31 | # The raw high-rate tables are variable-length CSVs. Their labels list the 32 | # correct number of columns. 33 | elif "high" in filename.lower(): 34 | table = pd.read_csv(filepath, header=None, sep=",") 35 | else: 36 | raise ValueError("Unknown Viking 2 SEIS table format.") 37 | assert len(table.columns) == len(col_names), "mismatched column count" 38 | table.columns = col_names 39 | return table 40 | -------------------------------------------------------------------------------- /pdr/formats/voyager.py: -------------------------------------------------------------------------------- 1 | def mag_special_block(data, name): 2 | """ 3 | ROW_BYTES are listed as 144 in the labels for Uranus and Neptune MAG RDRs. 4 | Their tables look the same, but the Neptune products open wrong. Setting 5 | ROW_BYTES to 145 fixes it. 6 | 7 | HITS 8 | * vg_mag 9 | * rdr_nep 10 | """ 11 | block = data.metablock_(name) 12 | block["ROW_BYTES"] = 145 13 | return block 14 | 15 | 16 | def get_structure(block, name, filename, data, identifiers): 17 | """ 18 | The VGR_PLS_HR_2017.FMT for PLS 1-hour averages undercounts the last column 19 | by 1 byte. 
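    (One-line sketch of the repair, mirroring the code below; the label
    evidently gives BYTES = 5 where the data occupies 6:

        fmtdef.at[8, "BYTES"] = 6
    )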
20 | 21 | HITS 22 | * vg_pls 23 | * sys_1hr_avg (partial) 24 | """ 25 | from pdr.loaders.queries import read_table_structure 26 | fmtdef = read_table_structure( 27 | block, name, filename, data, identifiers 28 | ) 29 | fmtdef.at[8, "BYTES"] = 6 30 | return fmtdef, None 31 | 32 | 33 | def pls_avg_special_block(data, name): 34 | """ 35 | Because VGR_PLS_HR_2017.FMT undercounts by 1 byte, the products that 36 | reference it also undercount their ROW_BYTES by 1. 37 | 38 | HITS 39 | * vg_pls 40 | * sys_1hr_avg 41 | """ 42 | block = data.metablock_(name) 43 | if block["^STRUCTURE"] == "VGR_PLS_HR_2017.FMT": 44 | block["ROW_BYTES"] = 57 45 | return True, block 46 | return False, None 47 | 48 | 49 | def pls_fine_special_block(data, name): 50 | """ 51 | Most of the PLS FINE RES labels undercount the ROW_BYTES. The most recent 52 | product (2007-241_2018-309) is formatted differently and opens correctly. 53 | 54 | HITS 55 | * vg_pls 56 | * sys_fine_res 57 | """ 58 | block = data.metablock_(name) 59 | if block["ROW_BYTES"] == 57: 60 | block["ROW_BYTES"] = 64 61 | return True, block 62 | return False, None 63 | 64 | 65 | def pls_ionbr_special_block(data, name): 66 | """ 67 | SUMRY.LBL references the wrong format file 68 | 69 | HITS 70 | * vg_pls 71 | * ur_ionbr (partial) 72 | """ 73 | block = data.metablock_(name) 74 | block["^STRUCTURE"] = "SUMRY.FMT" 75 | return True, block 76 | 77 | 78 | def pra_special_block(data, name, identifiers): 79 | """ 80 | PRA Lowband RDRs: The Jupiter labels use the wrong START_BYTE for columns 81 | in containers. The Saturn/Uranus/Neptune labels define columns with 82 | multiple ITEMS, but ITEM_BYTES is missing and the BYTES value is wrong. 83 | 84 | HITS 85 | * vg_pra 86 | * lowband_jup 87 | * lowband_other 88 | """ 89 | block = data.metablock_(name) 90 | if identifiers["DATA_SET_ID"] in ( 91 | "VG2-S-PRA-3-RDR-LOWBAND-6SEC-V1.0", 92 | "VG2-N-PRA-3-RDR-LOWBAND-6SEC-V1.0", 93 | "VG2-U-PRA-3-RDR-LOWBAND-6SEC-V1.0" 94 | ): 95 | for item in iter(block.items()): 96 | if "COLUMN" in item and "SWEEP" in item[1]["NAME"]: 97 | item[1].add("ITEM_BYTES", 4) # The original BYTES value 98 | item[1]["BYTES"] = 284 # ITEM_BYTES * ITEMS 99 | elif identifiers["DATA_SET_ID"] == "VG2-J-PRA-3-RDR-LOWBAND-6SEC-V1.0": 100 | for item in iter(block["CONTAINER"].items()): 101 | if "COLUMN" in item: 102 | if item[1]["NAME"] == "STATUS_WORD": 103 | item[1]["START_BYTE"] = 1 104 | if item[1]["NAME"] == "DATA_CHANNELS": 105 | item[1]["START_BYTE"] = 5 106 | return True, block 107 | 108 | 109 | def lecp_table_loader(filename, fmtdef_dt): 110 | """ 111 | VG1 LECP Jupiter SUMM Sector tables reference a format file with incorrect 112 | START_BYTEs for columns within a CONTAINER. Columns are consistently 113 | separated by whitespace. 114 | The VG2 Uranus 12.8 minute step table (ascii version) was missing values 115 | from some rows, not sure why. Reusing this special case fixes it. 116 | 117 | HITS 118 | vg_lecp 119 | * j_summ_sector_vg1 120 | * u_rdr_step_12.8 (partial) 121 | """ 122 | import pandas as pd 123 | 124 | fmtdef, dt = fmtdef_dt 125 | table = pd.read_csv(filename, header=None, sep=r"\s+") 126 | 127 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 128 | assert len(table.columns) == len(col_names), "mismatched column count" 129 | table.columns = col_names 130 | return table 131 | 132 | 133 | def lecp_vg1_sat_table_loader(filename, fmtdef_dt): 134 | """ 135 | VG1 Saturn RDR step products have an extra header row partway through their 136 | tables. 
This special case skips those rows by treating them as comments. 137 | PDS volume affected: VG1-S-LECP-3-RDR-STEP-6MIN-V1.0 138 | 139 | HITS 140 | vg_lecp 141 | * s_rdr_step (partial) 142 | """ 143 | import pandas as pd 144 | 145 | fmtdef, dt = fmtdef_dt 146 | # Rows that start with "VOYAGER" are extra headers. "comment='V'" skips them 147 | table = pd.read_csv(filename, comment='V') 148 | 149 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 150 | assert len(table.columns) == len(col_names), "mismatched column count" 151 | table.columns = col_names 152 | return table 153 | -------------------------------------------------------------------------------- /pdr/loaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/loaders/__init__.py -------------------------------------------------------------------------------- /pdr/loaders/_helpers.py: -------------------------------------------------------------------------------- 1 | """Simple utility functions for assorted loaders and queries.""" 2 | from __future__ import annotations 3 | from functools import wraps 4 | import os 5 | from pathlib import Path 6 | import re 7 | from typing import Any, Callable, Optional, Union, TYPE_CHECKING 8 | 9 | from cytoolz import curry 10 | from multidict import MultiDict 11 | 12 | if TYPE_CHECKING: 13 | from pdr.pdrtypes import DataIdentifiers, PhysicalTarget 14 | 15 | 16 | HETERODOX_ENDING = re.compile(r"\r\n?") 17 | """Pattern for heterodox but not deeply bizarre line endings.""" 18 | _cle = curry(re.sub, HETERODOX_ENDING, "\n") 19 | """partially evaluated replacer of heterodox with orthodox line endings.""" 20 | 21 | 22 | def looks_like_ascii(block: MultiDict, name: str) -> bool: 23 | """Is this probably an ASCII table?""" 24 | return ( 25 | ("SPREADSHEET" in name) 26 | or ("ASCII" in name) 27 | or (block.get("INTERCHANGE_FORMAT") == "ASCII") 28 | ) 29 | 30 | 31 | def quantity_start_byte( 32 | quantity_dict: dict[str, Union[str, int]], record_bytes: Optional[int] 33 | ) -> Optional[int]: 34 | """ 35 | Attempt to infer an object's start byte from a dict parsed from a PVL 36 | quantity object associated with a PVL pointer parameter, along with, if 37 | known, the size of a product's records (relevant only if the quantity 38 | units are not bytes). Returns None if we can't infer it (usually meaning 39 | that the label gives the start position in records but doesn't say how 40 | big the records are). 41 | """ 42 | # TODO: are there cases in which _these_ aren't 1-indexed? 43 | if quantity_dict["units"] == "BYTES": 44 | return quantity_dict["value"] - 1 45 | if record_bytes is not None: 46 | return record_bytes * max(quantity_dict["value"] - 1, 0) 47 | 48 | 49 | def count_from_bottom_of_file( 50 | fn: Union[str, list, Path], rows: int, row_bytes: int 51 | ) -> int: 52 | """ 53 | Fallback start-byte-finding function for cases in which a label gives 54 | the length of a table in terms of number of rows and row length, but does 55 | not specify where in the file the table _starts_. In these cases, the table 56 | usually goes to the end of the file, but may be preceded by a header or 57 | whatever, which means that we can often guess its start byte by subtracting 58 | the table size in bytes from the physical size of the file. This is not 59 | guaranteed to work! 
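    For example, with hypothetical numbers: a 100,000-byte file holding a
    10-row table of 1,000-byte rows gives an inferred start byte of
    100000 - (10 * 1000) = 90000, leaving the first 90,000 bytes to
    whatever header precedes the table.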
60 | """ 61 | tab_size = rows * row_bytes 62 | if isinstance(fn, list): 63 | fn = fn[0] 64 | return os.path.getsize(Path(fn)) - tab_size 65 | 66 | 67 | def _check_delimiter_stream( 68 | identifiers: DataIdentifiers, 69 | name: str, 70 | target: PhysicalTarget, 71 | block: MultiDict, 72 | ) -> bool: 73 | """ 74 | Does it look like this object is a delimiter-separated table without an 75 | explicitly-defined row length? 76 | """ 77 | # TODO: this may be deprecated. assess against notionally-supported 78 | # products. 79 | if isinstance(target, dict): 80 | if target.get("units") == "BYTES": 81 | return False 82 | # TODO: untangle this, everywhere 83 | if isinstance(target, (list, tuple)): 84 | if isinstance(target[-1], dict): 85 | if target[-1].get("units") == "BYTES": 86 | return False 87 | # TODO: Other criteria that could appear in the block? 88 | if "BYTES" in block: 89 | return False 90 | # TODO: not sure this is a good assumption -- it is a bad assumption 91 | # for the CHEMIN RDRs, but those labels are just wrong 92 | if identifiers["RECORD_BYTES"] not in (None, ""): 93 | return False 94 | # TODO: not sure this is a good assumption 95 | if not identifiers["RECORD_TYPE"] == "STREAM": 96 | return False 97 | # Well-known object types that imply textuality, if we have nothing 98 | # else to go on 99 | if any(label in name for label in ("ASCII", "SPREADSHEET", "HEADER")): 100 | return True 101 | return False 102 | 103 | 104 | def check_explicit_delimiter(block: MultiDict) -> str: 105 | """ 106 | Check if an ASCII TABLE/SPREADSHEET definition explicitly gives a field 107 | delimiter. If it doesn't, tentatively assume it's comma-separated. 108 | """ 109 | if "FIELD_DELIMITER" in block.keys(): 110 | try: 111 | return { 112 | "COMMA": ",", 113 | "VERTICAL_BAR": "|", 114 | "SEMICOLON": ";", 115 | "TAB": "\t", 116 | }[block["FIELD_DELIMITER"]] 117 | except KeyError: 118 | raise KeyError("Unknown FIELD_DELIMITER character.") 119 | return "," 120 | 121 | 122 | def canonicalize_line_endings(text: Any) -> Any: 123 | """ 124 | Attempt to replace common 'heterodox' line endings in a string or 125 | list/tuple of strings with canonical endings (\n). Does not attempt to 126 | perform sophisticated delimiter sniffing, and will only reliably handle 127 | only \r and \r\n endings, not \n\r, EM / 0x19, \r\r\n, etc. 128 | Ignores (returns unchanged) non-strings and non-string elements of 129 | lists/tuples. 130 | """ 131 | if isinstance(text, str): 132 | return _cle(text) 133 | if isinstance(text, (list, tuple)): 134 | return type(text)([_cle(s) if isinstance(s, str) else s for s in text]) 135 | return text 136 | 137 | 138 | def canonicalized(func: Callable) -> Callable: 139 | """ 140 | Creates a version of `func` that canonicalizes line endings of any string 141 | (or top-level string elements of a list/tuple), returned by `func` 142 | """ 143 | 144 | @wraps(func) 145 | def with_canonical_endings(*args, **kwargs): 146 | return canonicalize_line_endings(func(*args, **kwargs)) 147 | 148 | return with_canonical_endings 149 | -------------------------------------------------------------------------------- /pdr/loaders/astrowrap.py: -------------------------------------------------------------------------------- 1 | try: 2 | from astropy.io import fits 3 | from astropy.io.fits import HDUList 4 | from astropy.io.fits.hdu import BinTableHDU 5 | except ImportError: 6 | raise ModuleNotFoundError( 7 | "Reading FITS files requires the optional astropy dependency." 
8 | ) 9 | -------------------------------------------------------------------------------- /pdr/loaders/datawrap.py: -------------------------------------------------------------------------------- 1 | """Classes to wrap and manage complex data-loading workflows.""" 2 | from typing import Any 3 | 4 | from dustgoggles.dynamic import exc_report 5 | from dustgoggles.func import constant 6 | 7 | from pdr.formats import ( 8 | check_special_sample_type, 9 | check_special_qube_band_storage, 10 | check_special_position, 11 | check_special_structure, 12 | check_special_table_reader, 13 | check_special_fits_start_byte 14 | ) 15 | from pdr.func import get_argnames, softquery, specialize, call_kwargfiltered 16 | from pdr.parselabel.pds3 import depointerize 17 | from pdr.pdrtypes import LoaderFunction, PDRLike 18 | from pdr.loaders.queries import ( 19 | DEFAULT_DATA_QUERIES, 20 | get_identifiers, 21 | get_file_mapping, 22 | get_fits_start_byte, 23 | get_hdulist, 24 | get_target 25 | ) 26 | 27 | 28 | def _format_exc_report(exc: Exception) -> dict: 29 | """format an exception report for inclusion in another dict""" 30 | report = exc_report(exc) 31 | for k, v in tuple(report.items()): 32 | if k != 'exception': 33 | del report[k] 34 | report[f"exception_{k}"] = v 35 | return report 36 | 37 | 38 | class Loader: 39 | """ 40 | compact wrapper for loader functions, intended principally but not solely 41 | for library-internal use. provides a common interface, adds compactness, 42 | delays imports, etc. 43 | """ 44 | 45 | def __init__(self, loader_function: LoaderFunction): 46 | self.loader_function = loader_function 47 | self.argnames = get_argnames(loader_function) 48 | 49 | def __call__( 50 | self, pdrlike: PDRLike, name: str, **kwargs 51 | ) -> dict[str, Any]: 52 | kwargdict = {"data": pdrlike, "name": depointerize(name)} | kwargs 53 | kwargdict["tracker"].set_metadata(loader=self.__class__.__name__) 54 | record_exc = {"status": "query_ok"} 55 | try: 56 | info = softquery(self.loader_function, self.queries, kwargdict) 57 | except Exception as exc: 58 | record_exc = {"status": "query_failed"} | _format_exc_report(exc) 59 | raise exc 60 | finally: 61 | kwargdict["tracker"].track(self.loader_function, **record_exc) 62 | kwargdict["tracker"].dump() 63 | load_exc = {"status": "load_ok"} 64 | try: 65 | return {name: call_kwargfiltered(self.loader_function, **info)} 66 | except Exception as exc: 67 | load_exc = {"status": "load_failed"} | _format_exc_report(exc) 68 | raise exc 69 | finally: 70 | kwargdict["tracker"].track(self.loader_function, **load_exc) 71 | kwargdict["tracker"].dump() 72 | queries = DEFAULT_DATA_QUERIES 73 | 74 | 75 | class ReadImage(Loader): 76 | """wrapper for read_image""" 77 | 78 | def __init__(self): 79 | from pdr.loaders.image import read_image 80 | from pdr.loaders.queries import ( 81 | base_sample_info, 82 | im_sample_type, 83 | check_if_qube, 84 | get_qube_band_storage_type, 85 | generic_image_properties, 86 | ) 87 | 88 | super().__init__(read_image) 89 | self.queries = DEFAULT_DATA_QUERIES | { 90 | "base_samp_info": base_sample_info, 91 | "sample_type": specialize( 92 | im_sample_type, check_special_sample_type 93 | ), 94 | "band_storage_type": specialize( 95 | get_qube_band_storage_type, check_special_qube_band_storage 96 | ), 97 | "gen_props": specialize(generic_image_properties, check_if_qube), 98 | # just modifies gen_props in place, triggers transform in load step 99 | } 100 | 101 | 102 | class ReadTable(Loader): 103 | """wrapper for read_table""" 104 | 105 | def __init__(self): 
106 | from pdr.loaders.queries import table_position, parse_table_structure 107 | from pdr.loaders.table import read_table 108 | 109 | super().__init__(specialize(read_table, check_special_table_reader)) 110 | self.queries = DEFAULT_DATA_QUERIES | { 111 | "table_props": specialize(table_position, check_special_position), 112 | "fmtdef_dt": specialize( 113 | parse_table_structure, check_special_structure 114 | ), 115 | } 116 | 117 | 118 | class ReadHeader(Loader): 119 | """wrapper for read_header""" 120 | 121 | def __init__(self): 122 | from pdr.loaders.text import read_header 123 | from pdr.loaders.queries import table_position 124 | 125 | super().__init__(read_header) 126 | self.queries = DEFAULT_DATA_QUERIES | { 127 | "table_props": specialize(table_position, check_special_position) 128 | } 129 | 130 | 131 | class ReadText(Loader): 132 | """wrapper for read_text""" 133 | 134 | def __init__(self): 135 | from pdr.loaders.text import read_text 136 | 137 | super().__init__(read_text) 138 | 139 | 140 | class ReadLabel(Loader): 141 | """wrapper for read_label""" 142 | 143 | def __init__(self): 144 | from pdr.loaders.text import read_label 145 | 146 | super().__init__(read_label) 147 | 148 | 149 | class ReadFits(Loader): 150 | """wrapper for handle_fits_file""" 151 | 152 | 153 | def __init__(self): 154 | from pdr.loaders.handlers import handle_fits_file 155 | 156 | # noinspection PyTypeChecker 157 | super().__init__(handle_fits_file) 158 | 159 | def __call__(self, pdrlike: PDRLike, name: str, **kwargs): 160 | # slightly hacky but works with how we've done dictionary construction 161 | return tuple(super().__call__(pdrlike, name, **kwargs).values())[0] 162 | 163 | queries = DEFAULT_DATA_QUERIES | { 164 | "fn": get_file_mapping, 165 | 'target': get_target, 166 | "identifiers": get_identifiers, 167 | 'hdulist': get_hdulist, 168 | "hdu_id": specialize( 169 | get_fits_start_byte, check_special_fits_start_byte 170 | ), 171 | 'hdu_id_is_index': constant(False) 172 | } 173 | 174 | 175 | class ReadCompressedImage(Loader): 176 | """wrapper for handle_compressed_image""" 177 | 178 | def __init__(self): 179 | from pdr.loaders.handlers import handle_compressed_image 180 | 181 | super().__init__(handle_compressed_image) 182 | 183 | 184 | class ReadArray(Loader): 185 | """wrapper for read_array""" 186 | 187 | def __init__(self): 188 | from pdr.loaders.table import read_array 189 | from pdr.loaders.queries import parse_array_structure 190 | 191 | super().__init__(read_array) 192 | self.queries = DEFAULT_DATA_QUERIES | { 193 | "fmtdef_dt": specialize( 194 | parse_array_structure, check_special_structure 195 | ), 196 | } 197 | 198 | 199 | class TBD(Loader): 200 | """wrapper for tbd""" 201 | 202 | def __init__(self): 203 | from pdr.loaders.utility import tbd 204 | 205 | super().__init__(tbd) 206 | 207 | 208 | class Trivial(Loader): 209 | """wrapper for trivial""" 210 | 211 | def __init__(self): 212 | from pdr.loaders.utility import trivial 213 | 214 | super().__init__(trivial) 215 | -------------------------------------------------------------------------------- /pdr/loaders/dispatch.py: -------------------------------------------------------------------------------- 1 | """Functions to select appropriate Loader subclasses for data objects.""" 2 | 3 | from __future__ import annotations 4 | 5 | import re 6 | from typing import Optional, TYPE_CHECKING 7 | 8 | from pdr.formats import check_trivial_case 9 | from pdr.loaders.utility import ( 10 | looks_like_this_kind_of_file, 11 | DESKTOP_IMAGE_EXTENSIONS, 12 | 
FITS_EXTENSIONS, 13 | IMAGE_EXTENSIONS, 14 | TABLE_EXTENSIONS, 15 | TEXT_EXTENSIONS, 16 | ) 17 | from pdr.loaders.datawrap import ( 18 | Loader, 19 | ReadArray, 20 | ReadCompressedImage, 21 | ReadFits, 22 | ReadHeader, 23 | ReadImage, 24 | ReadLabel, 25 | ReadTable, 26 | ReadText, 27 | TBD, 28 | Trivial 29 | ) 30 | 31 | if TYPE_CHECKING: 32 | from pdr import Data 33 | 34 | 35 | def image_lib_dispatch(pointer: str, data: Data) -> Optional[Loader]: 36 | """ 37 | check file extensions to see if we want to toss a file to an external 38 | library rather than using our internal raster handling. current cases are: 39 | pillow for tiff, gif, or jp2; astropy for fits 40 | """ 41 | object_filename = data._target_path(pointer) 42 | if looks_like_this_kind_of_file(object_filename, FITS_EXTENSIONS): 43 | return ReadFits() 44 | if looks_like_this_kind_of_file( 45 | object_filename, DESKTOP_IMAGE_EXTENSIONS 46 | ): 47 | return ReadCompressedImage() 48 | return None 49 | 50 | 51 | def pointer_to_loader(pointer: str, data: Data) -> Loader: 52 | """ 53 | Attempt to select an appropriate Loader subclass based on a PDS3 object 54 | name (and sometimes the file extension). 55 | 56 | The apparently-redundant sequence of conditionals is not in fact redundant; 57 | it is based on our knowledge of the most frequently used but sometimes 58 | redundant object names in the PDS3 corpus. 59 | """ 60 | if check_trivial_case(pointer, data.identifiers, data.filename): 61 | return Trivial() 62 | if pointer == "LABEL": 63 | return ReadLabel() 64 | if image_lib_dispatch(pointer, data) is not None: 65 | return image_lib_dispatch(pointer, data) 66 | if ( 67 | "TEXT" in pointer 68 | or "PDF" in pointer 69 | or "MAP_PROJECTION_CATALOG" in pointer 70 | ): 71 | return ReadText() 72 | if "DESC" in pointer: # probably points to a reference file 73 | return ReadText() 74 | if "ARRAY" in pointer: 75 | return ReadArray() 76 | table_words = ( 77 | "TABLE", "SPREADSHEET", "CONTAINER", "SERIES", "SPECTRUM", "HISTOGRAM" 78 | ) 79 | if ( 80 | any(val in pointer for val in table_words) 81 | and not any(val+"_HEADER" in pointer for val in table_words) 82 | and "HISTOGRAM_IMAGE" not in pointer 83 | ): 84 | return ReadTable() 85 | if "HEADER" in pointer: 86 | if looks_like_this_kind_of_file( 87 | data.file_mapping[pointer], FITS_EXTENSIONS 88 | ): 89 | return ReadFits() 90 | return ReadHeader() 91 | # I have moved this below "table" due to the presence of a number of 92 | # binary tables named things like "Image Time Table". If there are pictures 93 | # of tables, we will need to do something more sophisticated. 94 | if ( 95 | ("IMAGE" in pointer) 96 | or ("QUB" in pointer) 97 | or ("XDR_DOCUMENT" in pointer) 98 | ): 99 | return ReadImage() 100 | if "FILE_NAME" in pointer: 101 | return file_extension_to_loader(pointer) 102 | return TBD() 103 | 104 | 105 | def file_extension_to_loader(fn: str) -> Loader: 106 | """ 107 | Attempt to select the correct Loader subclass for an object based solely on 108 | its file extension. Used primarily for objects only specified by a PDS3 109 | FILE_NAME pointer or similar. 
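Example (a sketch of the expected dispatch for two hypothetical filenames;
these paths are illustrative, not real products):

>>> type(file_extension_to_loader("products/scene.img")).__name__
'ReadImage'
>>> type(file_extension_to_loader("products/scene.csv")).__name__
'ReadTable'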
110 | """ 111 | if looks_like_this_kind_of_file(fn, FITS_EXTENSIONS): 112 | return ReadFits() 113 | if looks_like_this_kind_of_file(fn, IMAGE_EXTENSIONS): 114 | return ReadImage() 115 | if looks_like_this_kind_of_file(fn, TEXT_EXTENSIONS): 116 | return ReadText() 117 | if looks_like_this_kind_of_file(fn, TABLE_EXTENSIONS): 118 | return ReadTable() 119 | if looks_like_this_kind_of_file(fn, DESKTOP_IMAGE_EXTENSIONS): 120 | return ReadCompressedImage() 121 | return TBD() 122 | 123 | 124 | OBJECTS_TO_IGNORE = ( 125 | "DATA_SET_MAP_PROJECT.*", ".*_DESC$", ".*DESCRIPTION(_[0-9]*)?$" 126 | ) 127 | """ 128 | PDS3 objects we do not automatically load, even when loading greedily. 129 | These are reference files, usually throwaway ones, that are usually not 130 | archived in the same place as the data products and add little, if any, context 131 | to individual products (they are the same across an entire 'product type'). 132 | This means that in almost all cases, attempting to greedily load them has no 133 | purpose but to throw irrelevant warnings at the user. 134 | """ 135 | OBJECTS_IGNORED_BY_DEFAULT = re.compile("|".join(OBJECTS_TO_IGNORE)) 136 | -------------------------------------------------------------------------------- /pdr/loaders/text.py: -------------------------------------------------------------------------------- 1 | """Pointy-end functions for text-handling Loader subclasses.""" 2 | from io import TextIOWrapper 3 | from pathlib import Path 4 | from typing import Optional, Union 5 | import warnings 6 | 7 | from pdr.loaders._helpers import canonicalized 8 | from pdr.loaders.utility import looks_like_this_kind_of_file 9 | from pdr.parselabel.utils import trim_label 10 | from pdr.utils import check_cases, decompress 11 | 12 | 13 | def read_text(target: str, fn: Union[list[str], str]) -> Union[list[str], str]: 14 | """Read text from a file or list of files.""" 15 | try: 16 | if isinstance(fn, str): 17 | return ignore_if_pdf(check_cases(fn)) 18 | elif isinstance(fn, list): 19 | return [ 20 | ignore_if_pdf(check_cases(each_file)) 21 | for each_file in fn 22 | ] 23 | except FileNotFoundError or UnicodeDecodeError: 24 | warnings.warn(f"couldn't find {target}") 25 | raise 26 | 27 | 28 | def read_header( 29 | fn: Union[str, Path], 30 | table_props: dict, 31 | name: str = "HEADER" 32 | ) -> str: 33 | """Read a text header from a file.""" 34 | return skeptically_load_header(fn, table_props, name) 35 | 36 | 37 | @canonicalized 38 | def read_label( 39 | fn: Union[str, Path], 40 | fmt: Optional[str] = "text" 41 | ) -> Union[str, "PVLModule"]: 42 | """ 43 | Read the entirety of a PDS3 label, optionally using `pvl` to parse it as 44 | completely as possible into Python objects. This is not intended for use 45 | in the primary `pdr.Metadata` initialization workflow, but rather to 46 | handle cases when the user explicitly requests the entirety of the label 47 | (typically by accessing the "LABEL" key of a `pdr.Data` object). 48 | """ 49 | if fmt == "text": 50 | return trim_label(decompress(fn)) 51 | elif fmt == "pvl": 52 | import pvl 53 | 54 | return pvl.load(fn) 55 | raise NotImplementedError(f"The {fmt} format is not yet implemented.") 56 | 57 | 58 | @canonicalized 59 | def skeptically_load_header( 60 | fn: Union[Path, str], 61 | table_props: dict, 62 | name: str = "header", # TODO: what's with this default value? 63 | fmt: Optional[str] = "text", 64 | ) -> Union[str, "PVLModule", None]: 65 | """ 66 | Attempt to read a text HEADER object from a file. 
PDS3 does not give a 67 | strict definition of the HEADER object, so there is no way to 68 | _consistently_ load HEADERs in a coherent, well-formatted fashion. However, 69 | providers generally use HEADER to denote either attached file/product-level 70 | metadata, column headers for an ASCII table, or object-level 71 | contextualizing metadata for ASCII tables. 72 | 73 | By default, simply read the designated byte range as unicode text. If 74 | `fmt` is "pvl", also attempt to parse this text as PVL. (This will fail 75 | on most products, because most HEADER objects are not PVL, but is useful 76 | for some ancillary attached labels, especially ISIS labels.) 77 | 78 | NOTE: HEADERs defined in labels very often do not actually exist and are 79 | never essential for loading primary data objects, so this function is 80 | _always_ "optional", even in debug mode. If it fails, it will simply raise 81 | a UserWarning and return None. 82 | 83 | WARNING: this function is not intended to load metadata of standard file 84 | formats (such as TIFF tags or FITS headers). These headers should always 85 | be handled by a format-specific parser. More generally, it will never work 86 | on binary files. 87 | """ 88 | # TODO: all these check_cases calls are probably unnecessary w/new file 89 | # mapping workflow 90 | # FIXME: PVL mode ignores the table_props 91 | # FIXME: Character encoding should be controlled separately from as_rows 92 | try: 93 | if fmt == "pvl": 94 | try: 95 | from pdr.pvl_utils import cached_pvl_load 96 | 97 | return cached_pvl_load(decompress(check_cases(fn))) 98 | except ValueError: 99 | pass 100 | if table_props["as_rows"] is True: 101 | # In order to take advantage of Python's universal newline 102 | # handling, we need to decode the file and _then_ split it. 103 | # Tolerate encoding errors mainly because we might have a 104 | # textual header preceded or followed by binary data, and 105 | # the decoder is going to process more of the file than 106 | # the part we actually use. 107 | lines = [] 108 | start = table_props["start"] 109 | end = start + table_props["length"] 110 | with decompress(check_cases(fn)) as f: 111 | decoded_f = TextIOWrapper(f, encoding="UTF-8", errors="replace") 112 | for i, line in enumerate(decoded_f): 113 | if i >= end: 114 | break 115 | if i >= start: 116 | lines.append(line.replace("\n", "\r\n")) 117 | text = "".join(lines) 118 | else: 119 | with decompress(check_cases(fn)) as file: 120 | file.seek(table_props["start"]) 121 | text = file.read(min(table_props["length"], 80000)).decode( 122 | "ISO-8859-1" 123 | ) 124 | return text 125 | except (ValueError, OSError) as ex: 126 | warnings.warn(f"unable to parse {name}: {ex}") 127 | 128 | 129 | @canonicalized 130 | # TODO: misleading name. Primarily a file _reader_. 
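# A context-managed variant of the file read below (a sketch of what the
# "context manager" TODO at the end of this function might look like; same
# result, but the handle is closed promptly):
#
#     with open(check_cases(fn)) as stream:
#         return stream.read()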
131 | def ignore_if_pdf(fn: Union[str, Path]) -> Optional[str]:
132 | """Read text from a file if it's not a pdf."""
133 | if looks_like_this_kind_of_file(fn, [".pdf"]):
134 | warnings.warn(f"Cannot open {fn}; PDF files are not supported.")
135 | return
136 | # TODO: should use a context manager to avoid dangling file handles
137 | return open(check_cases(fn)).read()
138 | -------------------------------------------------------------------------------- /pdr/loaders/utility.py: --------------------------------------------------------------------------------
1 | """Support objects for 'utility' Loader subclasses."""
2 |
3 | from functools import partial
4 | from itertools import chain
5 | from operator import contains
6 | from pathlib import Path
7 | from typing import Collection
8 | import warnings
9 |
10 | from multidict import MultiDict
11 |
12 |
13 | # TODO, maybe: I think we should keep these somewhere else; they're certainly
14 | # not used exclusively in loaders
15 |
16 | LABEL_EXTENSIONS = (".xml", ".lbl")
17 | IMAGE_EXTENSIONS = (".img", ".rgb")
18 | TABLE_EXTENSIONS = (".tab", ".csv")
19 | TEXT_EXTENSIONS = (".txt", ".md")
20 | FITS_EXTENSIONS = (".fits", ".fit", ".fits.gz", ".fit.gz", ".fz")
21 | BMP_EXTENSIONS = (".bmp",)
22 | TIFF_EXTENSIONS = (".tif", ".tiff")
23 | JP2_EXTENSIONS = (".jp2", ".jpf", ".jpc", ".jpx")
24 | GIF_EXTENSIONS = (".gif",)
25 | JPEG_EXTENSIONS = (".jpg", ".jpeg") # NOTE: Also extensions like MPO/MPF
26 | PNG_EXTENSIONS = (".png",)
27 | WEBP_EXTENSIONS = (".webp",)
28 | DESKTOP_IMAGE_EXTENSION_SETS = (
29 | BMP_EXTENSIONS,
30 | TIFF_EXTENSIONS,
31 | JP2_EXTENSIONS,
32 | JPEG_EXTENSIONS,
33 | GIF_EXTENSIONS,
34 | PNG_EXTENSIONS,
35 | WEBP_EXTENSIONS
36 | )
37 | # NOTE: these are codes from pillow. some sources may call them other things.
38 | DESKTOP_IMAGE_STANDARDS = (
39 | "JPEG2000",
40 | "JPEG",
41 | "MPO",
42 | "WEBP",
43 | "PNG",
44 | "TIFF",
45 | "GIF",
46 | "BMP"
47 | )
48 | DESKTOP_IMAGE_EXTENSIONS = tuple(chain(*DESKTOP_IMAGE_EXTENSION_SETS))
49 |
50 |
51 | def trivial(*_, **__):
52 | """
53 | This is a trivial loader. It does not load. It exists for any pointers we
54 | don't want to load and instead simply want ignored.
55 | """
56 | pass
57 |
58 |
59 | def tbd(name: str, block: MultiDict, *_, **__):
60 | """
61 | This is a placeholder function for objects that are not explicitly
62 | supported elsewhere. It throws a warning and
63 | returns just the label block for the pointer.
64 | """
65 | warnings.warn(f"The {name} pointer is not yet fully supported.")
66 | return block
67 |
68 |
69 | def looks_like_this_kind_of_file(
70 | filename: str, kind_extensions: Collection[str]
71 | ) -> bool:
72 | """Does this file have any of these extensions?"""
73 | is_this_kind_of_extension = partial(contains, kind_extensions)
74 | return any(map(is_this_kind_of_extension, Path(filename.lower()).suffixes))
75 |
76 |
77 | def is_trivial(pointer: str) -> bool:
78 | """
79 | Returns True if this is the name of a data object we want to handle
80 | trivially, in the sense that we never ever want to load it directly.
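Example (illustrative pointer names, not drawn from any particular label):

>>> is_trivial("TIFF_HEADER"), is_trivial("TIFF_IMAGE")
(True, False)
>>> is_trivial("TABLE_STRUCTURE")
True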
81 | """ 82 | # TIFF tags / headers should always be parsed by the TIFF parser itself 83 | if ( 84 | ("TIFF" in pointer) 85 | and ("IMAGE" not in pointer) 86 | and ("DOCUMENT" not in pointer) 87 | ): 88 | return True 89 | # we don't present STRUCTURES separately from their tables 90 | if "STRUCTURE" in pointer: 91 | return True 92 | # only in MSL CCAM products; probably for internal processing pipelines 93 | if "PDS_OBJECT" in pointer: 94 | return True 95 | return False 96 | -------------------------------------------------------------------------------- /pdr/np_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Methods for working with numpy objects, primarily intended as components of 3 | pdr's image- and table-loading routines. 4 | """ 5 | from bz2 import BZ2File 6 | from gzip import GzipFile 7 | from io import BufferedIOBase, BytesIO 8 | from numbers import Number 9 | from typing import Optional, Union 10 | from zipfile import ZipFile 11 | 12 | import numpy as np 13 | 14 | 15 | def enforce_order_and_object(array: np.ndarray, inplace=True) -> np.ndarray: 16 | """ 17 | Make an ndarray compatible for use with pandas or other similarly-strict 18 | interfaces. Determine which, if any, of the array's fields are in nonnative 19 | byteorder and swap them; also convert any void dtypes to object. 20 | """ 21 | # NOTE: doing the void conversion in this function is inelegant but 22 | # somewhat efficient. 23 | # TODO: or is it? benchmark. 24 | if inplace is False: 25 | array = array.copy() 26 | if len(array.dtype) < 2: 27 | if len(array.dtype) == 0: 28 | dtype = array.dtype 29 | void_return = array 30 | else: 31 | dtype = array.dtype[0] 32 | # if we don't slice the field out explicitly, numpy will transform 33 | # it into an array of tuples 34 | void_return = array[tuple(array.dtype.fields.keys())[0]] 35 | if "V" in str(dtype): 36 | return void_return.astype("O") 37 | if dtype.isnative: 38 | return array 39 | return array.byteswap().view(array.dtype.newbyteorder("=")) 40 | swap_targets = [] 41 | swapped_dtype = [] 42 | for name, field in array.dtype.fields.items(): 43 | if field[0].isnative is False: 44 | swap_targets.append(name) 45 | swapped_dtype.append((name, field[0].newbyteorder("="))) 46 | elif "V" not in str(field[0]): 47 | swapped_dtype.append((name, field[0])) 48 | else: 49 | swapped_dtype.append((name, "O")) 50 | # TODO: this may work unreliably for small integer types 51 | return np.array(array, dtype=swapped_dtype) 52 | 53 | 54 | def casting_to_float(array: np.ndarray, *operands: Number) -> bool: 55 | """ 56 | check: will this operation cast the array to float? 57 | return True if array is integer-valued and any operands are not integers. 58 | """ 59 | return (array.dtype.char in np.typecodes["AllInteger"]) and not all( 60 | [isinstance(operand, int) for operand in operands] 61 | ) 62 | 63 | 64 | # TODO: shake this out with a bunch of different compression type examples, 65 | # including specific compressions on band/line/single-plane/etc. images, 66 | # compressed binary tables, etc. 67 | # TODO: I'm not sure if the above TODO is still relevant. 68 | def np_from_buffered_io( 69 | buffered_io: BufferedIOBase, 70 | dtype: Union[np.dtype, str], 71 | offset: Optional[int] = None, 72 | count: Optional[int] = None, 73 | ) -> np.ndarray: 74 | """ 75 | Read a 1D numpy array of the specified dtype, size, and offset from a 76 | buffered IO object. 
77 | """ 78 | if offset is not None: 79 | buffered_io.seek(offset) 80 | if isinstance(buffered_io, (BZ2File, ZipFile, GzipFile, BytesIO)): 81 | # we need to read the appropriate amount into a new buffer, especially 82 | # if it's monolithically compressed 83 | n_bytes = None if count is None else count * dtype.itemsize 84 | stream = BytesIO(buffered_io.read(n_bytes)) 85 | return np.frombuffer(stream.getbuffer(), dtype=dtype) 86 | count = -1 if count is None else count 87 | # In this case, buffered_io is just an open file stream 88 | return np.fromfile(buffered_io, dtype=dtype, count=count) 89 | 90 | 91 | def make_c_contiguous(arr: np.ndarray) -> np.ndarray: 92 | """ 93 | If an ndarray isn't C-contiguous, reorder it as C-contiguous. If it is, 94 | don't mess with it. 95 | """ 96 | if arr.flags["C_CONTIGUOUS"] is False: 97 | return np.ascontiguousarray(arr) 98 | return arr 99 | 100 | 101 | # TODO: really all arguments but ibm/sreg are redundant for basic S/360 formats 102 | def ibm_to_np(ibm: np.ndarray, sreg: int, ereg: int, mmask: int) -> np.ndarray: 103 | """ 104 | Convert an array composed of IBM System 360-style floats (expressed as 105 | 4- or 8-byte unsigned integers, as appropriate for byte width) to numpy 106 | float64. 107 | """ 108 | # dtype conversion: this field must be signed 109 | ibm_sign = (ibm >> sreg & 0x01).astype('int8') 110 | # dtype conversion: largest values possible will overfloat int64 or float32 111 | ibm_exponent = (ibm >> ereg & 0x7f).astype('float64') 112 | ibm_mantissa = ibm & mmask 113 | mantissa = ibm_mantissa / (2 ** ereg) 114 | exponent = 16 ** (ibm_exponent - 64) 115 | sign = 1 - (2 * ibm_sign).astype('int8') 116 | return sign * mantissa * exponent 117 | 118 | 119 | def ibm32_to_np_f32(ibm): 120 | """ 121 | Convert an array of IBM System 360-style 32-bit floats (expressed as 32-bit 122 | unsigned integers) to numpy float64. 123 | """ 124 | return ibm_to_np(ibm, 31, 24, 0x00ffffff) 125 | 126 | 127 | def ibm64_to_np_f64(ibm): 128 | """ 129 | Convert an array of IBM System 360-style 64-bit floats (expressed as 64-bit 130 | unsigned integers) to numpy float64. 131 | """ 132 | return ibm_to_np(ibm, 63, 56, 0x00ffffffffffffff) 133 | -------------------------------------------------------------------------------- /pdr/parselabel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/parselabel/__init__.py -------------------------------------------------------------------------------- /pdr/parselabel/pds4.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple utilities for preprocessing pds4_tools-produced label objects for the 3 | pdr.Metadata constructor. 4 | """ 5 | from collections import OrderedDict 6 | from typing import Mapping, TYPE_CHECKING 7 | 8 | from dustgoggles.func import constant 9 | from dustgoggles.structures import dig_for_keys 10 | from multidict import MultiDict 11 | 12 | 13 | if TYPE_CHECKING: 14 | from pdr.pds4_tools.reader.label_objects import Label 15 | 16 | 17 | def unpack_to_multidict( 18 | packed: Mapping, mtypes: tuple[type, ...] = (dict,) 19 | ) -> MultiDict: 20 | """ 21 | Recursively unpack any Mapping into a MultiDict. Unpacks all list or tuple 22 | values at any level into multiple keys at that level. 
This is an unusual- 23 | sounding behavior but is generally appropriate for PDS4 labels, and 24 | specifically for the pds4_tools representation of XML labels. PDS4 types 25 | with cardinality > 1 always (?) represent multiple distinct entities / 26 | properties rather than an array of properties. The list can also always be 27 | retrieved from the resulting multidict with `MultiDict.get_all()`. 28 | 29 | Example: 30 | ``` 31 | >>> unpack_to_multidict({'a': 1, 'b': [{'c': 2}, 3]}) 32 | , 'b': 3)> 33 | ``` 34 | """ 35 | unpacked, items = MultiDict(), list(reversed(packed.items())) 36 | while len(items) > 0: 37 | k, v = items.pop() 38 | if isinstance(v, (list, tuple)): 39 | items += [(k, e) for e in reversed(v)] 40 | elif isinstance(v, mtypes): 41 | unpacked.add(k, unpack_to_multidict(v, mtypes)) 42 | else: 43 | unpacked.add(k, v) 44 | return unpacked 45 | 46 | 47 | # noinspection PyTypeChecker 48 | def reformat_pds4_tools_label(label: "Label") -> tuple[MultiDict, list[str]]: 49 | """ 50 | Convert a pds4_tools Label object into a MultiDict and a list of parameters 51 | suitable for constructing a pdr.Metadata object. This is not just a type 52 | conversion; it also rearranges some nested data structures (in particular, 53 | repeated child elements become multiple keys of a MultiDict rather than 54 | a list of OrderedDicts). 55 | """ 56 | unpacked = unpack_to_multidict(label.to_dict(), (OrderedDict, MultiDict)) 57 | # collect all keys to populate pdr.Metadata's fieldcounts attribute 58 | params = dig_for_keys( 59 | unpacked, None, base_pred=constant(True), mtypes=(MultiDict,) 60 | ) 61 | return unpacked, params 62 | -------------------------------------------------------------------------------- /pdr/parselabel/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | from typing import Union, IO 4 | 5 | 6 | KNOWN_LABEL_ENDINGS = ( 7 | re.compile(b"\nEND {0,2}(\r| {8})"), # common PVL convention 8 | re.compile(b"\x00{3}"), # just null bytes, for odder cases 9 | ) 10 | """ 11 | Fast regex patterns for generic PVL label endings. They work for almost all PVL 12 | labels in the PDS. 13 | """ 14 | 15 | DEFAULT_PVL_LIMIT = 1000 * 1024 16 | """heuristic for max label size. 
we know it's not a real rule.""" 17 | 18 | 19 | class InvalidAttachedLabel(ValueError): 20 | pass 21 | 22 | 23 | def _scan_to_end_of_label( 24 | buf: IO, max_size: int, text: bytes, raise_no_ending: bool 25 | ): 26 | """Subroutine of trim_label()""" 27 | length = 0 28 | while length < max_size: 29 | if (chunk := buf.read(50 * 1024)) == b'': 30 | break 31 | for ending in KNOWN_LABEL_ENDINGS: 32 | if (endmatch := re.search(ending, text[:-15] + chunk)) is not None: 33 | return text + chunk[: endmatch.span()[1]] 34 | text, length = text + chunk, length + 50 * 1024 35 | if raise_no_ending is True: 36 | raise InvalidAttachedLabel("Couldn't find a label ending.") 37 | return text 38 | 39 | 40 | def trim_label( 41 | fn: Union[IO, Path, str], 42 | max_size: int = DEFAULT_PVL_LIMIT, 43 | strict_decode: bool = True, 44 | raise_no_ending: bool = False 45 | ) -> str: 46 | """Look for a PVL label at the top of a file.""" 47 | target_is_fn = isinstance(fn, (Path, str)) 48 | try: 49 | if target_is_fn is True: 50 | fn = open(fn, 'rb') 51 | text = fn.read(20) 52 | if strict_decode is True: 53 | try: 54 | text.decode('ascii') 55 | except UnicodeDecodeError: 56 | raise InvalidAttachedLabel("File head appears to be binary.") 57 | text = _scan_to_end_of_label(fn, max_size, text, raise_no_ending) 58 | finally: 59 | if target_is_fn is True: 60 | fn.close() 61 | policy = "strict" if strict_decode is True else "replace" 62 | try: 63 | return text.decode("utf-8", errors=policy) 64 | except UnicodeDecodeError: 65 | raise InvalidAttachedLabel("Invalid characters in label.") 66 | -------------------------------------------------------------------------------- /pdr/pdrtypes.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import ( 4 | Callable, Literal, Optional, TypedDict, TYPE_CHECKING, Union 5 | ) 6 | # TypeAlias is new in 3.10 7 | # this is exactly how it's defined in python3.11/typing.py 8 | try: 9 | from typing import TypeAlias 10 | except ImportError: 11 | def TypeAlias(self, parameters): 12 | raise TypeError(f"{self} is not subscriptable") 13 | 14 | 15 | if TYPE_CHECKING: 16 | from multidict import MultiDict 17 | import numpy as np 18 | import pandas as pd 19 | from pdr import Data, Metadata 20 | 21 | ByteOrder: TypeAlias = Literal["<", ">"] 22 | """Most significant/least significant byteorder codes""" 23 | 24 | PDRLike: TypeAlias = Union["Data", "Metadata"] 25 | """Something with a pdr-style metadata-getting interface""" 26 | 27 | LoaderFunction: TypeAlias = Callable[ 28 | ..., Union[str, "MultiDict", "pd.DataFrame", "np.ndarray"] 29 | ] 30 | """Signature of a Loader's load function""" 31 | 32 | PhysicalTarget: TypeAlias = Union[ 33 | list[str, int], tuple[str, int], int, str, dict[str, Union[str, int]] 34 | ] 35 | """Expected formats of 'pointer' parameters, i.e. ^WHATEVER = PhysicalTarget""" 36 | 37 | BandStorageType: TypeAlias = Literal[ 38 | "BAND_SEQUENTIAL", "LINE_INTERLEAVED", "SAMPLE_INTERLEAVED", None 39 | ] 40 | """ 41 | Codes for physical storage layout of 3-D arrays. Also known as BSQ/band 42 | sequential, BIL/band interleaved by line, BIP/band interleaved by pixel. 43 | None implies either that the storage layout is unknown or that the array is 44 | not 3-D. 
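As a worked example: for a hypothetical array with 2 bands, 2 lines, and 2
samples, writing its elements as b(band, line, sample), BAND_SEQUENTIAL
stores b000 b001 b010 b011 b100 b101 b110 b111; LINE_INTERLEAVED stores
b000 b001 b100 b101 b010 b011 b110 b111; and SAMPLE_INTERLEAVED stores
b000 b100 b001 b101 b010 b110 b011 b111.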
45 | """ 46 | 47 | Axname: TypeAlias = Literal["BAND", "LINE", "SAMPLE"] 48 | """Conventional names for image axes.""" 49 | 50 | 51 | class ImageProps(TypedDict): 52 | """Standard image properties dict used in image-processing workflows.""" 53 | # Number of bytes per pixel (eventually redundant with sample_type but 54 | # populated much earlier) 55 | BYTES_PER_PIXEL: Literal[1, 2, 4, 8] 56 | # Do the elements of the array, when loaded, represent VAX reals? 57 | is_vax_real: bool 58 | # numpy dtype string 59 | sample_type: str 60 | # total number of elements 61 | pixels: int 62 | # number of elements along each dimension 63 | nrows: int 64 | ncols: int 65 | nbands: int 66 | # physical storage layout of 3D arrays (None for 2D arrays) 67 | band_storage_type: BandStorageType 68 | # total row/column/band pad elements due to ISIS-style axplanes 69 | rowpad: int 70 | colpad: int 71 | bandpad: int 72 | # number of pad elements for left/right sideplanes 73 | prefix_rows: Optional[int] 74 | suffix_rows: Optional[int] 75 | # number of pad elements for bottom/topplanes 76 | prefix_cols: Optional[int] 77 | suffix_cols: Optional[int] 78 | # number of pad elements for front/backplanes 79 | prefix_bands: Optional[int] 80 | suffix_bands: Optional[int] 81 | # total pad elements due to line prefixes/suffixes 82 | linepad: int 83 | # number of elements in line prefix and suffix 84 | line_prefix_pix: Optional[int] 85 | line_suffix_pix: Optional[int] 86 | # Order of axes expressed as a tuple of axis names, only used by ISIS qubes 87 | axnames: Optional[tuple[Axname]] 88 | 89 | 90 | class DataIdentifiers(TypedDict): 91 | """ 92 | Standard PDS3 'identifiers' Data checks its Metadata for on initialization 93 | (if it's made from a PDS3 product). Used primarily to make special case 94 | checks more compact. These are taken directly from the label, then 95 | stringified if they're sets or tuples. All keys are always present, but 96 | may be None if a parameter's not actually in the label. 97 | """ 98 | DATA_SET_ID: Union[str, None] 99 | DATA_SET_NAME: Union[str, None] 100 | FILE_NAME: Union[str, None] 101 | FILE_RECORDS: Union[int, None] 102 | INSTRUMENT_ID: Union[str, None] 103 | INSTRUMENT_HOST_NAME: Union[str, None] 104 | INSTRUMENT_NAME: Union[str, None] 105 | LABEL_RECORDS: Union[int, None] 106 | NOTE: Union[str, None] 107 | PRODUCT_ID: Union[str, None] 108 | PRODUCT_TYPE: Union[str, None] 109 | RECORD_BYTES: Union[int, None] 110 | RECORD_TYPE: Union[str, None] 111 | ROW_BYTES: Union[int, None] 112 | ROWS: Union[int, None] 113 | SPACECRAFT_NAME: Union[str, None] 114 | STANDARD_DATA_PRODUCT_ID: Union[str, None] 115 | -------------------------------------------------------------------------------- /pdr/pds4_tools/CREDITS: -------------------------------------------------------------------------------- 1 | Authors: 2 | 3 | Lev Nagdimunov 4 | 5 | Contributors: 6 | 7 | Tilden Barnes 8 | Michael S. P. Kelley 9 | Matthew Knight 10 | Mark Bentley 11 | 12 | Special Thanks: 13 | 14 | Michael F. A'Hearn 15 | Ludmilla Kolokolova 16 | Anne Raugh 17 | James M. Bauer 18 | 19 | Other Credits: 20 | 21 | NASA PDS-Small Bodies Node for funding this project. 22 | PyInstaller and Py2App for binary packaging. 23 | SAOImage DS9 and fv FITS Viewer for inspiration. 
24 | -------------------------------------------------------------------------------- /pdr/pds4_tools/__about__.py: -------------------------------------------------------------------------------- 1 | __author__ = "Lev Nagdimunov" 2 | __copyright__ = "2015 - 2021, University of Maryland" 3 | 4 | __version__ = "1.4.dev0" 5 | __email__ = "lnagdi1@astro.umd.edu" 6 | -------------------------------------------------------------------------------- /pdr/pds4_tools/__init__.py: -------------------------------------------------------------------------------- 1 | from pdr.pds4_tools.__about__ import (__version__, __author__, __email__, __copyright__) 2 | 3 | from .reader import pds4_read 4 | from .reader import pds4_read as read 5 | 6 | from .utils.logging import set_loglevel 7 | 8 | try: 9 | from .viewer import pds4_viewer 10 | from .viewer import pds4_viewer as view 11 | except ImportError as e: 12 | 13 | def _missing_optional_deps(exception, *args, **kwargs): 14 | raise exception 15 | 16 | import functools as _functools 17 | pds4_viewer = view = _functools.partial(_missing_optional_deps, e) 18 | -------------------------------------------------------------------------------- /pdr/pds4_tools/extern/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/pds4_tools/extern/__init__.py -------------------------------------------------------------------------------- /pdr/pds4_tools/extern/cached_property.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015, Daniel Greenfeld 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # * Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # * Redistributions in binary form must reproduce the above 12 | # copyright notice, this list of conditions and the following 13 | # disclaimer in the documentation and/or other materials provided 14 | # with the distribution. 15 | # 16 | # * Neither the name of cached-property nor the names of its 17 | # contributors may be used to endorse or promote products derived 18 | # from this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 30 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | # POSSIBILITY OF SUCH DAMAGE. 
32 | 33 | # -*- coding: utf-8 -*- 34 | 35 | __author__ = 'Daniel Greenfeld' 36 | __email__ = 'pydanny@gmail.com' 37 | __version__ = '1.3.0' 38 | __license__ = 'BSD' 39 | 40 | from time import time 41 | import threading 42 | 43 | 44 | class cached_property(object): 45 | """ 46 | A property that is only computed once per instance and then replaces itself 47 | with an ordinary attribute. Deleting the attribute resets the property. 48 | 49 | Source: https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76 50 | """ # noqa 51 | 52 | def __init__(self, func): 53 | self.__doc__ = getattr(func, '__doc__') 54 | self.func = func 55 | 56 | def __get__(self, obj, cls): 57 | if obj is None: 58 | return self 59 | value = obj.__dict__[self.func.__name__] = self.func(obj) 60 | return value 61 | 62 | 63 | class threaded_cached_property(object): 64 | """ 65 | A cached_property version for use in environments where multiple threads 66 | might concurrently try to access the property. 67 | """ 68 | 69 | def __init__(self, func): 70 | self.__doc__ = getattr(func, '__doc__') 71 | self.func = func 72 | self.lock = threading.RLock() 73 | 74 | def __get__(self, obj, cls): 75 | if obj is None: 76 | return self 77 | 78 | obj_dict = obj.__dict__ 79 | name = self.func.__name__ 80 | with self.lock: 81 | try: 82 | # check if the value was computed before the lock was acquired 83 | return obj_dict[name] 84 | except KeyError: 85 | # if not, do the calculation and release the lock 86 | return obj_dict.setdefault(name, self.func(obj)) -------------------------------------------------------------------------------- /pdr/pds4_tools/extern/zscale.py: -------------------------------------------------------------------------------- 1 | # This file is part of the NumDisplay tool available at the following URL: 2 | # http://stsdas.stsci.edu/numdisplay/ 3 | # 4 | # Copyright (C) 2005 Association of Universities for Research in Astronomy (AURA) 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # 1. Redistributions of source code must retain the above copyright 10 | # notice, this list of conditions and the following disclaimer. 11 | # 12 | # 2. Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following 14 | # disclaimer in the documentation and/or other materials provided 15 | # with the distribution. 16 | # 17 | # 3. The name of AURA and its representatives may not be used to 18 | # endorse or promote products derived from this software without 19 | # specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY AURA ``AS IS'' AND ANY EXPRESS OR IMPLIED 22 | # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 23 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL AURA BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 29 | # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 30 | # USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 31 | # DAMAGE. 
32 | 33 | from __future__ import division # confidence high 34 | 35 | import math 36 | import numpy 37 | 38 | MAX_REJECT = 0.5 39 | MIN_NPIXELS = 5 40 | GOOD_PIXEL = 0 41 | BAD_PIXEL = 1 42 | KREJ = 2.5 43 | MAX_ITERATIONS = 5 44 | 45 | def zscale (image, nsamples=1000, contrast=0.25, bpmask=None, zmask=None): 46 | """Implement IRAF zscale algorithm 47 | 48 | Parameters 49 | ---------- 50 | image : arr 51 | 2-d numpy array 52 | 53 | nsamples : int (Default: 1000) 54 | Number of points in array to sample for determining scaling factors 55 | 56 | contrast : float (Default: 0.25) 57 | Scaling factor for determining min and max. Larger values increase the 58 | difference between min and max values used for display. 59 | 60 | bpmask : None 61 | Not used at this time 62 | 63 | zmask : None 64 | Not used at this time 65 | 66 | Returns 67 | ------- 68 | (z1, z2) 69 | """ 70 | 71 | # Sample the image 72 | samples = zsc_sample (image, nsamples, bpmask, zmask) 73 | npix = len(samples) 74 | samples.sort() 75 | zmin = samples[0] 76 | zmax = samples[-1] 77 | # For a zero-indexed array 78 | center_pixel = (npix - 1) // 2 79 | if npix%2 == 1: 80 | median = samples[center_pixel] 81 | else: 82 | median = 0.5 * (samples[center_pixel] + samples[center_pixel + 1]) 83 | 84 | # 85 | # Fit a line to the sorted array of samples 86 | minpix = max(MIN_NPIXELS, int(npix * MAX_REJECT)) 87 | ngrow = max (1, int (npix * 0.01)) 88 | ngoodpix, zstart, zslope = zsc_fit_line (samples, npix, KREJ, ngrow, 89 | MAX_ITERATIONS) 90 | 91 | if ngoodpix < minpix: 92 | z1 = zmin 93 | z2 = zmax 94 | else: 95 | if contrast > 0: zslope = zslope / contrast 96 | z1 = max (zmin, median - (center_pixel - 1) * zslope) 97 | z2 = min (zmax, median + (npix - center_pixel) * zslope) 98 | return z1, z2 99 | 100 | 101 | def zsc_sample (image, maxpix, bpmask=None, zmask=None): 102 | 103 | # Figure out which pixels to use for the zscale algorithm 104 | # Returns the 1-d array samples 105 | # Don't worry about the bad pixel mask or zmask for the moment 106 | # Sample in a square grid, and return the first maxpix in the sample 107 | nc = image.shape[0] 108 | nl = image.shape[1] 109 | stride = max (1.0, math.sqrt((nc - 1) * (nl - 1) / float(maxpix))) 110 | stride = int (stride) 111 | samples = image[::stride,::stride].flatten() 112 | 113 | # Remove invalid values for masked arrays 114 | if isinstance(samples, numpy.ma.MaskedArray): 115 | samples = samples.compressed() 116 | 117 | # Remove invalid values from ndarrays 118 | elif isinstance(samples, numpy.ndarray): 119 | samples = samples[numpy.isfinite(samples)] 120 | 121 | return samples[:maxpix] 122 | 123 | 124 | def zsc_fit_line (samples, npix, krej, ngrow, maxiter): 125 | 126 | # 127 | # First re-map indices from -1.0 to 1.0 128 | xscale = 2.0 / (npix - 1) 129 | xnorm = numpy.arange(npix) 130 | xnorm = xnorm * xscale - 1.0 131 | 132 | ngoodpix = npix 133 | minpix = max (MIN_NPIXELS, int (npix*MAX_REJECT)) 134 | last_ngoodpix = npix + 1 135 | 136 | intercept = 0 137 | slope = 0 138 | 139 | # This is the mask used in k-sigma clipping. 
0 is good, 1 is bad 140 | badpix = numpy.zeros(npix, dtype="int32") 141 | 142 | # Iterate 143 | for niter in range(maxiter): 144 | 145 | if (ngoodpix >= last_ngoodpix) or (ngoodpix < minpix): 146 | break 147 | 148 | # Accumulate sums to calculate straight line fit 149 | goodpixels = numpy.where(badpix == GOOD_PIXEL) 150 | sumx = xnorm[goodpixels].sum() 151 | sumxx = (xnorm[goodpixels]*xnorm[goodpixels]).sum() 152 | sumxy = (xnorm[goodpixels]*samples[goodpixels]).sum() 153 | sumy = samples[goodpixels].sum() 154 | sum = len(goodpixels[0]) 155 | 156 | delta = sum * sumxx - sumx * sumx 157 | # Slope and intercept 158 | intercept = (sumxx * sumy - sumx * sumxy) / delta 159 | slope = (sum * sumxy - sumx * sumy) / delta 160 | 161 | # Subtract fitted line from the data array 162 | fitted = xnorm*slope + intercept 163 | flat = samples - fitted 164 | 165 | # Compute the k-sigma rejection threshold 166 | ngoodpix, mean, sigma = zsc_compute_sigma (flat, badpix, npix) 167 | 168 | threshold = sigma * krej 169 | 170 | # Detect and reject pixels further than k*sigma from the fitted line 171 | lcut = -threshold 172 | hcut = threshold 173 | below = numpy.where(flat < lcut) 174 | above = numpy.where(flat > hcut) 175 | 176 | badpix[below] = BAD_PIXEL 177 | badpix[above] = BAD_PIXEL 178 | 179 | # Convolve with a kernel of length ngrow 180 | kernel = numpy.ones(ngrow,dtype="int32") 181 | badpix = numpy.convolve(badpix, kernel, mode='same') 182 | 183 | ngoodpix = len(numpy.where(badpix == GOOD_PIXEL)[0]) 184 | 185 | niter += 1 186 | 187 | # Transform the line coefficients back to the X range [0:npix-1] 188 | zstart = intercept - slope 189 | zslope = slope * xscale 190 | 191 | return ngoodpix, zstart, zslope 192 | 193 | 194 | def zsc_compute_sigma (flat, badpix, npix): 195 | 196 | # Compute the rms deviation from the mean of a flattened array. 197 | # Ignore rejected pixels 198 | 199 | # Accumulate sum and sum of squares 200 | goodpixels = numpy.where(badpix == GOOD_PIXEL) 201 | sumz = flat[goodpixels].sum() 202 | sumsq = (flat[goodpixels]*flat[goodpixels]).sum() 203 | ngoodpix = len(goodpixels[0]) 204 | if ngoodpix == 0: 205 | mean = None 206 | sigma = None 207 | elif ngoodpix == 1: 208 | mean = sumz 209 | sigma = None 210 | else: 211 | mean = sumz / ngoodpix 212 | temp = sumsq / (ngoodpix - 1) - sumz*sumz / (ngoodpix * (ngoodpix - 1)) 213 | if temp < 0: 214 | sigma = 0.0 215 | else: 216 | sigma = math.sqrt (temp) 217 | 218 | return ngoodpix, mean, sigma -------------------------------------------------------------------------------- /pdr/pds4_tools/reader/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import pds4_read 2 | -------------------------------------------------------------------------------- /pdr/pds4_tools/reader/read_headers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from .header_objects import HeaderStructure 7 | 8 | 9 | def _read_header_byte_data(header_structure): 10 | """ Reads the byte data from the data file for a PDS4 Header. 11 | 12 | Determines, from the structure's meta data, the relevant start and stop bytes in the data file prior to 13 | reading. 14 | 15 | Parameters 16 | ---------- 17 | header_structure : HeaderStructure 18 | The PDS4 Header data structure for which the byte data needs to be read. 
Should have been 19 | initialized via `HeaderStructure.from_file` method, or contain the required meta data. 20 | 21 | Returns 22 | ------- 23 | str or bytes 24 | The exact byte data for the header. 25 | """ 26 | 27 | from .core import read_byte_data 28 | 29 | meta_data = header_structure.meta_data 30 | 31 | start_byte = meta_data['offset'] 32 | stop_byte = start_byte + meta_data['object_length'] 33 | 34 | return read_byte_data(header_structure.parent_filename, start_byte, stop_byte) 35 | 36 | 37 | def new_header(input, **structure_kwargs): 38 | """ Create an header structure from PDS-compliant data. 39 | 40 | Parameters 41 | ---------- 42 | input : bytes, str or unicode 43 | A string or bytes containing the data for header. 44 | structure_kwargs : dict, optional 45 | Keywords that are passed directly to the `HeaderStructure` constructor. 46 | 47 | Returns 48 | ------- 49 | HeaderStructure 50 | An object representing the PDS4 header structure. The data attribute will contain *input*. 51 | Other attributes may be specified via *structure_kwargs*. 52 | """ 53 | 54 | # Create the HeaderStructure 55 | header_structure = HeaderStructure(**structure_kwargs) 56 | header_structure.data = input 57 | 58 | return header_structure 59 | 60 | 61 | def read_header_data(header_structure): 62 | """ 63 | Reads the data for a single PDS4 header structure, modifies *header_structure* to contain said data. 64 | 65 | Parameters 66 | ---------- 67 | header_structure : HeaderStructure 68 | The PDS4 Header data structure to which the data should be added. 69 | 70 | Returns 71 | ------- 72 | None 73 | """ 74 | 75 | header_byte_data = _read_header_byte_data(header_structure) 76 | 77 | header_structure.data = new_header(header_byte_data).data 78 | 79 | 80 | def read_header(full_label, header_label, data_filename, lazy_load=False, decode_strings=False): 81 | """ Create the `HeaderStructure`, containing label, data and meta data for a PDS4 Header from a file. 82 | 83 | Headers refer to PDS4 header data structures, which typically describe a portion of the data that serves 84 | as a header for some other data format. 85 | 86 | Parameters 87 | ---------- 88 | full_label : Label 89 | The entire label for a PDS4 product, from which *header_label* originated. 90 | header_label : Label 91 | Portion of label that defines the PDS4 header data structure. 92 | data_filename : str or unicode 93 | Filename, including the full path, of the data file that contains the data for this header. 94 | lazy_load : bool, optional 95 | If True, does not read-in the data of this header until the first attempt to access it. 96 | Defaults to False. 97 | decode_strings : bool, optional 98 | If True, the header data will be decoded to the ``unicode`` type in Python 2, and to the 99 | ``str`` type in Python 3. If False, leaves said data as a byte string. Defaults to False. 100 | 101 | Returns 102 | ------- 103 | HeaderStructure 104 | An object representing the header; contains its label, data and meta data 105 | 106 | Raises 107 | ------ 108 | TypeError 109 | Raised if called on a non-header according to *header_label*. 
110 | """ 111 | 112 | # Skip over data structure if its not actually an Array 113 | if 'Header' not in header_label.tag: 114 | raise TypeError('Attempted to read_header() on a non-header: ' + header_label.tag) 115 | 116 | # Create the data structure for this array 117 | header_structure = HeaderStructure.from_file(data_filename, header_label, full_label, 118 | lazy_load=lazy_load, decode_strings=decode_strings) 119 | 120 | return header_structure 121 | -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/pds4_tools/utils/__init__.py -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/compat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import unicode_literals 5 | 6 | import inspect 7 | from xml.etree import ElementTree as ET 8 | 9 | import numpy as np 10 | 11 | from ..extern import six 12 | 13 | # OrderedDict compat (Python 2.7+ and 3.1+) 14 | try: 15 | from collections import OrderedDict 16 | except ImportError: 17 | from ..extern.ordered_dict import OrderedDict 18 | 19 | # ArgParse compat (Python 2.7+ and 3.2+) 20 | try: 21 | import argparse 22 | except ImportError: 23 | from ..extern import argparse 24 | 25 | # ElementTree compat (Python 2.7+ and 3.3+) 26 | ET_Element = ET.Element if isinstance(ET.Element, six.class_types) else ET._Element 27 | ET_Tree_iter = ET.ElementTree.iter if hasattr(ET.ElementTree, 'iter') else ET.ElementTree.getiterator 28 | ET_Element_iter = ET_Element.iter if hasattr(ET_Element, 'iter') else ET_Element.getiterator 29 | ET_ParseError = ET.ParseError if hasattr(ET, 'ParseError') else None 30 | 31 | # NumPy compat (NumPy 2.0+) 32 | NUMPY_LT_2_0 = np.__version__.startswith(('0.', '1.')) 33 | 34 | try: 35 | np_unicode = np.unicode_ 36 | except AttributeError: 37 | np_unicode = np.str_ 38 | 39 | try: 40 | np_issubclass = np.issubclass_ 41 | except AttributeError: 42 | np_issubclass = issubclass 43 | 44 | 45 | # signature.bind(...).arguments compat (Python 3.3+) 46 | def bind_arguments(func, *args, **kwargs): 47 | # Python 3.3+ 48 | try: 49 | signature = inspect.signature(func) 50 | arguments = signature.bind(*args, **kwargs).arguments 51 | except AttributeError: 52 | # Python 2.7+ 53 | try: 54 | arguments = inspect.getcallargs(func, *args, **kwargs) 55 | defaults = inspect.getcallargs(func, (), ()) 56 | for arg in arguments.keys(): 57 | if (defaults[arg] == arguments[arg]) and (arg not in kwargs): 58 | del arguments[arg] 59 | except AttributeError: 60 | arguments = kwargs 61 | 62 | return arguments 63 | -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/constants.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import unicode_literals 5 | 6 | # PDS4 namespace URIs and default corresponding prefixes. Contains only those that have required 7 | # special usage in the code, and thus must be known. 
8 | PDS4_NAMESPACES = {'pds': 'http://pds.nasa.gov/pds4/pds/v1', 9 | 'disp': 'http://pds.nasa.gov/pds4/disp/v1', 10 | 'sp': 'http://pds.nasa.gov/pds4/sp/v1'} 11 | 12 | # PDS4 root element names for labels that could contain file areas with supported data structures 13 | PDS4_DATA_ROOT_ELEMENTS = ['Product_Observational', 14 | 'Product_Ancillary', 15 | 'Product_Browse', 16 | 'Product_Collection'] 17 | 18 | # PDS4 file area names that could contain supported data structures 19 | PDS4_DATA_FILE_AREAS = ['File_Area_Observational', 20 | 'File_Area_Observational_Supplemental', 21 | 'File_Area_Ancillary', 22 | 'File_Area_Browse', 23 | 'File_Area_Inventory'] 24 | 25 | # PDS4 table types that are supported data structures, and subclasses (which should be supported by default 26 | # since are subclasses) there of 27 | PDS4_TABLE_TYPES = ['Table_Character', 'Table_Binary', 'Table_Delimited', 'Inventory'] 28 | -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import unicode_literals 5 | 6 | from .deprecation import PDS4ToolsDeprecationWarning 7 | 8 | 9 | class PDS4StandardsException(Exception): 10 | """ Custom exception thrown when PDS4 Standards are violated. """ 11 | pass 12 | -------------------------------------------------------------------------------- /pdr/pil_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for dealing with 'desktop'-format images using pillow. 3 | 4 | TODO: not all of this ultimately goes here. Also, we might want to use opencv 5 | for some things instead. 
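Example (a sketch; assumes a real JPEG on disk, e.g. the sample in
pdr/tests/data):

>>> meta = skim_image_data("pdr/tests/data/squirrel.jpg")  # doctest: +SKIP
>>> meta["format"], meta["mimetype"]  # doctest: +SKIP
('JPEG', 'image/jpeg')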
6 | """ 7 | from io import BytesIO 8 | import re 9 | from pathlib import Path 10 | from typing import Any, Union, Mapping 11 | from xml.etree import ElementTree 12 | 13 | from dustgoggles.func import constant 14 | from dustgoggles.structures import dig_for_keys 15 | from multidict import MultiDict 16 | 17 | try: 18 | from PIL import Image 19 | from PIL.ExifTags import GPSTAGS, TAGS 20 | from PIL.ImageCms import ImageCmsProfile 21 | from PIL.TiffTags import lookup 22 | except ImportError: 23 | raise ModuleNotFoundError 24 | 25 | NS_PATTERN = re.compile("{.*?}") 26 | 27 | 28 | def unpack_icp(icp_blob: bytes): 29 | unpacked = {} 30 | for attr in dir((icp := ImageCmsProfile(BytesIO(icp_blob)).profile)): 31 | if attr.startswith("__"): 32 | continue 33 | if callable((obj := getattr(icp, attr))): 34 | continue 35 | unpacked[attr] = obj 36 | return unpacked 37 | 38 | 39 | def add_gps_ifd(im: Image, gps_tagname: int): 40 | gpsdict = im.getexif().get_ifd(gps_tagname) 41 | return {GPSTAGS[k].replace('GPS', ''): v for k, v in gpsdict.items()} 42 | 43 | 44 | def get_image_metadata(im: Image): 45 | outdict = {} 46 | meta = list(im.getexif().items()) 47 | if hasattr(im, "mpinfo"): 48 | meta += list(im.mpinfo.items()) 49 | for tag, val in meta: 50 | if tag in TAGS.keys(): 51 | name = TAGS[tag] 52 | elif ( 53 | im.format in ("TIFF", "MPO") 54 | and (tname := lookup(tag).name) != "unknown" 55 | ): 56 | name = tname 57 | else: 58 | name = str(tag) 59 | if name == 'GPSInfo': 60 | outdict |= add_gps_ifd(im, tag) 61 | elif name == 'XMLPacket': 62 | outdict[name] = unpack_xml(ElementTree.fromstring(val)) 63 | elif name == 'InterColorProfile': 64 | outdict[name] = unpack_icp(val) 65 | else: 66 | outdict[name] = val 67 | return outdict 68 | 69 | 70 | def strip_ns(tag): 71 | return NS_PATTERN.sub("", tag) 72 | 73 | 74 | def maybestrip_ns(obj, do_remove): 75 | text = obj.tag if isinstance(obj, ElementTree.Element) else obj 76 | return text if do_remove is False else strip_ns(text) 77 | 78 | 79 | def pick_text_attrib(node, remove_ns=True): 80 | has_text = node.text is not None and node.text.strip() != '' 81 | if has_text and len(node) > 0: 82 | raise SyntaxError( 83 | f"Can't parse text-containing parent node {node.tag}" 84 | ) 85 | has_attrib = len(node.attrib) != 0 86 | if has_text is has_attrib is False: 87 | return None 88 | if has_attrib is False: 89 | return node.text.strip() 90 | attrib = { 91 | maybestrip_ns(k, remove_ns): v for k, v in node.attrib.items() 92 | } 93 | if has_text is True: 94 | return {'attrib': attrib, 'text': node.text.strip()} 95 | return attrib 96 | 97 | 98 | def paramdig(unpacked: Mapping) -> tuple[Mapping, list[str]]: 99 | return unpacked, dig_for_keys( 100 | unpacked, None, base_pred=constant(True), mtypes=(MultiDict, dict) 101 | ) 102 | 103 | 104 | # TODO: probably want more! 
105 | IMAGE_META_ATTRS = ( 106 | 'mode', 107 | 'size', 108 | 'width', 109 | 'height', 110 | 'format', 111 | 'format_description', 112 | 'n_frames', 113 | ) 114 | 115 | 116 | def unpack_xml(root: ElementTree.Element, remove_ns: bool = True) -> Any: 117 | pick = pick_text_attrib(root, remove_ns) 118 | if len(root) == 0: 119 | return pick 120 | if pick is not None: 121 | # should only ever be dict or None for a non-terminal node 122 | xmd = MultiDict(pick) 123 | else: 124 | xmd = MultiDict() 125 | for node in root: 126 | unpacked = unpack_xml(node, remove_ns) 127 | if unpacked is None or len(unpacked) == 0: 128 | continue 129 | xmd.add(maybestrip_ns(node, remove_ns), unpacked) 130 | return xmd 131 | 132 | 133 | # TODO, maybe: decode ImageResources (see kings_river_canyon.tiff) 134 | def skim_image_data(fn: Union[str, Path]) -> dict: 135 | im, meta = Image.open(fn), {'fn': str(fn)} 136 | for attr in IMAGE_META_ATTRS: 137 | if (val := getattr(im, attr, None)) is None: 138 | continue 139 | meta[attr] = val 140 | meta['mimetype'] = Image.MIME[meta['format']] 141 | if (pal := getattr(im, 'palette', None)) is not None: 142 | # TODO, maybe: I hate that they use the color as the key and the 143 | # palette index as the value, but keeping it now for compatibility 144 | meta['palette'] = pal.colors 145 | # NOTE: this looks at TIFF tags for TIFFs by default 146 | return meta | get_image_metadata(im) 147 | -------------------------------------------------------------------------------- /pdr/pvl_utils.py: -------------------------------------------------------------------------------- 1 | """utilities for working with the `pvl` library.""" 2 | from functools import cache 3 | 4 | try: 5 | import pvl 6 | import pvl.decoder 7 | import pvl.grammar 8 | except ImportError: 9 | raise ModuleNotFoundError( 10 | "pvl is not installed. Please install pvl to parse PVL labels with it." 
11 | )
12 |
13 |
14 | class TimelessOmniDecoder(pvl.decoder.OmniDecoder):
15 | """OmniDecoder variant that never decodes datetimes (leaves them as strings)."""
16 | def __init__(self, *args, **kwargs):
17 | super().__init__(*args, grammar=pvl.grammar.OmniGrammar(), **kwargs)
18 |
19 | def decode_datetime(self, value: str):
20 | raise ValueError
21 |
22 |
23 | @cache
24 | def cached_pvl_load(reference):
25 | """Memoized pvl.load() that parses with TimelessOmniDecoder (no datetime decoding)."""
26 | import pvl
27 |
28 | return pvl.load(reference, decoder=TimelessOmniDecoder())
29 | -------------------------------------------------------------------------------- /pdr/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/__init__.py -------------------------------------------------------------------------------- /pdr/tests/conftest.py: --------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Union
3 |
4 | import numpy as np
5 | import pytest
6 |
7 | from dustgoggles.tracker import Tracker
8 | from pdr.tests.objects import (
9 | STUB_IMAGE_LABEL,
10 | STUB_BINARY_TABLE_LABEL,
11 | STUB_DSV_TABLE_LABEL,
12 | )
13 |
14 |
15 | @pytest.fixture(scope="session")
16 | def tracker_factory(tmp_path_factory):
17 | tracker_log_dir = tmp_path_factory.mktemp("tracker_logs", numbered=False)
18 |
19 | def make_tracker(path):
20 | return Tracker(path.name.replace(".", "_"), outdir=tracker_log_dir)
21 |
22 | return make_tracker
23 |
24 |
25 | def make_product(
26 | dir: Path,
27 | name: str,
28 | content: Union[np.ndarray, bytes, str],
29 | label: str,
30 | **extra_label_params: Union[str, int]
31 | ):
32 | if isinstance(content, np.ndarray):
33 | content = content.tobytes()
34 | mode = "wb"
35 | elif isinstance(content, bytes):
36 | mode = "wb"
37 | else:
38 | mode = "w"
39 |
40 | label = label.format(product_name=name, **extra_label_params)
41 |
42 | fpath = dir / (name + ".QQQ")
43 | lpath = dir / (name + ".LBL")
44 |
45 | with fpath.open(mode) as stream:
46 | stream.write(content)
47 | with lpath.open("w") as stream:
48 | stream.write(label)
49 | return (name, fpath, lpath)
50 |
51 |
52 | @pytest.fixture(scope="session")
53 | def products_dir(tmp_path_factory):
54 | return tmp_path_factory.mktemp("products", numbered=False)
55 |
56 |
57 | @pytest.fixture(scope="session")
58 | def uniband_image_product(products_dir):
59 | zeros = np.zeros((100, 100), dtype=np.uint8)
60 | return make_product(
61 | products_dir, "UB-IMG-PROD", zeros, STUB_IMAGE_LABEL, bands=1
62 | )
63 |
64 |
65 | @pytest.fixture(scope="session")
66 | def multiband_image_product(products_dir):
67 | zeros = np.zeros((100, 100, 3), dtype=np.uint8)
68 | return make_product(
69 | products_dir, "MB-IMG-PROD", zeros, STUB_IMAGE_LABEL, bands=3
70 | )
71 |
72 |
73 | @pytest.fixture(scope="session")
74 | def binary_table_product(products_dir):
75 | dtype = np.dtype([("x", np.uint8), ("y", np.float32), ("z", np.float64)])
76 | row = np.array([(1, 4.4, 8.8)], dtype=dtype)
77 | table = np.tile(row, 10)
78 | return make_product(
79 | products_dir, "BIN-TBL-PROD", table, STUB_BINARY_TABLE_LABEL
80 | )
81 |
82 |
83 | @pytest.fixture(scope="session")
84 | def dsv_table_product(products_dir):
85 | table = "5.5| cat| -12\r\n" * 10
86 | return make_product(
87 | products_dir, "DSV-TBL-PROD", table, STUB_DSV_TABLE_LABEL
88 | )
89 | -------------------------------------------------------------------------------- /pdr/tests/data/F187B51_cycle_3.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/F187B51_cycle_3.gif -------------------------------------------------------------------------------- /pdr/tests/data/Simple_Animated_Clock.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/Simple_Animated_Clock.webp -------------------------------------------------------------------------------- /pdr/tests/data/catseye_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/catseye_1.png -------------------------------------------------------------------------------- /pdr/tests/data/concert.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/concert.jpeg -------------------------------------------------------------------------------- /pdr/tests/data/kings_river_canyon.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/kings_river_canyon.tiff -------------------------------------------------------------------------------- /pdr/tests/data/squirrel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/squirrel.jpg -------------------------------------------------------------------------------- /pdr/tests/data/weather.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/weather.bmp -------------------------------------------------------------------------------- /pdr/tests/objects.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | def takes_a_few_things(a, b, c, *, d: Optional[int] = 1, e=5, **_): 5 | return a + b + c + d + e 6 | 7 | 8 | def takes_x_only(x): 9 | return x + 1 10 | 11 | 12 | STUB_BINARY_TABLE_LABEL = """ 13 | ^TABLE = "{product_name}.QQQ" 14 | RECORD_TYPE = STREAM 15 | FILE_RECORDS = 10 16 | RECORD_BYTES = 13 17 | OBJECT = TABLE 18 | INTERCHANGE_FORMAT = BINARY 19 | ROWS = 10 20 | ROW_BYTES = 13 21 | COLUMNS = 3 22 | OBJECT = COLUMN 23 | NAME = "X" 24 | DATA_TYPE = "UNSIGNED_INTEGER" 25 | START_BYTE = 1 26 | BYTES = 1 27 | END_OBJECT = COLUMN 28 | OBJECT = COLUMN 29 | NAME = "Y" 30 | DATA_TYPE = "PC_REAL" 31 | START_BYTE = 2 32 | BYTES = 4 33 | END_OBJECT = COLUMN 34 | OBJECT = COLUMN 35 | NAME = "X" 36 | DATA_TYPE = "PC_REAL" 37 | START_BYTE = 6 38 | BYTES = 8 39 | END_OBJECT = COLUMN 40 | END_OBJECT = TABLE 41 | END 42 | """ 43 | 44 | STUB_DSV_TABLE_LABEL = """ 45 | ^SPREADSHEET = "{product_name}.QQQ" 46 | RECORD_TYPE = STREAM 47 | FILE_RECORDS = 10 48 | RECORD_BYTES = 17 49 | OBJECT = SPREADSHEET 50 | INTERCHANGE_FORMAT = ASCII 51 | ROWS = 10 52 | FIELD_DELIMITER = VERTICAL_BAR 53 | COLUMNS = 3 54 | OBJECT = COLUMN 55 | NAME = "X" 56 | DATA_TYPE = "ASCII_INTEGER" 
57 | END_OBJECT = COLUMN 58 | OBJECT = COLUMN 59 | NAME = "Y" 60 | DATA_TYPE = "ASCII_REAL" 61 | END_OBJECT = COLUMN 62 | OBJECT = COLUMN 63 | NAME = "X" 64 | DATA_TYPE = "ASCII_REAL" 65 | END_OBJECT = COLUMN 66 | END_OBJECT = TABLE 67 | END 68 | """ 69 | 70 | 71 | STUB_IMAGE_LABEL = """ 72 | ^IMAGE = "{product_name}.QQQ" 73 | SPACECRAFT_NAME = "ORBITER" 74 | OBJECT = IMAGE 75 | INTERCHANGE_FORMAT = BINARY 76 | LINES = 100 77 | LINE_SAMPLES = 100 78 | SAMPLE_TYPE = LSB_UNSIGNED_INTEGER 79 | SAMPLE_BITS = 8 80 | BANDS = {bands} 81 | BAND_STORAGE_TYPE = BAND_SEQUENTIAL 82 | FIRST_LINE = 1 83 | FIRST_LINE_SAMPLE = 1 84 | SAMPLE_BIT_MASK = 2#0111111111111111# 85 | INVALID_CONSTANT = 0 86 | MISSING_CONSTANT = 0 87 | END_OBJECT = IMAGE 88 | END 89 | """ 90 | 91 | SILLY_LABEL = """ 92 | PDS_VERSION_ID = NO 93 | /* FILE DATA ELEMENTS */ 94 | RECORD_TYPE = ABSOLUTELY_NOT 95 | RECORD_BYTES = 1000000 96 | FILE_RECORDS = -1 97 | /* pointer to CAT */ 98 | ^CAT = "MEOW.CAT" /* 0:SPECTRUM IR; 1:IMAGE */ 99 | CAT_NAME = LILY 100 | SOME_PARAMETER = "1000" /* h h h h i! */ 101 | OTHER_CATS = { 102 | "this_one"} 103 | DESCRIPTION = "This is a really 104 | nice cat. MONTMORILLONITE = 100. 105 | Great cat" 106 | /* Misidentification Data Elements */ 107 | NOTHING:FF = "B" 108 | MEOW_SEQUENCE_NUMBERS = (1, 2, 109 | 3, 4, "5" 110 | ) 111 | /* Coordinate System State: Tail */ 112 | 113 | 114 | GROUP = TAIL_COORDINATE_SYSTEM_PARMS 115 | COORDINATE_SYSTEM_NAME = TAIL_FRAME 116 | OBJECT = TIP_OF_TAIL_FORMAT 117 | POINTINESS = 12 118 | END_OBJECT = TAIL_TIP_FORMAT 119 | COORDINATE_SYSTEM_INDEX_NAME = ("CURL", "FUR", "POSE") 120 | ARTICULATION_DEVICE_ANGLE = ( -0.000045 , -0.785042 ) 121 | END_GROUP = I_FORGOT 122 | END 123 | """ 124 | 125 | BLOCK_TEXT = """OBJECT = IMAGE 126 | INTERCHANGE_FORMAT = BINARY 127 | LINES = 650 128 | LINE_SAMPLES = 350 129 | SAMPLE_TYPE = IEEE_REAL 130 | SAMPLE_BITS = 32 131 | BANDS = 3 132 | BAND_STORAGE_TYPE = BAND_SEQUENTIAL 133 | FIRST_LINE = 375 134 | FIRST_LINE_SAMPLE = 1 135 | SAMPLE_BIT_MASK = 2#0111111111111111# 136 | INVALID_CONSTANT = (0.0,0.0,0.0) 137 | MISSING_CONSTANT = (0.0,0.0,0.0) 138 | END_OBJECT = IMAGE 139 | """ 140 | 141 | QUBE_BLOCK_TEXT = """OBJECT = SPECTRAL_QUBE 142 | AXES = 3 143 | AXIS_NAME = (SAMPLE, LINE, BAND) 144 | ISIS_STRUCTURE_VERSION_ID = "2.1" 145 | /* Core Description */ 146 | CORE_ITEMS = (100, 66, 17) 147 | CORE_NAME = "CALIBRATED SPECTRAL RADIANCE" 148 | CORE_ITEM_BYTES = 4 149 | CORE_ITEM_TYPE = IEEE_REAL 150 | CORE_BASE = 0.000000 151 | CORE_MULTIPLIER = 1.000000 152 | CORE_UNIT = "uWATT*CM**-2*SR**-1*uM**-1" 153 | CORE_NULL = -1.0 154 | CORE_VALID_MINIMUM = 0.0 155 | CORE_LOW_REPR_SATURATION = -32767.0 156 | CORE_LOW_INSTR_SATURATION = -32766.0 157 | CORE_HIGH_REPR_SATURATION = -32765.0 158 | CORE_HIGH_INSTR_SATURATION = -32764.0 159 | SUFFIX_ITEMS = (0,0,8) 160 | BAND_SUFFIX_ITEM_BYTES = 4 161 | END_OBJECT 162 | """ 163 | 164 | # TODO: Can we leave out even more stuff? 
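165 | # MINIMAL_PDS4_LABEL is the smallest label reformat_pds4_tools_label is
166 | # expected to handle: an Identification_Area holding only a
167 | # logical_identifier, plus empty Observation_Area, Reference_List, and
168 | # File_Area_Observational elements, which should unpack to None (see
169 | # test_parselabel_pds4.py).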
170 | MINIMAL_PDS4_LABEL = """
171 | <Product_Observational
172 |     xmlns="http://pds.nasa.gov/pds4/pds/v1">
173 |   <Identification_Area>
174 |     <logical_identifier>urn:nasa:pds:mc_pdr_testsuite:test_labels:test_minimal_label.dat</logical_identifier>
175 |   </Identification_Area>
176 |   <Observation_Area/>
177 |   <Reference_List/>
178 |   <File_Area_Observational/>
179 | </Product_Observational>
180 | """
181 | 
--------------------------------------------------------------------------------
/pdr/tests/test_bit_handling.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | import pandas as pd
4 | 
5 | from pdr.bit_handling import expand_bit_strings
6 | from pdr.loaders.queries import read_table_structure
7 | from pdr.parselabel.pds3 import literalize_pvl_block, parse_pvl
8 | from pdr.pdrtypes import DataIdentifiers
9 | 
10 | BIT_STUB = """
11 | OBJECT = COLUMN
12 |     NAME = BITS1
13 |     BYTES = 2
14 |     START_BYTE = 1
15 |     DATA_TYPE = "MSB_BIT_STRING"
16 |     OBJECT = BIT_COLUMN
17 |         NAME = BITS2
18 |         BIT_DATA_TYPE = "MSB_INTEGER"
19 |         BITS = 3
20 |         START_BIT = 1
21 |     END_OBJECT = BIT_COLUMN
22 |     OBJECT = BIT_COLUMN
23 |         NAME = BITS2
24 |         BIT_DATA_TYPE = "MSB_INTEGER"
25 |         BITS = 3
26 |         START_BIT = 5
27 |     END_OBJECT = BIT_COLUMN
28 |     OBJECT = BIT_COLUMN
29 |         NAME = BITS3
30 |         BIT_DATA_TYPE = "MSB_INTEGER"
31 |         BITS = 4
32 |         START_BIT = 9
33 |     END_OBJECT = BIT_COLUMN
34 |     OBJECT = BIT_COLUMN
35 |         NAME = BITS4
36 |         BIT_DATA_TYPE = "MSB_INTEGER"
37 |         BITS = 4
38 |         START_BIT = 13
39 |     END_OBJECT = BIT_COLUMN
40 | END_OBJECT = COLUMN
41 | """
42 | 
43 | NULL_IDENTIFIERS = {field: "" for field in DataIdentifiers.__required_keys__}
44 | 
45 | 
46 | def test_bit_handling():
47 |     block = parse_pvl(BIT_STUB)[0]
48 |     fmtdef = read_table_structure(block, 'TABLE', None, None, NULL_IDENTIFIERS)
49 |     bits = random.choices((0, 1), k=16)
50 |     table = pd.DataFrame(
51 |         {'BITS1': [int("".join(map(str, bits)), 2).to_bytes(2, 'big')]}
52 |     )
53 |     table = expand_bit_strings(table, fmtdef)
54 |     strings = table.loc[0, 'BITS1']
55 |     assert strings[0] == ''.join(map(str, bits[0:3]))
56 |     assert strings[1] == ''.join(map(str, bits[4:7]))
57 |     assert strings[2] == "".join(map(str, bits[8:12]))
58 |     assert strings[3] == "".join(map(str, bits[12:16]))
59 | 
--------------------------------------------------------------------------------
/pdr/tests/test_browsify.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | import numpy as np
4 | import pandas as pd
5 | 
6 | from pdr.browsify import (
7 |     find_masked_bounds,
8 |     find_unmasked_bounds,
9 |     normalize_range,
10 |     eightbit,
11 |     colorfill_maskedarray,
12 |     browsify,
13 | )
14 | 
15 | import pytest
16 | try:
17 |     from PIL import Image
18 |     pil_available = True
19 | except ImportError:
20 |     pil_available = False
21 | 
22 | RNG = np.random.default_rng()
23 | # NOTE: all these tests have minuscule chances of randomly failing.
24 | 25 | 26 | def test_find_masked_bounds(): 27 | array = np.ma.masked_outside(RNG.poisson(10, (1024, 1024)), 1, 20) 28 | bounds = find_masked_bounds(array, 0, 0) 29 | assert bounds == (1, 20) 30 | bounds2 = find_masked_bounds(array, 10, 10) 31 | assert bounds2[0] > 1 32 | assert bounds2[1] < 20 33 | 34 | 35 | def test_find_unmasked_bounds(): 36 | array, _ = np.indices((100, 100)) 37 | bounds = find_unmasked_bounds(array, 0, 0) 38 | assert bounds == (0, 99) 39 | bounds2 = find_unmasked_bounds(array, 10, 10) 40 | assert bounds2[0] == 9 41 | assert bounds2[1] == 89 42 | 43 | 44 | def test_normalize_range(): 45 | array = RNG.poisson(50, (1024, 1024)) 46 | norm = normalize_range(array) 47 | assert norm.min() == 0 48 | assert norm.max() == 1 49 | norm2 = normalize_range(array, clip=10) 50 | assert norm2.std() > norm.std() 51 | 52 | 53 | def test_eightbit(): 54 | array = RNG.poisson(100, (1024, 1024)) 55 | eight = eightbit(array, 10) 56 | assert eight.min() == 0 57 | assert eight.max() == 255 58 | assert eight.dtype == np.dtype("uint8") 59 | assert eight.std() / eight.mean() > array.std() / array.mean() 60 | 61 | 62 | def test_colorfill_maskedarray(): 63 | arr = RNG.poisson(100, (1024, 1024)) 64 | masked = np.ma.masked_outside(arr, 10, 90) 65 | filled = colorfill_maskedarray(masked) 66 | assert np.equal(filled[masked.mask], np.array([0, 255, 255])).all() 67 | 68 | 69 | def test_browsify_df(tmp_path): 70 | obj = pd.DataFrame({"a": [1, 2], "b": ["cat", "dog"]}) 71 | browsify(obj, tmp_path / "browse") 72 | df = pd.read_csv(tmp_path / "browse.csv") 73 | assert (df["a"] == [1, 2]).all() 74 | assert (df["b"] == ["cat", "dog"]).all() 75 | 76 | 77 | @pytest.mark.skipif(not pil_available, reason="PIL not available") 78 | def test_browsify_array(tmp_path): 79 | arr = np.ma.masked_outside(RNG.poisson(100, (1024, 1024)), 10, 90) 80 | 81 | browsify(arr, tmp_path / "browse") 82 | im = Image.open(tmp_path / "browse.jpg") 83 | assert im.size == (1024, 1024) 84 | # compression artifacts etc. 
mean it's not precisely equal
85 |     assert (
86 |         np.abs(
87 |             np.subtract(
88 |                 np.asarray(im)[arr.mask], np.array([0, 255, 255])
89 |             ).mean()
90 |         )
91 |         < 5
92 |     )
93 | 
--------------------------------------------------------------------------------
/pdr/tests/test_data.py:
--------------------------------------------------------------------------------
1 | import pdr
2 | 
3 | from pdr.tests.objects import STUB_IMAGE_LABEL
4 | 
5 | 
6 | def test_data_init_basic(uniband_image_product):
7 |     prod_name, fpath, lpath = uniband_image_product
8 |     expected_label = STUB_IMAGE_LABEL.format(product_name=prod_name, bands=1)
9 | 
10 |     data = pdr.read(fpath)
11 |     assert data.LABEL == expected_label
12 |     assert data._target_path('IMAGE') == str(fpath)
13 |     for k, v in data.identifiers.items():
14 |         if k == 'SPACECRAFT_NAME':
15 |             assert v == 'ORBITER'
16 |         else:
17 |             assert v == ''
18 |     assert data.keys() == ["LABEL", "IMAGE"]
19 |     assert data.metaget("^IMAGE") == prod_name + ".QQQ"
20 |     assert data.get_absolute_paths('x')[0] == (fpath.parent / 'x').absolute()
21 |     data2 = pdr.read(lpath)
22 |     assert data.LABEL == data2.LABEL
23 | 
--------------------------------------------------------------------------------
/pdr/tests/test_datatypes.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | 
3 | from pdr.datatypes import sample_types
4 | 
5 | 
6 | def test_sample_types():
7 |     pds3_data_types = (
8 |         "CHARACTER",
9 |         "IEEE_REAL",
10 |         "LSB_INTEGER",
11 |         "LSB_UNSIGNED_INTEGER",
12 |         "MSB_INTEGER",
13 |         "MSB_UNSIGNED_INTEGER",
14 |         "PC_REAL",
15 |         "UNSIGNED_INTEGER",
16 |         "VAX_UNSIGNED_INTEGER",
17 |         "ASCII_REAL",
18 |     )
19 |     bit_depths = [1, 2, 4, 8]
20 |     numpy_dtype_strings = []
21 |     for dt, depth in product(pds3_data_types, bit_depths):
22 |         try:
23 |             numpy_dtype_strings.append(sample_types(dt, depth, True))
24 |         except NotImplementedError:
25 |             assert ("REAL" in dt) and (depth in (1, 2))
26 |     expected_dtype_strings = [
27 |         # CHARACTER
28 |         "S1",
29 |         "S2",
30 |         "S4",
31 |         "S8",
32 |         # IEEE_REAL
33 |         ">f",
34 |         ">d",
35 |         # LSB_INTEGER
36 |         "<b",
37 |         "<h",
38 |         "<i4",
39 |         "<i8",
40 |         # LSB_UNSIGNED_INTEGER
41 |         "<B",
42 |         "<H",
43 |         "<u4",
44 |         "<u8",
45 |         # MSB_INTEGER
46 |         ">b",
47 |         ">h",
48 |         ">i4",
49 |         ">i8",
50 |         # MSB_UNSIGNED_INTEGER
51 |         ">B",
52 |         ">H",
53 |         ">u4",
54 |         ">u8",
55 |         # PC_REAL
56 |         "<f",
57 |         "<d",
58 |         # UNSIGNED_INTEGER
59 |         ">B",
60 |         ">H",
61 |         ">u4",
62 |         ">u8",
63 |         # VAX_UNSIGNED_INTEGER
64 |         "<B",
65 |         "<H",
66 |         "<u4",
67 |         "<u8",
68 |         # ASCII_REAL
69 |         "S1",
70 |         "S2",
71 |         "S4",
72 |         "S8",
73 |     ]
74 |     assert numpy_dtype_strings == expected_dtype_strings
75 | 
--------------------------------------------------------------------------------
/pdr/tests/test_func.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | 
3 | import pytest
4 | from dustgoggles.tracker import TrivialTracker
5 | 
6 | from pdr.func import (
7 |     get_all_argnames,
8 |     get_argnames,
9 |     get_non_optional_argnames,
10 |     softquery,
11 |     specialize,
12 | )
13 | from pdr.tests.objects import takes_a_few_things, takes_x_only
14 | 
15 | 
16 | def test_specialize():
17 |     # a check function receives the prospective call's keyword arguments;
18 |     # it returns (True, replacement result) to short-circuit the wrapped
19 |     # function, or (False, None) to let the call proceed normally
20 |     def check_big(a, **_):
21 |         if a > 5:
22 |             return True, a / 2
23 |         return False, None
24 | 
25 |     ifbig = specialize(takes_a_few_things, check_big)
26 |     # NOTE: this function cannot filter inappropriate arguments
27 |     # passed as positional.
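28 |     # (e.g., a positional call like ifbig(8, 2, 3) would not be routed
29 |     # through check_big's keyword-based inspection)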
30 |     assert ifbig(a=1, b=2, c=3, d=1, e=1) == 8
31 |     assert ifbig(a=8, b=2, c=3, d=1, e=1) == 4
32 | 
33 | 
34 | def test_get_argnames():
35 |     assert get_argnames(takes_x_only) == {"x"}
36 | 
37 | 
38 | def test_get_non_optional_argnames():
39 |     assert get_non_optional_argnames(takes_a_few_things) == {
40 |         "a",
41 |         "b",
42 |         "c",
43 |         "e",
44 |     }
45 | 
46 | 
47 | def test_get_all_argnames():
48 |     assert get_all_argnames(takes_x_only, takes_a_few_things) == {
49 |         "a",
50 |         "b",
51 |         "c",
52 |         "d",
53 |         "e",
54 |         "x",
55 |         "_",
56 |     }
57 |     assert get_all_argnames(
58 |         takes_x_only, takes_a_few_things, nonoptional=True
59 |     ) == {"a", "b", "c", "e", "x"}
60 | 
61 | 
62 | def test_softquery():
63 |     def b_gen(a):
64 |         return a + 1
65 | 
66 |     def c_gen(a, b, nothing_really: Optional[int] = None):
67 |         return a + b
68 | 
69 |     def f_gen(a, b, c, d, e):
70 |         return a + b + c + d + e
71 | 
72 |     def target(a, b, c, d, e, f, tracker):
73 |         tracker.track()
74 |         return a * b * c * d * e * f
75 | 
76 |     querydict = {'b': b_gen, 'c': c_gen, 'f': f_gen}
77 |     kwargdict = {'a': 5, 'd': 100, 'tracker': TrivialTracker()}
78 |     # this should fail because the pipeline doesn't generate an 'e' and we
79 |     # don't have one in kwargdict
80 |     with pytest.raises(TypeError):
81 |         softquery(target, querydict, kwargdict)
82 |     kwargdict['e'] = 20
83 |     # result should be:
84 |     # b = a + 1 == 5 + 1 == 6
85 |     # c = a + b == 5 + 6 == 11
86 |     # f = a + b + c + d + e == 5 + 6 + 11 + 100 + 20 == 142
87 |     # then: a * b * c * d * e * f == 5 * 6 * 11 * 100 * 20 * 142 == 93720000
88 |     assert target(**softquery(target, querydict, kwargdict)) == 93720000
89 | 
--------------------------------------------------------------------------------
/pdr/tests/test_image.py:
--------------------------------------------------------------------------------
1 | import pdr
2 | 
3 | 
4 | def test_image_simple_2d(uniband_image_product, tracker_factory):
5 |     prod_name, fpath, lpath = uniband_image_product
6 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
7 |     assert data.IMAGE.sum() == 0
8 | 
9 | 
10 | def test_image_simple_3d(multiband_image_product, tracker_factory):
11 |     prod_name, fpath, lpath = multiband_image_product
12 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
13 |     assert data.IMAGE.sum() == 0
--------------------------------------------------------------------------------
/pdr/tests/test_import.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import ast
3 | 
4 | 
5 | def test_delayed_import():
6 |     imports_to_delay = ['numpy', 'pandas']
7 |     commands = f"import sys; import pdr; " \
8 |                f"print(not any(module in sys.modules for module in {imports_to_delay}))"
9 |     out = run_isolated(commands)
10 |     assert ast.literal_eval(out)
11 | 
12 | 
13 | def run_isolated(commands_for_interpreter):
14 |     p = subprocess.run(['python', '-c', commands_for_interpreter],
15 |                        capture_output=True,
16 |                        text=True)
17 |     return p.stdout
--------------------------------------------------------------------------------
/pdr/tests/test_loader_helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | from itertools import product
3 | from pathlib import Path
4 | 
5 | import pytest
6 | 
7 | # noinspection PyProtectedMember
8 | from pdr.loaders._helpers import (
9 |     looks_like_ascii,
10 |     quantity_start_byte,
11 |     count_from_bottom_of_file,
12 |     _check_delimiter_stream,
13 |     check_explicit_delimiter
14 | )
15 | 
16 | 
17 | def test_looks_like_ascii():
18 |     names = ('SPREADSHEET', 'ASCII_TABLE', 'IMAGE')
19 |     formats = ('ASCII', 'STREAM')
20 |     expected = (True, True, True, True, True, False)
21 |     for (name, format_), value in zip(product(names, formats), expected):
22 |         assert value == looks_like_ascii(
23 |             {'INTERCHANGE_FORMAT': format_}, name
24 |         )
25 | 
26 | 
27 | def test_quantity_start_byte():
28 |     units = "BYTES", "RECORDS"
29 |     record_bytes = 100, None
30 |     expected = 99, 99, 9900, None
31 |     for (unit, rb), ex in zip(product(units, record_bytes), expected):
32 |         assert quantity_start_byte({'units': unit, 'value': 100}, rb) == ex
33 | 
34 | 
35 | def test_count_from_bottom_of_file(tmp_path):
36 |     fn = [tmp_path / 'foo.bin', tmp_path / 'FOO.bin']
37 |     rows = 100
38 |     row_bytes = 256
39 |     with fn[0].open('wb') as stream:
40 |         stream.write(b'\x00' * rows * row_bytes * 2)
41 |     assert (
42 |         count_from_bottom_of_file(fn, rows, row_bytes) == rows * row_bytes
43 |     )
44 | 
45 | 
46 | def test_check_delimiter_stream():
47 |     byte_target = {"units": "BYTES", "value": 19200}
48 |     rec_target = {"units": "RECORDS", "value": 1200}
49 |     identifiers = {
50 |         "SPACECRAFT_ID": "NOSTALGIA_FOR_INFINITY",
51 |         'RECORD_BYTES': 100,
52 |         "ETC": ...,
53 |         'RECORD_TYPE': 'BINARY'
54 |     }
55 |     empty_block = {}
56 |     bytes_block = {"BYTES": 100}
57 |     # should never say a stream with a byte quantity is delimited
58 |     assert _check_delimiter_stream(
59 |         identifiers, "TABLE", byte_target, empty_block
60 |     ) is False
61 |     assert _check_delimiter_stream(
62 |         identifiers, "TABLE", ("", byte_target), empty_block
63 |     ) is False
64 |     # should never say a stream with specified record bytes is delimited
65 |     assert _check_delimiter_stream(
66 |         identifiers, "TABLE", rec_target, empty_block
67 |     ) is False
68 |     identifiers['RECORD_BYTES'] = None
69 |     # should never say a non-STREAM stream is delimited
70 |     assert _check_delimiter_stream(
71 |         identifiers, "TABLE", rec_target, empty_block
72 |     ) is False
73 |     # should never say something that isn't ASCII/SPREADSHEET/HEADER is
74 |     # delimited
75 |     identifiers['RECORD_TYPE'] = 'STREAM'
76 |     assert _check_delimiter_stream(
77 |         identifiers, "TABLE", rec_target, empty_block
78 |     ) is False
79 |     # should never say something whose length is declared at the block
80 |     # level in bytes is delimited
81 |     assert _check_delimiter_stream(
82 |         identifiers, "SPREADSHEET", rec_target, bytes_block
83 |     ) is False
84 |     # if all the above conditions aren't satisfied, should say it's
85 |     # delimited
86 |     assert _check_delimiter_stream(
87 |         identifiers, "SPREADSHEET", rec_target, empty_block
88 |     ) is True
89 | 
90 | 
91 | def test_check_explicit_delimiter():
92 |     assert check_explicit_delimiter({'FIELD_DELIMITER': 'VERTICAL_BAR'}) == '|'
93 |     assert check_explicit_delimiter({}) == ','
94 |     # an unrecognized delimiter name should raise KeyError
95 |     with pytest.raises(KeyError):
96 |         check_explicit_delimiter({'FIELD_DELIMITER': 'FENCE'})
97 | 
--------------------------------------------------------------------------------
/pdr/tests/test_metadata.py:
--------------------------------------------------------------------------------
1 | import importlib.util
2 | 
3 | from pdr import Metadata
4 | from pdr.parselabel.pds3 import parse_pvl
5 | from pdr.tests.objects import SILLY_LABEL
6 | 
7 | import pytest
8 | if importlib.util.find_spec("Levenshtein"):
9 |     lev_available = True
10 | else:
11 |     lev_available = False
12 | 
13 | 
14 | def test_metadata_1():
15 |     meta = Metadata(parse_pvl(SILLY_LABEL), 'PDS3')
16 |     assert meta.metaget('POINTINESS') == 12
17 |     assert meta.metablock(
18 | 
'TAIL_COORDINATE_SYSTEM_PARMS' 19 | )['ARTICULATION_DEVICE_ANGLE'][0]['units'] == 'rad' 20 | assert meta.metaget_('MEOW_SEQUENCE_NUMBERS') == (1, 2, 3, 4, '5') 21 | 22 | 23 | @pytest.mark.skipif(not lev_available, reason="Levenshtein not available") 24 | def test_fuzzy_metadata(): 25 | meta = Metadata(parse_pvl(SILLY_LABEL), 'PDS3') 26 | assert meta.metaget_fuzzy('KAT') == 'MEOW.CAT' 27 | -------------------------------------------------------------------------------- /pdr/tests/test_np_utils.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | 4 | import numpy as np 5 | 6 | from pdr.np_utils import ( 7 | make_c_contiguous, 8 | casting_to_float, 9 | np_from_buffered_io, 10 | ibm32_to_np_f32, 11 | ibm64_to_np_f64, 12 | enforce_order_and_object, 13 | ) 14 | 15 | RNG = np.random.default_rng() 16 | 17 | 18 | def test_make_c_contiguous(): 19 | arr = np.arange(0, 100, 5) 20 | arr = arr[0:-1:2] 21 | assert arr.flags["C_CONTIGUOUS"] is False 22 | arr = make_c_contiguous(arr) 23 | assert arr.flags["C_CONTIGUOUS"] is True 24 | 25 | 26 | def test_casting_to_float(): 27 | uint8 = np.arange(0, 100, dtype=np.uint8) 28 | assert casting_to_float(uint8, 1.1) 29 | assert not casting_to_float(uint8, 1) 30 | 31 | 32 | def test_np_from_buffered_io(tmp_path): 33 | arr = RNG.poisson(20, (100, 100)).astype(np.uint8) 34 | fpath = tmp_path / "arr.img.gz" 35 | with gzip.open(fpath, "wb") as stream: 36 | stream.write(arr.tobytes()) 37 | with gzip.open(fpath, "rb") as buf: 38 | in1 = np_from_buffered_io(buf, np.dtype("b")) 39 | assert np.all(in1.reshape(arr.shape) == arr) 40 | in2 = np_from_buffered_io(buf, np.dtype("b"), 10, 10) 41 | assert np.all(in2 == arr.ravel()[10:20]) 42 | 43 | 44 | def test_enforce_order_and_object(): 45 | gross = np.dtype([("f1", "V4"), ("f2", "i2"), ("f3", ">i2")]) 46 | grossarray = np.array([(b"\x00\x00\x00\x01", 12, 12)], dtype=gross) 47 | enforced = enforce_order_and_object(grossarray) 48 | assert np.all(enforced == grossarray) 49 | assert enforced.dtype[0] == np.dtype("O") 50 | assert enforced.dtype[2] == np.dtype("i2") 51 | enforced2 = enforce_order_and_object(np.array([b"\x00"], dtype="V")) 52 | assert enforced2[0] == b"\x00" 53 | assert enforced2.dtype == np.dtype("O") 54 | enforced3 = enforce_order_and_object(np.array([3], dtype=">i2")) 55 | assert enforced3[0] == 3 56 | assert enforced3.dtype == np.dtype("i2") 57 | enforced4 = enforce_order_and_object(np.array([3], dtype=">i2")) 58 | assert enforced4[0] == 3 59 | assert enforced4.dtype == np.dtype("i2") 60 | 61 | 62 | def test_ibm_to_np(): 63 | assert ibm32_to_np_f32(np.frombuffer(b"\x00\x00\x01\xc2", "i4")) == -1 64 | assert ( 65 | ibm64_to_np_f64( 66 | np.frombuffer(b"\x00\x00\x00\x00\x00\x00\x01\xc2", "i8") 67 | ) 68 | == -1 69 | ) 70 | -------------------------------------------------------------------------------- /pdr/tests/test_parselabel_pds3.py: -------------------------------------------------------------------------------- 1 | from pdr.parselabel.pds3 import parse_pvl 2 | from pdr.tests.objects import SILLY_LABEL 3 | 4 | 5 | def test_parse_label(): 6 | params, _ = parse_pvl(SILLY_LABEL) 7 | assert params['^CAT'] == 'MEOW.CAT' 8 | assert params['CAT_NAME'] == 'LILY' 9 | assert params[ 10 | 'TAIL_COORDINATE_SYSTEM_PARMS' 11 | ]['TIP_OF_TAIL_FORMAT']['POINTINESS'] == 12 12 | assert params[ 13 | 'TAIL_COORDINATE_SYSTEM_PARMS' 14 | ]['ARTICULATION_DEVICE_ANGLE'][0] == \ 15 | {'value': -4.5e-05, 'units': 'rad'} 16 | 
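17 | 
18 | 
19 | # Illustrative sketch, not part of the original suite: parse_pvl should
20 | # also preserve multi-line quoted strings as single values, so the
21 | # DESCRIPTION in SILLY_LABEL comes back with its embedded text intact.
22 | def test_parse_label_multiline_string():
23 |     params, _ = parse_pvl(SILLY_LABEL)
24 |     assert 'MONTMORILLONITE = 100.' in params['DESCRIPTION']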
-------------------------------------------------------------------------------- /pdr/tests/test_parselabel_pds4.py: -------------------------------------------------------------------------------- 1 | from pdr.pds4_tools.reader.label_objects import Label 2 | from pdr.parselabel.pds4 import reformat_pds4_tools_label 3 | 4 | from pdr.tests.objects import MINIMAL_PDS4_LABEL 5 | 6 | 7 | # pds4_tools offers no obvious way to parse a Label out of a str, 8 | # nor from an open file handle (which could be a stringio instance). 9 | 10 | def test_parse_label(tmp_path): 11 | minimal_pds4_label_f = tmp_path / "minimal_pds4.xml" 12 | with open(minimal_pds4_label_f, "wt") as fp: 13 | fp.write(MINIMAL_PDS4_LABEL) 14 | unpacked, params = reformat_pds4_tools_label( 15 | Label.from_file(minimal_pds4_label_f) 16 | ) 17 | assert sorted(params) == [ 18 | 'File_Area_Observational', 19 | 'Identification_Area', 20 | 'Observation_Area', 21 | 'Product_Observational', 22 | 'Reference_List', 23 | 'logical_identifier' 24 | ] 25 | 26 | PO = unpacked["Product_Observational"] 27 | assert PO["Observation_Area"] is None 28 | assert PO["Reference_List"] is None 29 | assert PO["File_Area_Observational"] is None 30 | 31 | IA = PO["Identification_Area"] 32 | assert IA["logical_identifier"] == "urn:nasa:pds:mc_pdr_testsuite:test_labels:test_minimal_label.dat" 33 | -------------------------------------------------------------------------------- /pdr/tests/test_primary_desktop_image.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import pdr 6 | 7 | IMPATH = Path(__file__).parent / 'data' 8 | 9 | try: 10 | from PIL import Image 11 | pil_available = True 12 | except ImportError: 13 | pil_available = False 14 | 15 | 16 | # NOTE: loose value checks in this module are intended to allow for 17 | # differences in environment-level versions of libjpeg etc. 
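18 | # Each test decodes an image from pdr/tests/data and checks that the
19 | # array's mean falls within 0.5 of a hard-coded reference value, rather
20 | # than requiring byte-identical pixel output.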
21 | 
22 | 
23 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
24 | def test_simple_primary_jpeg():
25 |     im = pdr.read(IMPATH / 'squirrel.jpg')
26 |     assert abs(im.IMAGE.mean() - 125.5) < 0.5
27 |     assert im.metaget('mode') == 'RGB'
28 |     assert im.metaget('format') == 'JPEG'
29 |     assert im.standard == 'JPEG'
30 | 
31 | 
32 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
33 | def test_phone_camera_mpo():
34 |     im = pdr.read(IMPATH / 'concert.jpeg')
35 |     assert abs(im.IMAGE.mean() - 40) < 0.5
36 |     assert abs(im.Undefined_1.mean() - 5) < 0.5
37 |     assert im.metaget(
38 |         'MPEntry'
39 |     )[0]['Attribute']['MPType'] == 'Baseline MP Primary Image'
40 |     assert im.metaget('Model') == 'iPhone 13 Pro Max'
41 |     assert im.metaget('Longitude') == (82.0, 33.0, 3.61)
42 |     assert im.metaget('mode') == 'RGB'
43 |     assert im.standard == 'MPO'
44 | 
45 | 
46 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
47 | def test_simple_tiff():
48 |     im = pdr.read(IMPATH / 'kings_river_canyon.tiff')
49 |     assert abs(im.IMAGE.mean() - 152.6) < 0.5
50 |     assert im.metaget('mimetype') == 'image/tiff'
51 |     assert im.metaget('mode') == 'L'
52 |     assert im.standard == 'TIFF'
53 | 
54 | 
55 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
56 | def test_anigif():
57 |     im = pdr.read(IMPATH / 'F187B51_cycle_3.gif')
58 |     assert len(im) == 43
59 |     assert abs(im.FRAME_30.mean() - 115.5) < 0.5
60 |     assert abs(im.FRAME_5.mean() - 1.5) < 0.5
61 |     assert im.metaget('mode') == 'P'
62 |     assert im.metaget('palette')[(238, 255, 0)] == 0
63 | 
64 | 
65 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
66 | def test_png():
67 |     im = pdr.read(IMPATH / 'catseye_1.png')
68 |     assert abs(im.IMAGE.mean() - 19.4) < 0.5
69 |     assert im.metaget('mode') == 'RGB'
70 |     assert im.metaget('ExifOffset') == 168
71 | 
72 | 
73 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
74 | def test_bmp():
75 |     im = pdr.read(IMPATH / 'weather.bmp')
76 |     assert abs(im.IMAGE.mean() - 118.6) < 0.5
77 |     assert im.metaget('mode') == 'RGB'
78 |     assert im.standard == 'BMP'
79 | 
80 | 
81 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
82 | def test_animated_webp():
83 |     im = pdr.read(IMPATH / 'Simple_Animated_Clock.webp')
84 |     assert len(im) == 287
85 |     assert abs(im.FRAME_286.mean() - 1.5) < 0.5
86 |     assert im.metaget('mode') == 'RGBA'
87 |     assert im.standard == 'WEBP'
88 | 
--------------------------------------------------------------------------------
/pdr/tests/test_primary_fits.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import pdr
4 | 
5 | import pytest
6 | try:
7 |     from astropy.io import fits
8 |     fits_available = True
9 | except ImportError:
10 |     fits_available = False
11 | 
12 | RNG = np.random.default_rng()
13 | 
14 | 
15 | @pytest.mark.skipif(not fits_available, reason="astropy.io.fits not available")
16 | def test_array_roundtrip(tmp_path):
17 |     arr = RNG.poisson(100, (100, 100)).astype(np.uint8)
18 |     hdul = fits.HDUList()
19 |     hdul.append(fits.ImageHDU(arr, name='POISSON'))
20 |     hdul.writeto(tmp_path / 'temp.fits')
21 |     data = pdr.read(tmp_path / 'temp.fits')
22 |     assert data.keys() == ['POISSON']
23 |     assert np.all(data.POISSON == arr)
24 | 
--------------------------------------------------------------------------------
/pdr/tests/test_queries.py:
--------------------------------------------------------------------------------
1 | from pdr.parselabel.pds3 import parse_pvl, literalize_pvl
2 | from pdr.loaders.queries import (
3 |     base_sample_info,
4 |     extract_axplane_metadata,
5 |     generic_image_properties,
6 |     generic_qube_properties,
7 |     get_qube_band_storage_type,
8 |     im_sample_type,
9 | )
10 | 
11 | from pdr.tests.objects import BLOCK_TEXT, QUBE_BLOCK_TEXT
12 | 
13 | 
14 | # helper exercising base_sample_info / im_sample_type; not named test_*,
15 | # so pytest does not collect it
16 | def basesamp():
17 |     block = literalize_pvl(parse_pvl(BLOCK_TEXT)[0]["IMAGE"])
18 |     base = base_sample_info(block)
19 |     assert base == {"BYTES_PER_PIXEL": 4, "SAMPLE_TYPE": "IEEE_REAL"}
20 |     assert im_sample_type(base) == ">"
21 | 
22 | 
23 | def test_generic_properties():
24 |     block = parse_pvl(BLOCK_TEXT)[0]["IMAGE"]
25 |     props = generic_image_properties(block, ">f")
26 |     assert props == {
27 |         "BYTES_PER_PIXEL": 4,
28 |         "is_vax_real": False,
29 |         "sample_type": ">f",
30 |         "nrows": 650,
31 |         "ncols": 350,
32 |         "nbands": 3,
33 |         "band_storage_type": "BAND_SEQUENTIAL",
34 |         "rowpad": 0,
35 |         "colpad": 0,
36 |         "bandpad": 0,
37 |         "linepad": 0,
38 |     }
39 | 
40 | 
41 | def test_qube_props():
42 |     params, _ = parse_pvl(QUBE_BLOCK_TEXT)
43 |     qube_block = params["SPECTRAL_QUBE"]
44 |     band_storage_type = get_qube_band_storage_type(qube_block)
45 |     props = generic_qube_properties(qube_block, band_storage_type)
46 |     assert props == {
47 |         "BYTES_PER_PIXEL": 4,
48 |         "sample_type": ">f",
49 |         "axnames": ("SAMPLE", "LINE", "BAND"),
50 |         "ncols": 100,
51 |         "nrows": 66,
52 |         "nbands": 17,
53 |         "band_storage_type": "BAND_SEQUENTIAL",
54 |         "rowpad": 0,
55 |         "colpad": 0,
56 |         "bandpad": 8,
57 |         "suffix_bands": 8,
58 |         "linepad": 0,
59 |         "is_vax_real": False,
60 |     }
61 |     assert extract_axplane_metadata(qube_block, props) == {
62 |         "rowpad": 0,
63 |         "colpad": 0,
64 |         "bandpad": 8,
65 |         "suffix_bands": 8,
66 |     }
67 | 
--------------------------------------------------------------------------------
/pdr/tests/test_scaling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import pdr
4 | from pdr._scaling import find_special_constants
5 | from pdr.parselabel.pds3 import parse_pvl
6 | 
7 | RNG = np.random.default_rng()
8 | 
9 | STUB = """
10 | OBJECT = IMAGE
11 |     INVALID_CONSTANT = 33
12 | END_OBJECT
13 | END
14 | """
15 | 
16 | 
17 | def test_find_special_constants():
18 |     meta = pdr.Metadata(parse_pvl(STUB), 'PDS3')
19 |     arr = RNG.choice(np.array([33, -32766, 100]), (100, 100))
20 |     specials = find_special_constants(meta, arr.astype(np.int16), 'IMAGE')
21 |     assert specials == {"INVALID_CONSTANT": 33, "ISIS_LOW_INST_SAT": -32766}
22 | 
--------------------------------------------------------------------------------
/pdr/tests/test_table.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import pdr
4 | 
5 | 
6 | def test_simple_binary_table(binary_table_product, tracker_factory):
7 |     prod_name, fpath, lpath = binary_table_product
8 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
9 |     assert list(data.TABLE.columns) == ['X_0', 'Y', 'X_1']
10 |     assert list(data.TABLE.dtypes) == [
11 |         np.dtype('uint8'), np.dtype('float32'), np.dtype('float64')
12 |     ]
13 |     assert data.TABLE.loc[0, 'X_0'] == 1
14 |     assert np.isclose(data.TABLE.loc[5, 'Y'], 4.4)
15 |     assert np.isclose(data.TABLE.loc[9, "X_1"], 8.8)
16 | 
17 | 
18 | def test_simple_dsv_table(dsv_table_product, tracker_factory):
19 |     prod_name, fpath, lpath = dsv_table_product
20 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
21 |     assert list(data.SPREADSHEET.columns) == ['X_0', 'Y', 'X_1']
22 |     assert list(data.SPREADSHEET.dtypes) == [
23 | 
np.dtype('float64'), np.dtype('O'), np.dtype('int64') 24 | ] 25 | assert np.isclose(data.SPREADSHEET.loc[0, 'X_0'], 5.5) 26 | assert data.SPREADSHEET.loc[5, 'Y'] == 'cat' 27 | assert data.SPREADSHEET.loc[9, "X_1"] == -12 28 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # pyproject.toml documentation for reference: 2 | # https://packaging.python.org/en/latest/ 3 | # https://setuptools.pypa.io/en/latest/userguide/ 4 | 5 | [project] 6 | name = "pdr" 7 | version = "1.4.0" 8 | description = "Planetary Data Reader" 9 | readme = "README.md" 10 | license = { file = "LICENSE.md" } 11 | 12 | authors = [ 13 | { name = "Chase Million", email = "chase@millionconcepts.com" }, 14 | { name = "Michael St. Clair", email = "mstclair@millionconcepts.com" }, 15 | { name = "Sierra Brown", email = "sierra@millionconcepts.com" }, 16 | { name = "Sabrina Curtis", email = "scurtis@millionconcepts.com" }, 17 | { name = "Zack Weinberg", email = "zack@millionconcepts.com" }, 18 | ] 19 | 20 | classifiers = [ 21 | "Development Status :: 4 - Beta", 22 | "License :: OSI Approved :: BSD License", 23 | "Operating System :: OS Independent", 24 | "Programming Language :: Python :: 3", 25 | ] 26 | 27 | requires-python = ">=3.9" 28 | dependencies = [ 29 | "dustgoggles", 30 | "more_itertools", 31 | "multidict", 32 | "numpy", 33 | "pandas>=2.0.0", 34 | "rms-vax" 35 | ] 36 | 37 | [project.optional-dependencies] 38 | pillow = ["pillow"] 39 | fits = ["astropy"] 40 | notebooks = ["jupyter"] 41 | pvl = ["pvl"] 42 | tests = ["pytest"] 43 | fuzzy = ["Levenshtein"] 44 | 45 | [project.urls] 46 | Repository = "https://github.com/MillionConcepts/pdr" 47 | 48 | [build-system] 49 | requires = ["setuptools >= 64"] 50 | build-backend = "setuptools.build_meta" 51 | 52 | [tool.setuptools.packages.find] 53 | where = ["."] 54 | include = ["pdr*"] 55 | namespaces = false 56 | 57 | # Recommended for new projects by pytest manual. 58 | [tool.pytest.ini_options] 59 | addopts = [ 60 | "--import-mode=importlib" 61 | ] 62 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # All package configuration is now in pyproject.toml. This file exists 2 | # solely for backward compatibility, e.g. allowing people to continue 3 | # to run "python3 setup.py develop" instead of "pip install -e ." 4 | 5 | import setuptools 6 | setuptools.setup() 7 | --------------------------------------------------------------------------------