├── .coveragerc ├── .gitattributes ├── .github ├── scripts │ ├── adjust-coverage-config │ └── find-gnu-tar └── workflows │ └── ci-unittest.yml ├── .gitignore ├── .mailmap ├── .readthedocs.yaml ├── CITATION.cff ├── Example_Jupyter_Notebook.ipynb ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── docs ├── CITATION.cff ├── LICENSE.md ├── code_of_conduct.md ├── environment.yml ├── index.md ├── jlitebadge.svg ├── pdr_api.md ├── pdr_joss_paper.pdf ├── pdr_pdart_proposal_roses20.pdf ├── supported_datasets.md └── version_history.md ├── environment.yml ├── minimal_environment.yml ├── mkdocs.yml ├── pdr ├── __init__.py ├── _scaling.py ├── bit_handling.py ├── browsify.py ├── datatypes.py ├── errors.py ├── formats │ ├── __init__.py │ ├── cassini.py │ ├── checkers.py │ ├── clementine.py │ ├── dawn.py │ ├── diviner.py │ ├── epoxi.py │ ├── galileo.py │ ├── ground.py │ ├── ihw.py │ ├── iue.py │ ├── juno.py │ ├── lp.py │ ├── lro.py │ ├── lroc.py │ ├── mariner.py │ ├── mer.py │ ├── mex.py │ ├── mgn.py │ ├── mgs.py │ ├── mro.py │ ├── msl_apxs.py │ ├── msl_ccam.py │ ├── msl_cmn.py │ ├── msl_places.py │ ├── msl_rems.py │ ├── msx.py │ ├── nh.py │ ├── odyssey.py │ ├── phoenix.py │ ├── pvo.py │ ├── rosetta.py │ ├── saturn_rpx.py │ ├── themis.py │ ├── ulysses.py │ ├── vega.py │ ├── viking.py │ └── voyager.py ├── func.py ├── loaders │ ├── __init__.py │ ├── _helpers.py │ ├── astrowrap.py │ ├── datawrap.py │ ├── dispatch.py │ ├── handlers.py │ ├── image.py │ ├── queries.py │ ├── table.py │ ├── text.py │ └── utility.py ├── np_utils.py ├── parselabel │ ├── __init__.py │ ├── pds3.py │ ├── pds4.py │ └── utils.py ├── pd_utils.py ├── pdr.py ├── pdrtypes.py ├── pds4_tools │ ├── CREDITS │ ├── LICENSES │ ├── __about__.py │ ├── __init__.py │ ├── extern │ │ ├── __init__.py │ │ ├── appdirs.py │ │ ├── argparse.py │ │ ├── cached_property.py │ │ ├── ordered_dict.py │ │ ├── six.py │ │ └── zscale.py │ ├── reader │ │ ├── __init__.py │ │ ├── array_objects.py │ │ ├── core.py │ │ ├── data.py │ │ ├── data_types.py │ │ ├── general_objects.py │ │ ├── header_objects.py │ │ ├── label_objects.py │ │ ├── read_arrays.py │ │ ├── read_headers.py │ │ ├── read_label.py │ │ ├── read_tables.py │ │ └── table_objects.py │ └── utils │ │ ├── __init__.py │ │ ├── compat.py │ │ ├── constants.py │ │ ├── data_access.py │ │ ├── deprecation.py │ │ ├── exceptions.py │ │ ├── helpers.py │ │ └── logging.py ├── pil_utils.py ├── pvl_utils.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ │ ├── F187B51_cycle_3.gif │ │ ├── Simple_Animated_Clock.webp │ │ ├── catseye_1.png │ │ ├── concert.jpeg │ │ ├── kings_river_canyon.tiff │ │ ├── squirrel.jpg │ │ └── weather.bmp │ ├── objects.py │ ├── test_bit_handling.py │ ├── test_browsify.py │ ├── test_data.py │ ├── test_datatypes.py │ ├── test_func.py │ ├── test_image.py │ ├── test_import.py │ ├── test_loader_helpers.py │ ├── test_metadata.py │ ├── test_np_utils.py │ ├── test_parselabel_pds3.py │ ├── test_parselabel_pds4.py │ ├── test_primary_desktop_image.py │ ├── test_primary_fits.py │ ├── test_queries.py │ ├── test_scaling.py │ └── test_table.py └── utils.py ├── pyproject.toml └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source_pkgs = pdr 3 | omit = 4 | */formats/* 5 | */pds4_tools/extern/* 6 | */pvl_utils.py 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-language=Python 2 | 
-------------------------------------------------------------------------------- /.github/scripts/adjust-coverage-config: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | """ 4 | Read a .coveragerc from stdin, adjust it for use in a CI build, and 5 | write it back out to stdout. 6 | 7 | If files are listed on the command line, they are assumed to be 8 | coverage databases, and a [paths] section is added to the .coveragerc 9 | (replacing any existing [paths] section) that instructs coverage.py 10 | to treat the common path prefix of each coverage database's files 11 | as equivalent. When used this way, coverage.py must be importable. 12 | """ 13 | 14 | import sys 15 | 16 | from argparse import ArgumentParser 17 | from configparser import ConfigParser 18 | from pathlib import Path 19 | 20 | 21 | DATABASE_NAME = "coverage.dat" 22 | 23 | 24 | def remap_paths_for_databases(cfg, databases): 25 | """ 26 | Compute a set of path remapping rules that will render all of 27 | the databases in DATABASES mergeable, by stripping out the common 28 | path prefix found in each database. 29 | """ 30 | from collections import defaultdict 31 | from coverage import CoverageData 32 | from os.path import commonprefix 33 | from pathlib import PurePosixPath, PureWindowsPath 34 | 35 | prefixes = set() 36 | for db_fname in databases: 37 | db = CoverageData(basename=db_fname) 38 | db.read() 39 | prefixes.add(commonprefix(list(db.measured_files()))) 40 | 41 | packages = defaultdict(set) 42 | for p in prefixes: 43 | if '\\' in p or (len(p) >= 2 and p[0].isalpha() and p[1] == ':'): 44 | name = PureWindowsPath(p).name 45 | else: 46 | name = PurePosixPath(p).name 47 | packages[name].add(p) 48 | 49 | pkg_names = sorted(packages.keys()) 50 | 51 | cfg["run"]["relative_files"] = "true" 52 | cfg["run"]["source_pkgs"] = " ".join(pkg_names) 53 | 54 | cfg["paths"] = {} 55 | for pkg in pkg_names: 56 | pkg_paths = ['', pkg + '/'] 57 | pkg_paths.extend(sorted(packages[pkg])) 58 | cfg["paths"]["src_" + pkg] = "\n".join(pkg_paths) 59 | 60 | 61 | def adjust_omit(cfg): 62 | """ 63 | Adjust the "omit" setting to be more appropriate for use in CI; 64 | the stock .coveragerc is tailored for interactive use. 65 | """ 66 | GLOBS_TO_DROP = ( 67 | "*/formats/*", 68 | "*/pvl_utils.py", 69 | ) 70 | 71 | run_section = cfg["run"] 72 | pruned_omit_globs = [] 73 | for glob in run_section.get("omit", "").splitlines(): 74 | glob = glob.strip() 75 | if glob not in GLOBS_TO_DROP: 76 | pruned_omit_globs.append(glob) 77 | 78 | if ( 79 | len(pruned_omit_globs) == 0 80 | or len(pruned_omit_globs) == 1 and pruned_omit_globs[0] == "" 81 | ): 82 | del run_section["omit"] 83 | else: 84 | run_section["omit"] = "\n".join(pruned_omit_globs) 85 | 86 | 87 | def change_database_name(cfg): 88 | """ 89 | Give the coverage database a more convenient name for use in 90 | cross-platform CI. 
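    For example, with the stock configuration this resolves to something
    like $PWD/coverage.dat (an illustrative rendering of the
    Path.cwd() / DATABASE_NAME expression below; the actual prefix depends
    on the runner's working directory).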
91 | """ 92 | cfg["run"]["data_file"] = str(Path.cwd() / DATABASE_NAME) 93 | 94 | 95 | def main(): 96 | ap = ArgumentParser(description=__doc__) 97 | ap.add_argument("databases", nargs="*", 98 | help="Coverage databases to be merged") 99 | args = ap.parse_args() 100 | 101 | # this must match how coverage.py initializes ConfigParser 102 | cfg = ConfigParser(interpolation=None) 103 | 104 | with sys.stdin as ifp: 105 | cfg.read_file(ifp, source="") 106 | 107 | if args.databases: 108 | remap_paths_for_databases(cfg, args.databases) 109 | 110 | adjust_omit(cfg) 111 | change_database_name(cfg) 112 | 113 | with sys.stdout as ofp: 114 | cfg.write(ofp) 115 | 116 | 117 | main() 118 | -------------------------------------------------------------------------------- /.github/scripts/find-gnu-tar: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | """ 4 | Find GNU tar, whose pathname transformation options we need, and which 5 | is named 'tar' on Github's Linux and Windows CI runners but 'gtar' on 6 | their MacOS runners. 7 | """ 8 | 9 | import os 10 | import stat 11 | import sys 12 | 13 | from argparse import ArgumentParser 14 | from pathlib import Path 15 | 16 | 17 | if os.name == "nt": 18 | EXE_SUFFIX = ".exe" 19 | def is_executable_mode(mode): 20 | return True 21 | else: 22 | EXE_SUFFIX = "" 23 | def is_executable_mode(mode): 24 | return (stat.S_IMODE(mode) & 0o111) != 0 25 | 26 | 27 | def is_executable_file(path, debug): 28 | if debug: 29 | sys.stderr.write(f" {path}: ") 30 | try: 31 | st = os.stat(path) 32 | except FileNotFoundError: 33 | if debug: 34 | sys.stderr.write("not found\n") 35 | return False 36 | 37 | if not stat.S_ISREG(st.st_mode): 38 | if debug: 39 | sys.stderr.write("not a regular file (mode={})\n" 40 | .format(stat.filemode(st.st_mode))) 41 | return False 42 | 43 | if not is_executable_mode(st.st_mode): 44 | if debug: 45 | sys.stderr.write("not executable (mode={}, os={})\n" 46 | .format(stat.filemode(st.st_mode, os.name))) 47 | return False 48 | 49 | if debug: 50 | sys.stderr.write(" ok\n") 51 | return True 52 | 53 | 54 | 55 | def find_gnu_tar(debug=False): 56 | GTAR_CMD = "gtar" + EXE_SUFFIX 57 | TAR_CMD = "tar" + EXE_SUFFIX 58 | candidate = None 59 | for d in os.get_exec_path(): 60 | # Resolve symlinks in the directory components of the path, 61 | # but *not* the command name, because changing the command 62 | # name might alter the behavior of the command. 
63 |         p = Path(d).resolve()
64 |         if debug:
65 |             sys.stderr.write(f"checking {p}\n")
66 |         gtar = p / GTAR_CMD
67 |         tar = p / TAR_CMD
68 |         if is_executable_file(gtar, debug):
69 |             # gtar is preferred
70 |             return gtar
71 |         if is_executable_file(tar, debug):
72 |             # use tar only if we don't find a gtar later in the path
73 |             candidate = tar
74 |     if candidate is not None:
75 |         return candidate
76 |     sys.stderr.write(f"neither {GTAR_CMD} nor {TAR_CMD} found in PATH\n")
77 |     sys.exit(1)
78 | 
79 | 
80 | def main():
81 |     ap = ArgumentParser(description=__doc__)
82 |     ap.add_argument("--debug", action="store_true",
83 |                     help="Print debugging information during the search")
84 |     args = ap.parse_args()
85 | 
86 |     sys.stdout.write(str(find_gnu_tar(args.debug)) + "\n")
87 |     sys.exit(0)
88 | 
89 | 
90 | main()
91 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *ipynb*
2 | *pycache*
3 | /test_files
4 | /output_files
5 | /input_files
6 | /pdr.egg-info/
7 | /.idea/
8 | /pdrtestsuite/
9 | *scratch.py
10 | *scratch.ipynb
11 | /pdr/tests/data/
12 | /pdr/oldtests/
13 | **pdrtests.log**
14 | /pdr/tests/reference/temp/
15 | *.dot
16 | *.profile
17 | data/**
18 | build/**
19 | dist/**
20 | git
21 | meta.yaml
22 | *.DS_store
23 | */.tracker_logs
24 | /.coverage
25 | /.pytest_cache/
26 | /htmlcov/
27 | six_old.py
--------------------------------------------------------------------------------
/.mailmap:
--------------------------------------------------------------------------------
1 | Chase Million <1483210+cmillion@users.noreply.github.com>
2 | Chase Million cmillion
3 | Chase Million cmillion <>
4 | 
5 | Michael Aye
6 | 
7 | Michael St. Clair
8 | Michael St. Clair M. St. Clair <64057573+m-stclair@users.noreply.github.com>
9 | Michael St. Clair m-stclair <64057573+m-stclair@users.noreply.github.com>
10 | Michael St. Clair michael <64057573+m-stclair@users.noreply.github.com>
11 | Michael St. Clair michael mstclair@millionconcepts.com
12 | 
13 | Sabrina Curtis curtiss9 <98858647+curtiss9@users.noreply.github.com>
14 | 
15 | Sierra Brown <88336748+Sierra-MC@users.noreply.github.com>
16 | Sierra Brown Sierra V. Kaufman
17 | Sierra Brown Sierra-MC
18 | 
19 | Zack Weinberg
20 | Zack Weinberg
21 | 
22 | GitHub
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file for MkDocs projects
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 | 
4 | # Required
5 | version: 2
6 | 
7 | # Set the version of Python and other tools you might need
8 | build:
9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "mambaforge-22.9"
12 | 
13 | mkdocs:
14 |   configuration: mkdocs.yml
15 | 
16 | # Optionally declare the Python requirements required to build your docs
17 | conda:
18 |   environment: docs/environment.yml
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | If you use pdr for any published work, please cite it using the reference below:
2 | 
3 | @article{Brown2024,
4 |     author = {{Brown}, Sierra V. and {St. Clair}, Michael and {Million}, Chase
5 |               and {Curtis}, Sabrina and {Aye}, K.-Michael and {Weinberg}, Zack},
6 |     year = {2024},
7 |     journal = {Journal of Open Source Software},
8 |     title = {PDR: The Planetary Data Reader},
9 |     url = {https://doi.org/10.21105/joss.07256},
10 |     publisher = {The Open Journal},
11 |     volume = {9},
12 |     number = {102},
13 |     pages = {7256},
14 |     doi = {10.21105/joss.07256}
15 | }
16 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # No, it isn't possible to do this in pyproject.toml :-(
2 | include .coveragerc
3 | include environment.yml
4 | include minimal_environment.yml
5 | include docs/version_history.md
6 | include pdr/pds4_tools/CREDITS
7 | include pdr/pds4_tools/LICENSES
8 | include pdr/tests/data/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## The Planetary Data Reader (pdr)
2 | 
3 | This tool provides a single command---`read('/path/to/file')`---for ingesting
4 | _all_ common planetary data types. It reads almost all "primary observational
5 | data" products currently archived in the PDS (under PDS3 or PDS4), and the
6 | fraction of products it does not read is continuously shrinking.
7 | [Currently-supported datasets are listed here.](docs/supported_datasets.md)
8 | 
9 | If the software fails while attempting to read from datasets that we have
10 | listed as supported, please submit an issue with a link to the file and
11 | information about the error (if applicable). There might also be datasets that
12 | work but are not listed. We would like to hear about those too. If a dataset
13 | is not yet supported that you would like us to consider prioritizing,
14 | [please fill out this request form](https://docs.google.com/forms/d/1JHyMDzC9LlXY4MOMcHqV5fbseSB096_PsLshAMqMWBw/viewform).
15 | 
16 | ### Attribution
17 | If you use _pdr_ in your work, please cite us using our [JOSS Paper](docs/pdr_joss_paper.pdf): [![DOI](https://joss.theoj.org/papers/10.21105/joss.07256/status.svg)](https://doi.org/10.21105/joss.07256).
18 | A BibTeX-style citation is available in [CITATION.cff](CITATION.cff).
19 | 
20 | ### Installation
21 | _pdr_ is now on `conda` and `pip`. We recommend (and only officially support)
22 | installation into a `conda` environment. You can do this like so:
23 | 
24 | ```
25 | conda create --name pdrenv
26 | conda activate pdrenv
27 | conda install -c conda-forge pdr
28 | ```
29 | The minimum supported version of Python is _3.9_.
30 | 
31 | Using the conda install will install some optional dependencies in the environment.yml
32 | file for pdr, including `astropy` and `pillow`. If you'd prefer to forego those
33 | optional dependencies, please use minimal_environment.yml in your
34 | installation. This is not supported through a direct conda install as
35 | described above and will require additional steps. Optional dependencies
36 | and the added functionality they support are listed below:
37 | 
38 | - `pvl`: allows `Data.load("LABEL", as_pvl=True)`, which will load PDS3
39 |   labels as `pvl` objects rather than plain text
40 | - `astropy`: adds support for FITS files
41 | - `jupyter`: allows usage of the Example Jupyter Notebook (and other jupyter
42 |   notebooks you create)
43 | - `pillow`: adds support for reading a variety of 'desktop' image formats
44 |   (TIFF, JPEG, etc.) and for browse image rendering
45 | - `Levenshtein`: allows use of `metaget_fuzzy`, a fuzzy-matching metadata
46 |   parsing function
47 | 
48 | For pip users, no optional dependencies will be packaged with pdr. The extras
49 | tags are:
50 | - `pvl`: installs `pvl`
51 | - `fits`: installs `astropy`
52 | - `notebooks`: installs `jupyter`
53 | - `pillow`: installs `pillow`
54 | - `fuzzy`: installs `Levenshtein`
55 | 
56 | Example syntax for using pip to install pdr with the `astropy` and `pillow` optional
57 | dependencies:
58 | ```
59 | pip install pdr[fits, pillow]
60 | ```
61 | 
62 | #### NOTE: `pdr` is not currently compatible with Python 3.13 when installed with `pip`; it can be used with Python 3.13 through `conda`
63 | 
64 | ### Usage
65 | 
66 | You can check out our example Notebook on a JupyterLite server for a
67 | quick interactive demo of functionality:
68 | [![JupyterLite](docs/jlitebadge.svg)](https://millionconcepts.github.io/jlite-pdr-demo/)
69 | 
70 | Additional information on usage including examples, output data types, notes
71 | and caveats, tests, etc. can now be accessed in our documentation on
72 | readthedocs at: https://pdr.readthedocs.io [![Documentation Status](https://readthedocs.org/projects/pdr/badge/?version=latest)](https://pdr.readthedocs.io/en/latest/?badge=latest)
73 | 
74 | 
75 | ### Contributing
76 | 
77 | Thank you for wanting to contribute to `pdr` and improve efforts to make
78 | planetary science data accessible. Please review our code of conduct before
79 | contributing. [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](docs/code_of_conduct.md)
80 | 
81 | If you have found a bug, a dataset that we claim to support that's not opening
82 | properly, or you have a feature request, please file an issue. We will also
83 | review pull requests, but would probably prefer you start the conversation with
84 | us first, so we can expect your contributions and make sure they will be within
85 | scope.
86 | 
87 | If you need general support you can find us on [OpenPlanetary Slack](https://app.slack.com/client/T04CWPQL9/C04CWPQM5)
88 | (available to [OpenPlanetary members](https://www.openplanetary.org/join))
89 | or feel free to [email](mailto:sierra@millionconcepts.com) the team.
90 | 
91 | ---
92 | This work is supported by NASA grant No. 80NSSC21K0885.
93 | 
--------------------------------------------------------------------------------
/docs/CITATION.cff:
--------------------------------------------------------------------------------
1 | If you use pdr for any published work, please cite it using the reference below:
2 | 
3 | @article{Brown2024,
4 |     author = {{Brown}, Sierra V. and {St. Clair}, Michael and {Million}, Chase
5 |               and {Curtis}, Sabrina and {Aye}, K.-Michael and {Weinberg}, Zack},
6 |     year = {2024},
7 |     journal = {Journal of Open Source Software},
8 |     title = {PDR: The Planetary Data Reader},
9 |     url = {https://doi.org/10.21105/joss.07256},
10 |     publisher = {The Open Journal},
11 |     volume = {9},
12 |     number = {102},
13 |     pages = {7256},
14 |     doi = {10.21105/joss.07256}
15 | }
16 | 
--------------------------------------------------------------------------------
/docs/code_of_conduct.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, caste, color, religion, or sexual
10 | identity and orientation.
11 | 
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 | 
15 | ## Our Standards
16 | 
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 | 
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 |   and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 |   community
27 | 
28 | Examples of unacceptable behavior can be found [here](https://www.contributor-covenant.org/version/2/1/code_of_conduct/#:~:text=Examples%20of%20unacceptable,a%20professional%20setting).
29 | 
30 | ## Enforcement Responsibilities
31 | 
32 | Community leaders are responsible for clarifying and enforcing our standards of
33 | acceptable behavior and will take appropriate and fair corrective action in
34 | response to any behavior that they deem inappropriate, threatening, offensive,
35 | or harmful.
36 | 
37 | Community leaders have the right and responsibility to remove, edit, or reject
38 | comments, commits, code, wiki edits, issues, and other contributions that are
39 | not aligned to this Code of Conduct, and will communicate reasons for moderation
40 | decisions when appropriate.
41 | 
42 | ## Scope
43 | 
44 | This Code of Conduct applies within all community spaces, and also applies when
45 | an individual is officially representing the community in public spaces.
46 | Examples of representing our community include using an official e-mail address,
47 | posting via an official social media account, or acting as an appointed
48 | representative at an online or offline event.
49 | 
50 | ## Enforcement
51 | 
52 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
53 | reported to [Sierra Brown](mailto:sierra@millionconcepts.com).
54 | All complaints will be reviewed and investigated promptly and fairly.
55 | 
56 | All community leaders are obligated to respect the privacy and security of the
57 | reporter of any incident.
58 | 
59 | ## Enforcement Guidelines
60 | 
61 | Community leaders will follow these Community Impact Guidelines in determining
62 | the consequences for any action they deem in violation of this Code of Conduct:
63 | 
64 | ### 1.
Correction 65 | 66 | **Community Impact**: Use of inappropriate language or other behavior deemed 67 | unprofessional or unwelcome in the community. 68 | 69 | **Consequence**: A private, written warning from community leaders, providing 70 | clarity around the nature of the violation and an explanation of why the 71 | behavior was inappropriate. A public apology may be requested. 72 | 73 | ### 2. Warning 74 | 75 | **Community Impact**: A violation through a single incident or series of 76 | actions. 77 | 78 | **Consequence**: A warning with consequences for continued behavior. No 79 | interaction with the people involved, including unsolicited interaction with 80 | those enforcing the Code of Conduct, for a specified period of time. This 81 | includes avoiding interactions in community spaces as well as external channels 82 | like social media. Violating these terms may lead to a temporary or permanent 83 | ban. 84 | 85 | ### 3. Temporary Ban 86 | 87 | **Community Impact**: A serious violation of community standards, including 88 | sustained inappropriate behavior. 89 | 90 | **Consequence**: A temporary ban from any sort of interaction or public 91 | communication with the community for a specified period of time. No public or 92 | private interaction with the people involved, including unsolicited interaction 93 | with those enforcing the Code of Conduct, is allowed during this period. 94 | Violating these terms may lead to a permanent ban. 95 | 96 | ### 4. Permanent Ban 97 | 98 | **Community Impact**: Demonstrating a pattern of violation of community 99 | standards, including sustained inappropriate behavior, harassment of an 100 | individual, or aggression toward or disparagement of classes of individuals. 101 | 102 | **Consequence**: A permanent ban from any sort of public interaction within the 103 | community. 104 | 105 | ## Attribution 106 | 107 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 108 | version 2.1, available at 109 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 110 | 111 | Community Impact Guidelines were inspired by 112 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 113 | 114 | For answers to common questions about this code of conduct, see the FAQ at 115 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 116 | [https://www.contributor-covenant.org/translations][translations]. 
117 | 118 | [homepage]: https://www.contributor-covenant.org 119 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 120 | [Mozilla CoC]: https://github.com/mozilla/diversity 121 | [FAQ]: https://www.contributor-covenant.org/faq 122 | [translations]: https://www.contributor-covenant.org/translations 123 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: pdr 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.11 7 | - mkdocs 8 | - mkdocstrings 9 | - mkdocstrings-python 10 | - mkdocs-material -------------------------------------------------------------------------------- /docs/pdr_api.md: -------------------------------------------------------------------------------- 1 | # pdr 2 | 3 | ::: pdr 4 | options: 5 | heading_level: 2 6 | 7 | ## _scaling 8 | 9 | ::: pdr._scaling 10 | options: 11 | heading_level: 3 12 | 13 | ## bit_handling 14 | 15 | ::: pdr.bit_handling 16 | options: 17 | heading_level: 3 18 | 19 | ## browsify 20 | 21 | ::: pdr.browsify 22 | options: 23 | heading_level: 3 24 | 25 | ## datatypes 26 | 27 | ::: pdr.datatypes 28 | options: 29 | heading_level: 3 30 | 31 | ## errors 32 | 33 | ::: pdr.errors 34 | options: 35 | heading_level: 3 36 | 37 | ## formats 38 | 39 | ::: pdr.formats 40 | options: 41 | heading_level: 3 42 | 43 | ### formats.cassini 44 | 45 | ::: pdr.formats.cassini 46 | options: 47 | heading_level: 4 48 | 49 | ### formats.checkers 50 | 51 | ::: pdr.formats.checkers 52 | options: 53 | heading_level: 4 54 | 55 | ### formats.clementine 56 | 57 | ::: pdr.formats.clementine 58 | options: 59 | heading_level: 4 60 | 61 | ### formats.dawn 62 | 63 | ::: pdr.formats.dawn 64 | options: 65 | heading_level: 4 66 | 67 | ### formats.diviner 68 | 69 | ::: pdr.formats.diviner 70 | options: 71 | heading_level: 4 72 | 73 | ### formats.epoxi 74 | 75 | ::: pdr.formats.epoxi 76 | options: 77 | heading_level: 4 78 | 79 | ### formats.galileo 80 | 81 | ::: pdr.formats.galileo 82 | options: 83 | heading_level: 4 84 | 85 | ### formats.ground 86 | 87 | ::: pdr.formats.ground 88 | options: 89 | heading_level: 4 90 | 91 | ### formats.ihw 92 | 93 | ::: pdr.formats.ihw 94 | options: 95 | heading_level: 4 96 | 97 | ### formats.juno 98 | 99 | ::: pdr.formats.juno 100 | options: 101 | heading_level: 4 102 | 103 | ### formats.lro 104 | 105 | ::: pdr.formats.lro 106 | options: 107 | heading_level: 4 108 | 109 | ### formats.lroc 110 | 111 | ::: pdr.formats.lroc 112 | options: 113 | heading_level: 4 114 | 115 | ### formats.mariner 116 | 117 | ::: pdr.formats.mariner 118 | options: 119 | heading_level: 4 120 | 121 | ### formats.mer 122 | 123 | ::: pdr.formats.mer 124 | options: 125 | heading_level: 4 126 | 127 | ### formats.mex 128 | 129 | ::: pdr.formats.mex 130 | options: 131 | heading_level: 4 132 | 133 | ### formats.mgn 134 | 135 | ::: pdr.formats.mgn 136 | options: 137 | heading_level: 4 138 | 139 | ### formats.mgs 140 | 141 | ::: pdr.formats.mgs 142 | options: 143 | heading_level: 4 144 | 145 | ### formats.mro 146 | 147 | ::: pdr.formats.mro 148 | options: 149 | heading_level: 4 150 | 151 | ### formats.msl_apxs 152 | 153 | ::: pdr.formats.msl_apxs 154 | options: 155 | heading_level: 4 156 | 157 | ### formats.msl_ccam 158 | 159 | ::: pdr.formats.msl_ccam 160 | options: 161 | heading_level: 4 162 | 163 | ### formats.msl_cmn 164 | 165 | ::: pdr.formats.msl_cmn 166 | options: 167 | heading_level: 4 168 | 
169 | ### formats.msl_places 170 | ::: pdr.formats.msl_places 171 | options: 172 | heading_level: 4 173 | 174 | ### formats.msl_rems 175 | ::: pdr.formats.msl_rems 176 | options: 177 | heading_level: 4 178 | 179 | ### formats.nh 180 | 181 | ::: pdr.formats.nh 182 | options: 183 | heading_level: 4 184 | 185 | ### formats.odyssey 186 | 187 | ::: pdr.formats.odyssey 188 | options: 189 | heading_level: 4 190 | 191 | ### formats.phoenix 192 | 193 | ::: pdr.formats.phoenix 194 | options: 195 | heading_level: 4 196 | 197 | ### formats.pvo 198 | 199 | ::: pdr.formats.pvo 200 | options: 201 | heading_level: 4 202 | 203 | ### formats.rosetta 204 | 205 | ::: pdr.formats.rosetta 206 | options: 207 | heading_level: 4 208 | 209 | ### formats.saturn_rpx 210 | 211 | ::: pdr.formats.saturn_rpx 212 | options: 213 | heading_level: 4 214 | 215 | ### formats.themis 216 | 217 | ::: pdr.formats.themis 218 | options: 219 | heading_level: 4 220 | 221 | ### formats.ulysses 222 | 223 | ::: pdr.formats.ulysses 224 | options: 225 | heading_level: 4 226 | 227 | ### formats.vega 228 | 229 | ::: pdr.formats.vega 230 | options: 231 | heading_level: 4 232 | 233 | ### formats.viking 234 | 235 | ::: pdr.formats.viking 236 | options: 237 | heading_level: 4 238 | 239 | ### formats.voyager 240 | 241 | ::: pdr.formats.voyager 242 | options: 243 | heading_level: 4 244 | 245 | ## func 246 | 247 | ::: pdr.func 248 | options: 249 | heading_level: 3 250 | 251 | ## loaders 252 | 253 | ::: pdr.loaders 254 | options: 255 | heading_level: 3 256 | 257 | ### loaders._helpers 258 | 259 | ::: pdr.loaders._helpers 260 | options: 261 | heading_level: 4 262 | 263 | ### loaders.astrowrap 264 | ::: pdr.loaders.astrowrap 265 | options: 266 | heading_level: 4 267 | 268 | ### loaders.datawrap 269 | 270 | ::: pdr.loaders.datawrap 271 | options: 272 | heading_level: 4 273 | 274 | ### loaders.dispatch 275 | 276 | ::: pdr.loaders.dispatch 277 | options: 278 | heading_level: 4 279 | 280 | ### loaders.handlers 281 | 282 | ::: pdr.loaders.handlers 283 | options: 284 | heading_level: 4 285 | 286 | ### loaders.image 287 | 288 | ::: pdr.loaders.image 289 | options: 290 | heading_level: 4 291 | 292 | ### loaders.queries 293 | 294 | ::: pdr.loaders.queries 295 | options: 296 | heading_level: 4 297 | 298 | ### loaders.table 299 | 300 | ::: pdr.loaders.table 301 | options: 302 | heading_level: 4 303 | 304 | ### loaders.text 305 | 306 | ::: pdr.loaders.text 307 | options: 308 | heading_level: 4 309 | 310 | ### loaders.utility 311 | 312 | ::: pdr.loaders.utility 313 | options: 314 | heading_level: 4 315 | 316 | ## np_utils 317 | 318 | ::: pdr.np_utils 319 | options: 320 | heading_level: 3 321 | 322 | ## parselabel 323 | 324 | ::: pdr.parselabel 325 | options: 326 | heading_level: 3 327 | 328 | ### parselabel.pds3 329 | 330 | ::: pdr.parselabel.pds3 331 | options: 332 | heading_level: 4 333 | 334 | ### parselabel.pds4 335 | 336 | ::: pdr.parselabel.pds4 337 | options: 338 | heading_level: 4 339 | 340 | ### parselabel.utils 341 | 342 | ::: pdr.parselabel.utils 343 | options: 344 | heading_level: 4 345 | 346 | ## pd_utils 347 | 348 | ::: pdr.pd_utils 349 | options: 350 | heading_level: 3 351 | 352 | ## pdr 353 | 354 | ::: pdr.pdr 355 | options: 356 | heading_level: 3 357 | 358 | ## pdrtypes 359 | 360 | ::: pdr.pdrtypes 361 | options: 362 | heading_level: 3 363 | 364 | ### pil_utils 365 | 366 | ::: pdr.pil_utils 367 | options: 368 | heading_level: 3 369 | 370 | ## pvl_utils 371 | 372 | ::: pdr.pvl_utils 373 | options: 374 | heading_level: 3 375 | 376 | ## utils 377 | 
378 | ::: pdr.utils
379 |     options:
380 |       heading_level: 3
381 | 
382 | 
383 | 
--------------------------------------------------------------------------------
/docs/pdr_joss_paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/docs/pdr_joss_paper.pdf
--------------------------------------------------------------------------------
/docs/pdr_pdart_proposal_roses20.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/docs/pdr_pdart_proposal_roses20.pdf
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: pdr
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - python>=3.9
6 |   - cytoolz
7 |   - numpy
8 |   - pandas>=2.0.0
9 |   - git
10 |   - dustgoggles
11 |   - pip
12 |   - multidict
13 |   - more-itertools
14 |   - rms-vax
15 |   # optional dependencies (use minimal_environment.yml if you'd prefer not to install)
16 |   - pvl
17 |   - astropy
18 |   - jupyter
19 |   - pillow
20 |   - pytest
21 |   - Levenshtein
--------------------------------------------------------------------------------
/minimal_environment.yml:
--------------------------------------------------------------------------------
1 | name: pdr
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - python>=3.9
6 |   - cytoolz
7 |   - numpy
8 |   - pandas>=2.0.0
9 |   - git
10 |   - pip
11 |   - multidict
12 |   - more-itertools
13 |   - dustgoggles
14 |   - rms-vax
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: Planetary Data Reader
2 | site_description: one tool to read them all; https://github.com/MillionConcepts/pdr
3 | theme:
4 |   name: material
5 | plugins:
6 |   - search
7 |   - mkdocstrings:
8 |       handlers:
9 |         python:
10 |           options:
11 |             filters: []
12 |             show_signature_annotations: true
13 | 
14 | nav:
15 |   - 'index.md'
16 |   - 'supported_datasets.md'
17 |   - 'version_history.md'
18 |   - 'code_of_conduct.md'
19 |   - Joss Publication: "https://joss.theoj.org/papers/10.21105/joss.07256"
20 |   - License: 'LICENSE.md'
21 |   - API Reference: 'pdr_api.md'
--------------------------------------------------------------------------------
/pdr/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | import os.path as _osp
3 | import sys
4 | from typing import Collection, Optional, TYPE_CHECKING, Union
5 | 
6 | from pdr.pdr import Data, Metadata
7 | 
8 | if TYPE_CHECKING:
9 |     from pathlib import Path
10 | 
11 | __version__ = "1.4.0"
12 | 
13 | pkg_dir = _osp.abspath(_osp.dirname(__file__))
14 | 
15 | 
16 | def read(
17 |     fp: Union[str, Path],
18 |     debug: bool = False,
19 |     label_fn: Optional[Union[Path, str]] = None,
20 |     search_paths: Union[Collection[str], str] = (),
21 |     skip_existence_check: bool = False,
22 |     **kwargs
23 | ) -> Data:
24 |     """
25 |     Read a data product with PDR. `fp` can be any file associated with the
26 |     product, preferably a detached label file if it exists. Returns a Data
27 |     object that provides an interface to the data and metadata in all available
28 |     files associated with the product.
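    A minimal illustrative example (the path is hypothetical; any file
    belonging to a product works):

        >>> import pdr
        >>> data = pdr.read("/archive/product.lbl")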
29 | """ 30 | return Data( 31 | fp, 32 | debug=debug, 33 | label_fn=label_fn, 34 | search_paths=search_paths, 35 | skip_existence_check=skip_existence_check, 36 | **kwargs 37 | ) 38 | 39 | 40 | def fastread( 41 | fp: Union[str, Path], 42 | debug: bool = False, 43 | search_paths: Union[Collection[str], str] = (), 44 | **kwargs 45 | ) -> Data: 46 | """ 47 | Read a file with PDR, with the assumption that the label is either 48 | attached to `fp` or that `fp` is itself a detached label file, and ignoring 49 | the usual double-check for `fp`'s actual existence in the filesystem. 50 | Intended for cases when you want access to a product's metadata very 51 | quickly and you know exactly where its label is. 52 | """ 53 | return read(fp, debug, fp, search_paths, True, **kwargs) 54 | 55 | 56 | # pdr.open() is an alias for pdr.read() 57 | setattr(sys.modules[__name__], 'open', read) 58 | -------------------------------------------------------------------------------- /pdr/_scaling.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from functools import wraps 3 | from itertools import product 4 | from numbers import Integral, Number, Real 5 | from typing import Optional, Sequence, Union 6 | 7 | import numpy as np 8 | 9 | from pdr.formats.checkers import specialblock 10 | from pdr.datatypes import PDS3_CONSTANT_NAMES, IMPLICIT_PDS3_CONSTANTS 11 | from pdr.np_utils import casting_to_float 12 | from pdr.pdrtypes import PDRLike 13 | 14 | 15 | def find_special_constants( 16 | data: PDRLike, obj: np.ndarray, name: str 17 | ) -> dict[str, Number]: 18 | """ 19 | attempts to find special constants in an ndarray associated with a PDS3 20 | object by referencing the label and "standard" special constant values. 21 | """ 22 | # NOTE: doesn't do anything for PDS4 products at present, although this 23 | # may not be important; usually pds4_tools handles it. 
24 | 
25 |     block = specialblock(data, name)
26 |     # check for explicitly-defined special constants
27 |     specials = {
28 |         name: block[name]
29 |         for name in PDS3_CONSTANT_NAMES
30 |         if (name in block.keys()) and not (block[name] == "N/A")
31 |     }
32 |     for k in specials.keys():
33 |         if isinstance(specials[k], Sequence):
34 |             specials[k] = specials[k][0]
35 |     # ignore uint8 implicit constants (0, 255) for now -- too problematic
36 |     # TODO: maybe add an override
37 |     if obj.dtype.name == "uint8":
38 |         return specials
39 |     # check for implicit constants appropriate to the sample type
40 |     implicit_possibilities = IMPLICIT_PDS3_CONSTANTS[obj.dtype.name]
41 |     # can't check for nans with "in" because it's an equality check, so
42 |     # we don't intend this to be used, just want to make the key and put
43 |     # in a value that won't conflict later
44 |     if np.any(~np.isfinite(obj.data)):
45 |         specials["INVALIDS"] = np.nan
46 |     return specials | {
47 |         possibility: constant
48 |         for possibility, constant in implicit_possibilities.items()
49 |         if constant in obj
50 |     }
51 | 
52 | 
53 | def mask_specials(obj, specials):
54 |     """Return `obj` as a masked array with special-constant values masked."""
55 |     obj = np.ma.masked_array(obj)
56 |     if np.nan in specials:
57 |         # masks infs and nans as well
58 |         obj.mask = np.ma.mask_or(np.isin(obj.data, specials),
59 |                                  ~np.isfinite(obj.data))
60 |     else:
61 |         obj.mask = np.isin(obj.data, specials)
62 |     return obj
63 | 
64 | 
65 | def fit_to_scale(
66 |     arr: np.ndarray,
67 |     scale: Union[Integral, Real],
68 |     offset: Union[Integral, Real]
69 | ) -> np.ndarray:
70 |     """
71 |     Return a version of `arr` cast to the minimum dtype that will hold its
72 |     range of values after multiplying by `scale` and adding `offset`.
73 | 
74 |     Supports:
75 | 
76 |     float32, float64, uint8, int8, uint16, int16, uint32, int32, uint64, int64.
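    For example (reasoning from the ranges above, not a documented case): an
    int8 array with scale=100 and offset=0 is returned as int16, or as uint16
    if all of its values are non-negative, since unsigned types are tried
    first.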
77 | """ 78 | if arr.dtype.char not in 'bBhHiIlLqQnNpPf': 79 | raise TypeError(f"This function does not support {arr.dtype.name}") 80 | if arr.dtype.char in 'fd' or int(scale + offset) != scale + offset: 81 | bases, widths, infofunc = ('f',), (4, 8), np.finfo 82 | else: 83 | bases, widths, infofunc = ('u', 'i'), (1, 2, 4, 8), np.iinfo 84 | amin, amax = map(int, (arr.min(), arr.max())) 85 | smin, smax = amin * scale + offset, amax * scale + offset 86 | for base, width in product(bases, widths): 87 | candidate = np.dtype(f'{base}{width}') 88 | cinfo = infofunc(candidate) 89 | if smin >= cinfo.min and smax <= cinfo.max: 90 | return arr.astype(candidate) 91 | raise TypeError("Unable to find a suitable data type for scaling.") 92 | 93 | 94 | def overflow_wrap(array_func): 95 | @wraps(array_func) 96 | def with_upcasting(arr, scale, offset, *args, **kwargs): 97 | with warnings.catch_warnings(): 98 | warnings.filterwarnings("error", message=".*overflow enc.*") 99 | try: 100 | return array_func(arr, scale, offset, *args, **kwargs) 101 | except (OverflowError, RuntimeWarning): 102 | arr = fit_to_scale(arr, scale, offset) 103 | return array_func(arr, scale, offset, *args, **kwargs) 104 | 105 | return with_upcasting 106 | 107 | 108 | def _copy_scale(obj, offset, scale): 109 | try: 110 | # TODO: we should also be doing this per-plane scaling in inplace case 111 | if len(obj) == len(scale) == len(offset) > 1: 112 | planes = [ 113 | obj[ix] * scale[ix] + offset[ix] for ix in range(len(scale)) 114 | ] 115 | stacked = np.rollaxis(np.ma.dstack(planes), 2) 116 | return stacked 117 | except TypeError: 118 | pass # len() is not usable on a float object 119 | return obj * scale + offset 120 | 121 | 122 | def _inplace_scale(obj, offset, scale): 123 | if len(obj) == len(scale) == len(offset) > 1: 124 | for ix, _ in enumerate(scale): 125 | obj[ix] = obj[ix] * scale[ix] + offset[ix] 126 | else: 127 | obj *= scale 128 | obj += offset 129 | return obj 130 | 131 | 132 | def scale_array( 133 | meta: PDRLike, 134 | obj: np.ndarray, 135 | object_name: str, 136 | inplace: bool = False, 137 | float_dtype: Optional["np.dtype"] = None, 138 | ): 139 | """""" 140 | from pdr.formats.checkers import specialblock 141 | 142 | block = specialblock(meta, object_name) 143 | scale, offset = 1, 0 144 | if "SCALING_FACTOR" in block.keys(): 145 | scale = block["SCALING_FACTOR"] 146 | if isinstance(scale, dict): 147 | scale = scale["value"] 148 | if "OFFSET" in block.keys(): 149 | offset = block["OFFSET"] 150 | if isinstance(offset, dict): 151 | offset = offset["value"] 152 | # meaningfully better for enormous unscaled arrays 153 | if (scale == 1) and (offset == 0): 154 | return obj 155 | # try to perform the operation in-place if requested, although if 156 | # we're casting to float, we can't 157 | # TODO: detect rollover cases, etc. 
158 | if inplace is True and not casting_to_float(obj, scale, offset): 159 | return overflow_wrap(_inplace_scale)(obj, offset, scale) 160 | # if we're casting to float, permit specification of dtype 161 | # prior to operation (float64 is numpy's default and often excessive) 162 | if casting_to_float(obj, scale, offset): 163 | if float_dtype is not None: 164 | obj = obj.astype(float_dtype) 165 | return overflow_wrap(_copy_scale)(obj, offset, scale) 166 | 167 | 168 | # TODO: shake this out much more vigorously 169 | # noinspection PyUnresolvedReferences 170 | def scale_pds4_tools_struct(struct: object) -> np.ndarray: 171 | """see pds4_tools.reader.read_arrays.new_array""" 172 | # TODO: apply bit_mask 173 | from pdr.pds4_tools.reader.data_types import apply_scaling_and_value_offset 174 | 175 | array = struct.data 176 | element_array = struct.meta_data["Element_Array"] 177 | scale_kwargs = { 178 | "scaling_factor": element_array.get("scaling_factor"), 179 | "value_offset": element_array.get("value_offset"), 180 | } 181 | # TODO: is this important? 182 | # dtype = pds_to_numpy_type(struct.meta_data.data_type(), 183 | # data=array, **scale_kwargs) 184 | special_constants = struct.meta_data.get("Special_Constants") 185 | array = apply_scaling_and_value_offset( 186 | array, special_constants=special_constants, **scale_kwargs 187 | ) 188 | if hasattr(array, "mask"): 189 | return np.ma.masked_array(np.asarray(array.data), array.mask) 190 | return np.asarray(array) 191 | -------------------------------------------------------------------------------- /pdr/errors.py: -------------------------------------------------------------------------------- 1 | class AlreadyLoadedError(Exception): 2 | """ 3 | We already loaded this object and haven't been instructed to reload it. 4 | """ 5 | pass 6 | 7 | 8 | class DuplicateKeyWarning(UserWarning): 9 | """This product has duplicate object names; we're renaming them.""" 10 | pass 11 | -------------------------------------------------------------------------------- /pdr/formats/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a wide variety of special-case behaviors for 3 | nonconforming or malformatted data products. It implements these behaviors as 4 | functions in distinct submodules organized by 'dataset' (mission, instrument, 5 | etc.); the `checkers` submodule contains dispatch functions that preempt 6 | generic behaviors and redirect them to functions from one of the dataset 7 | submodules. See the documentation for `checkers` for details on this behavior. 
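For example, a checker may recognize a product as belonging to a dataset with
a known label defect and route its load through a function in the matching
submodule (say, `formats.mex`) rather than through the generic loader.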
8 | """ 9 | 10 | from .checkers import * 11 | import pdr.formats.cassini as cassini 12 | import pdr.formats.clementine as clementine 13 | import pdr.formats.dawn as dawn 14 | import pdr.formats.diviner as diviner 15 | import pdr.formats.epoxi as epoxi 16 | import pdr.formats.galileo as galileo 17 | import pdr.formats.ground as ground 18 | import pdr.formats.ihw as ihw 19 | import pdr.formats.iue as iue 20 | import pdr.formats.juno as juno 21 | import pdr.formats.lp as lp 22 | import pdr.formats.lroc as lroc 23 | import pdr.formats.lro as lro 24 | import pdr.formats.mariner as mariner 25 | import pdr.formats.mer as mer 26 | import pdr.formats.mex as mex 27 | import pdr.formats.mgn as mgn 28 | import pdr.formats.mgs as mgs 29 | import pdr.formats.mro as mro 30 | import pdr.formats.msl_apxs as msl_apxs 31 | import pdr.formats.msl_cmn as msl_cmn 32 | import pdr.formats.msl_ccam as msl_ccam 33 | import pdr.formats.msl_places as msl_places 34 | import pdr.formats.msl_rems as msl_rems 35 | import pdr.formats.msx as msx 36 | import pdr.formats.nh as nh 37 | import pdr.formats.odyssey as odyssey 38 | import pdr.formats.phoenix as phoenix 39 | import pdr.formats.pvo as pvo 40 | import pdr.formats.rosetta as rosetta 41 | import pdr.formats.saturn_rpx as saturn_rpx 42 | import pdr.formats.themis as themis 43 | import pdr.formats.ulysses as ulysses 44 | import pdr.formats.vega as vega 45 | import pdr.formats.viking as viking 46 | import pdr.formats.voyager as voyager 47 | -------------------------------------------------------------------------------- /pdr/formats/clementine.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pdr.loaders.queries 4 | 5 | 6 | def get_offset(data, pointer): 7 | """ 8 | HITS 9 | * clem_GEO 10 | * bsr_rdr_data 11 | """ 12 | start_row = int(re.split(r",|[(|)]", data.metaget(f"^{pointer}"))[2]) 13 | return True, (start_row - 1) * data.metaget("RECORD_BYTES") 14 | 15 | 16 | def get_fn(data, object_name): 17 | """ 18 | HITS 19 | * clem_GEO 20 | * bsr_rdr_data 21 | """ 22 | target = re.split(r",|[(|)]", data.metaget(f"^{object_name}"))[1] 23 | return True, target 24 | 25 | 26 | def get_structure(block, name, filename, data, identifiers): 27 | """ 28 | HITS: 29 | * clem_GEO 30 | * bsr_rdr_data 31 | """ 32 | fmtdef = pdr.loaders.queries.read_table_structure( 33 | block, name, filename, data, identifiers 34 | ) 35 | import numpy as np 36 | import pandas as pd 37 | 38 | fmtdef = pd.concat([fmtdef, fmtdef], ignore_index=True) 39 | fmtdef["NAME"] = fmtdef["NAME"].str.split("_", expand=True)[0] 40 | fmtdef["NAME"] = fmtdef["NAME"].str.cat(map(str, fmtdef.index), sep="_") 41 | fmtdef.ITEM_OFFSET = 8 42 | fmtdef.ITEM_BYTES = 8 43 | from pdr.loaders.queries import _fill_empty_byte_rows 44 | from pdr.pd_utils import insert_sample_types_into_df 45 | 46 | fmtdef['BYTES'] = np.nan 47 | fmtdef = _fill_empty_byte_rows(fmtdef) 48 | fmtdef, dt = insert_sample_types_into_df(fmtdef, identifiers) 49 | return fmtdef, dt 50 | -------------------------------------------------------------------------------- /pdr/formats/dawn.py: -------------------------------------------------------------------------------- 1 | class DoesNotExistError(Exception): 2 | """""" 3 | pass 4 | 5 | 6 | def dawn_history_hdu_exception(): 7 | """ 8 | filter out spurious HISTORY pointer 9 | 10 | HITS 11 | * dawn 12 | * fc_edr_fit 13 | * fc_rdr_fit 14 | """ 15 | raise DoesNotExistError( 16 | "Dawn FITS HISTORY extensions do not actually exist." 
17 |     )
18 | 
--------------------------------------------------------------------------------
/pdr/formats/diviner.py:
--------------------------------------------------------------------------------
1 | def diviner_l4_table_loader(fmtdef_dt, filename):
2 |     """
3 |     Because these can contain the value "NaN", combined with the fact that they
4 |     are space-padded, pd.read_csv sometimes casts some columns to object,
5 |     turning some of their values into strings and some into float, throwing
6 |     warnings and making it obnoxious to work with them (users will randomly not
7 |     be able to, e.g., add two columns together without a data cleaning step).
8 | 
9 |     HITS
10 |     * diviner
11 |     * l4
12 |     """
13 |     import numpy as np
14 |     import pandas as pd
15 | 
16 |     table = pd.DataFrame(
17 |         np.loadtxt(filename, delimiter=",", skiprows=1),
18 |         columns=[c for c in fmtdef_dt[0]["NAME"] if "PLACEHOLDER" not in c],
19 |     )
20 |     return table
21 | 
--------------------------------------------------------------------------------
/pdr/formats/epoxi.py:
--------------------------------------------------------------------------------
1 | from pdr.loaders.queries import table_position
2 | 
3 | 
4 | def cart_model_get_position(identifiers, block, target, name, start_byte):
5 |     """
6 |     The cartesian shape model's RECORD_BYTES and all three of the tables'
7 |     ROW_BYTES should be 79 but the label lists them as 80.
8 | 
9 |     HITS
10 |     * epoxi
11 |     * shape
12 |     """
13 |     table_props = table_position(identifiers, block, target, name, start_byte)
14 |     row_bytes = 79
15 |     table_props["start"] = row_bytes * (target[1] - 1)
16 |     table_props["length"] = row_bytes * block["ROWS"]
17 |     return table_props
18 | 
19 | 
20 | def hriv_deconv_mask_start_byte(name, hdulist):
21 |     """
22 |     The EPOXI HRIV deconvolved radiance files have incorrect start byte
23 |     specifications for the MASK HDU.
24 | 
25 |     HITS
26 |     * epoxi
27 |     * hriv_deconvolved
28 |     """
29 |     if 'HEADER' in name:
30 |         return hdulist.fileinfo('MASK')['hdrLoc']
31 |     return hdulist.fileinfo('MASK')['datLoc']
32 | 
--------------------------------------------------------------------------------
/pdr/formats/ground.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | def mssso_cal_start_byte(name, hdulist):
4 |     """
5 |     A small subset of MSSSO CASPIR calibration images have the wrong start byte
6 |     for the IMAGE pointer in their PDS3 labels.
7 | 
8 |     HITS
9 |     * sl9_jupiter_impact
10 |     * mssso_cal
11 |     """
12 |     if 'HEADER' in name:
13 |         return 0
14 |     return hdulist.fileinfo(0)['datLoc']
15 | 
16 | 
17 | def wff_atm_special_block(data, name):
18 |     """
19 |     One WFF/ATM DEM image opens fine (BBMESA2X2), the other two (SCHOONER2X2
20 |     and SEDAN2X2) have their LINES and LINE_SAMPLES values backwards.
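    This special case swaps the two values back before the image is read.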
21 | 
22 |     HITS
23 |     * wff_atm
24 |     * dem_img
25 |     """
26 |     block = data.metablock_(name)
27 | 
28 |     if data.metaget_("PRODUCT_ID").startswith("S"):
29 |         real_line_samples = block["LINES"]
30 |         real_lines = block["LINE_SAMPLES"]
31 | 
32 |         block["LINES"] = real_lines
33 |         block["LINE_SAMPLES"] = real_line_samples
34 |         return True, block
35 | 
36 |     return False, block
37 | 
38 | def ebrocc_geom_get_position(identifiers, block, target, name, start_byte):
39 |     """
40 |     ROW_BYTES = 45 in the labels, but it should be 47.
41 | 
42 |     HITS
43 |     * ground_based
44 |     * ring_occ_1989_geometry
45 |     """
46 |     from pdr.loaders.queries import table_position
47 | 
48 |     table_props = table_position(identifiers, block, target, name, start_byte)
49 |     n_rows = block["ROWS"]
50 |     row_bytes = block["ROW_BYTES"] + 2
51 |     table_props["length"] = n_rows * row_bytes
52 |     return table_props
53 | 
54 | def trivial_header_loader():
55 |     """
56 |     The HEADER pointer is just the SPREADSHEET table's header row, and it does
57 |     not open because "BYTES = UNK".
58 | 
59 |     HITS
60 |     * apollo
61 |     * BUG
62 |     """
63 |     warnings.warn(
64 |         "This product's HEADER pointer is not currently supported."
65 |     )
66 |     return True
67 | 
--------------------------------------------------------------------------------
/pdr/formats/ihw.py:
--------------------------------------------------------------------------------
1 | def curve_table_loader(filename, fmtdef_dt):
2 |     """
3 |     The labels do not always count column bytes correctly.
4 | 
5 |     HITS
6 |     * ihw_isrn
7 |     * curve
8 |     """
9 |     import pandas as pd
10 |     names = [c for c in fmtdef_dt[0].NAME if "PLACEHOLDER" not in c]
11 |     table = pd.read_csv(filename, header=None, sep=r"\s+")
12 |     assert len(table.columns) == len(names), "mismatched column count"
13 |     table.columns = names
14 |     return table
15 | 
16 | 
17 | def add_newlines_table_loader(fmtdef_dt, block, filename, start_byte):
18 |     """
19 |     Some Halley V1.0 tables (MSN, PPN, and IRSN datasets) are missing
20 |     newline characters between rows. (Also applies to some ICE ephemeris tables)
21 | 
22 |     HITS
23 |     * ihw
24 |     * ms_radar
25 |     * ms_vis
26 |     * ice
27 |     * ephem_tbl
28 |     """
29 |     from io import StringIO
30 |     import pandas as pd
31 |     from pdr.utils import head_file
32 | 
33 |     with head_file(filename) as f:
34 |         f.read(start_byte)
35 |         newlines_added = bytearray()
36 |         for row in range(0, block["ROWS"]):
37 |             bytes_ = f.read(block["ROW_BYTES"])
38 |             newlines_added += bytes_ + b"\n"  # Add a newline to each row
39 |     string_buffer = StringIO(newlines_added.decode())
40 | 
41 |     # Adapted from _interpret_as_ascii()
42 |     fmtdef, dt = fmtdef_dt
43 |     colspecs = []
44 |     for record in fmtdef.to_dict("records"):
45 |         col_length = int(record["BYTES"])
46 |         colspecs.append((record["SB_OFFSET"], record["SB_OFFSET"] + col_length))
47 |     string_buffer.seek(0)
48 |     table = pd.read_fwf(string_buffer, header=None, colspecs=colspecs)
49 |     string_buffer.close()
50 |     table.columns = fmtdef.NAME.tolist()
51 |     table = table.drop([k for k in table.keys() if "PLACEHOLDER" in k], axis=1)
52 |     return table
53 | 
54 | 
55 | def get_special_block(data, name):
56 |     """
57 |     A handful of MSN Radar tables have column names that were not reading
58 |     correctly and were ending up as "NaN", which also caused an
59 |     AttributeError when running ix check.
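    This special case fills in the missing NAME fields (">=1SEC" and
    ">=8SEC"), keyed on each column's START_BYTE.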
60 | 
61 |     HITS
62 |     * ihw
63 |     * ms_radar
64 |     """
65 |     block = data.metablock_(name)
66 |     for item in iter(block.items()):
67 |         if "COLUMN" in item:
68 |             if item[1]["START_BYTE"] == 17 and "NAME" not in item[1]:
69 |                 item[1].add("NAME", ">=1SEC")
70 |             if item[1]["START_BYTE"] == 21 and "NAME" not in item[1]:
71 |                 item[1].add("NAME", ">=8SEC")
72 |     return block
73 | 
74 | 
75 | def get_structure(block, name, filename, data, identifiers):
76 |     """
77 |     SSN products with a SPECTRUM pointer were opening with an incorrect
78 |     column name.
79 | 
80 |     HITS
81 |     * ihw
82 |     * spec_hal_cal
83 |     """
84 |     from pdr.loaders.queries import read_table_structure
85 |     from pdr.pd_utils import insert_sample_types_into_df
86 | 
87 |     fmtdef = read_table_structure(
88 |         block, name, filename, data, identifiers
89 |     )
90 |     fmtdef.at[0, "NAME"] = fmtdef.at[0, "COLUMN_NAME"]
91 | 
92 |     fmtdef, dt = insert_sample_types_into_df(fmtdef, identifiers)
93 |     return fmtdef, dt
94 | 
--------------------------------------------------------------------------------
/pdr/formats/iue.py:
--------------------------------------------------------------------------------
1 | def get_special_block(data, name):
2 |     """
3 |     A subset of the IUE resampled SSI/LSI comet images have a typo in their
4 |     labels: the QUALITY_IMAGE pointer name does not match its OBJECT name.
5 | 
6 |     HITS
7 |     * iue
8 |     * comet_image
9 |     """
10 |     if data.metablock_(name) is not None:
11 |         return False, None
12 |     return True, data.metablock_("QUALITY_QUALITY_IMAGE")
13 | 
--------------------------------------------------------------------------------
/pdr/formats/juno.py:
--------------------------------------------------------------------------------
1 | def jiram_rdr_sample_type():
2 |     """
3 |     JIRAM RDRs, both images and tables, are labeled as MSB but
4 |     are actually LSB.
5 | 
6 |     HITS
7 |     * juno_jiram
8 |     * IMG_RDR
9 |     * SPE_RDR
10 |     """
11 |     return "B"
12 | 
--------------------------------------------------------------------------------
/pdr/formats/mariner.py:
--------------------------------------------------------------------------------
1 | def get_special_block(data, name):
2 |     """
3 |     Mariner 9 IRIS tables have 316 ROW_PREFIX_BYTES followed by 1 column
4 |     with 1500 ITEMS. The column's START_BYTE = 317, but it should be 1.
5 | 
6 |     HITS
7 |     * mariner
8 |     * iris
9 |     """
10 |     block = data.metablock_(name)
11 |     block["COLUMN"]["START_BYTE"] = 1
12 |     return block
13 | 
--------------------------------------------------------------------------------
/pdr/formats/mer.py:
--------------------------------------------------------------------------------
1 | def rss_spreadsheet_loader(filename, fmtdef_dt):
2 |     """
3 |     The RSS UHFD labels have the wrong ROWS value for most products.
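    Loading the file with pd.read_csv and checking the column count against
    the format definition sidesteps the bad ROWS value.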
 4 | 
 5 |     HITS
 6 |     * mer_rss
 7 |     * uhfd
 8 |     """
 9 |     import pandas as pd
10 | 
11 |     fmtdef, dt = fmtdef_dt
12 |     table = pd.read_csv(filename, header=None, sep=",")
13 |     assert len(table.columns) == len(fmtdef.NAME.tolist())
14 |     table.columns = fmtdef.NAME.tolist()
15 |     return table
16 | 
--------------------------------------------------------------------------------
/pdr/formats/mex.py:
--------------------------------------------------------------------------------
 1 | from pdr.loaders.queries import table_position
 2 | 
 3 | 
 4 | def marsis_get_position(identifiers, block, target, name, start_byte):
 5 |     """
 6 |     HITS
 7 |     * mex_marsis
 8 |     * TEC_EDR
 9 |     """
10 |     table_props = table_position(identifiers, block, target, name, start_byte)
11 |     n_records = identifiers["FILE_RECORDS"]
12 |     record_bytes = 143
13 |     table_props["length"] = n_records * record_bytes
14 |     return table_props
15 | 
16 | 
17 | def aspera_table_loader(filename, fmtdef_dt):
18 |     """
19 |     The ASPERA IMA EDRs are ASCII CSV tables containing 2 data types: SENSOR
20 |     and MODE. The VALUES column is repeated and has 96 items total. In the MODE
21 |     rows only the first VALUES item contains data, and should be followed by 95
22 |     'missing' items.
23 |     In reality these rows have 96 empty/missing items because of an extra
24 |     comma. This special case cuts off the extra column during the pd.read_csv()
25 |     call.
26 | 
27 |     HITS
28 |     * mex_aspera
29 |     * ima
30 |     """
31 |     import pandas as pd
32 | 
33 |     fmtdef, dt = fmtdef_dt
34 |     table = pd.read_csv(
35 |         filename, header=None, usecols=range(len(fmtdef.NAME.tolist()))
36 |     )
37 |     assert len(table.columns) == len(fmtdef.NAME.tolist())
38 |     table.columns = fmtdef.NAME.tolist()
39 |     return table
40 | 
41 | 
42 | def aspera_ima_ddr_structure(block, name, filename, data, identifiers):
43 |     """
44 |     The ASPERA IMA DDR table opens correctly as written in its label, but
45 |     the BYTES values for columns 3 and 4 are wrong.
46 | 
47 |     HITS
48 |     * mex_aspera
49 |     * ima_ddr
50 |     """
51 |     from pdr.loaders.queries import read_table_structure
52 | 
53 |     fmtdef = read_table_structure(
54 |         block, name, filename, data, identifiers
55 |     )
56 |     fmtdef.at[2, "BYTES"] = 12
57 |     fmtdef.at[3, "BYTES"] = 12
58 |     return fmtdef, None
59 | 
60 | 
61 | def pfs_edr_special_block(data, name):
62 |     """
63 |     The PFS EDRs have a few errors in their labels prior to orbit 8945, after
64 |     which they are corrected.
65 | 
66 |     HITS
67 |     * mex_pfs
68 |     * raw_lwc
69 |     * raw_swc
70 |     * cal_lwc
71 |     * cal_swc
72 |     * hk_early_mission
73 |     * orb001_lwc
74 |     * orb001_swc
75 |     """
76 |     block = data.metablock_(name)
77 |     orbit_number = data.metaget_("ORBIT_NUMBER")
78 | 
79 |     if orbit_number == "N/A" or int(orbit_number) < 8945:
80 |         # Fixes the number of rows in the table by replacing ROWS with
81 |         # FILE_RECORDS.
82 |         block["ROWS"] = data.metaget_("FILE_RECORDS")
83 |         # Replaces the time columns' DATA_TYPEs with the correct type based on
84 |         # products created later in the mission.
85 |         for item in iter(block.items()):
86 |             if "COLUMN" in item:
87 |                 if item[1]["NAME"] == "OBT OBSERVATION TIME":
88 |                     item[1]["DATA_TYPE"] = "PC_REAL"
89 |                 if item[1]["NAME"] == "SCET OBSERVATION TIME":
90 |                     item[1]["DATA_TYPE"] = "PC_UNSIGNED_INTEGER"
91 |         return True, block
92 |     return False, block
93 | 
94 | 
95 | def mrs_ddr_atmo_position(identifiers, block, target, name, start_byte):
96 |     """
97 |     The MRS derived atmosphere profiles were opening with data cut off at the
98 |     ends of the tables.
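    (Hypothetical illustration: if each of 1,000 rows is really 2 bytes
    wider than the label claims, the computed table extent falls
    1000 * 2 = 2,000 bytes short, truncating the final rows.)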
Recalculating the table length with ROW_BYTES = 278 99 | instead of 276 fixes it. 100 | 101 | HITS 102 | * mex_mrs 103 | * occ_atmo 104 | """ 105 | table_props = table_position(identifiers, block, target, name, start_byte) 106 | row_bytes = 278 107 | table_props["length"] = row_bytes * block["ROWS"] 108 | return table_props 109 | 110 | 111 | def mrs_get_position(identifiers, block, target, name, start_byte): 112 | """ 113 | MRS ICL level 1b DOPPLER_TABLEs and ODF level 2 RANGING_TABLEs undercount 114 | ROW_BYTES by 1. 115 | 116 | HITS 117 | * mex_mrs 118 | * lvl_1b_icl (partial) 119 | * lvl_2_odf (partial) 120 | """ 121 | table_props = table_position(identifiers, block, target, name, start_byte) 122 | row_bytes = block["ROW_BYTES"] + 1 123 | table_props["length"] = row_bytes * block["ROWS"] 124 | return table_props 125 | 126 | 127 | def mrs_l1b_odf_table_loader(filename, fmtdef_dt): 128 | """ 129 | MRS level 1b ODF labels have variable and sometimes incorrect ROW_BYTES 130 | values. 131 | 132 | HITS 133 | * mex_mrs 134 | * lvl_1b_odf 135 | """ 136 | import pandas as pd 137 | 138 | fmtdef, dt = fmtdef_dt 139 | table = pd.read_csv(filename, header=None, sep=r"\s+") 140 | table.columns = [ 141 | f for f in fmtdef['NAME'] if not f.startswith('PLACEHOLDER') 142 | ] 143 | return table 144 | 145 | 146 | def mrs_l1b_odf_rmp_redirect(data): 147 | """ 148 | RMP tables are a subset of MRS level 1b ODFs that were not opening because 149 | their pointer and object names do not match. 150 | 151 | HITS: 152 | * mex_mrs 153 | * lvl_1b_odf (partial) 154 | """ 155 | object_name = "RAMP_TABLE" 156 | block = data.metablock_(object_name) 157 | return block 158 | 159 | def vmc_rdr_hdu_selection(name, hdulist): 160 | """ 161 | The VMC RDRs have 1 IMAGE pointer and 2 IMAGE objects. From the volume's 162 | readme: "The first layer includes the calibrated values, and the second 163 | layer includes the raw values." It is unclear whether or not the 'second 164 | layer' is a copy of the EDR image or if intermediate calibration steps 165 | have been applied to it. 166 | Assuming the single band image is akin to the EDRs, this special case 167 | returns the multiband calibrated image. 168 | 169 | HITS 170 | * mex_vmc 171 | * rdr 172 | """ 173 | 174 | return hdulist.fileinfo(1)['datLoc'] 175 | -------------------------------------------------------------------------------- /pdr/formats/mgn.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | 3 | from pdr.utils import head_file 4 | 5 | 6 | def geom_table_loader(filename, fmtdef_dt): 7 | """ 8 | The Magellan radar system geometry tables include null bytes between rows. 
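    (A minimal sketch of the cleanup performed below, assuming the file fits
    comfortably in memory and using a hypothetical filename:

        from pathlib import Path

        raw = Path("geom_table.tab").read_bytes()
        text = raw.replace(b"\x00", b"").decode()

    after which the rows parse as ordinary CSV.)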
9 | 10 | HITS 11 | * gal_nims 12 | * impact 13 | * mgn_image 14 | * midr_tables 15 | """ 16 | import pandas as pd 17 | from pdr.utils import head_file 18 | 19 | fmtdef, dt = fmtdef_dt 20 | with head_file(filename) as buf: 21 | bytes_ = buf.read().replace(b"\x00", b"") 22 | string_buffer = StringIO(bytes_.decode()) 23 | string_buffer.seek(0) 24 | table = pd.read_csv(string_buffer, header=None) 25 | names = [n for n in fmtdef['NAME'] if 'PLACEHOLDER' not in n] 26 | assert len(table.columns) == len(names), 'column name mismatch' 27 | string_buffer.close() 28 | table.columns = names 29 | return table 30 | 31 | 32 | def orbit_table_in_img_loader(): 33 | """ 34 | HITS 35 | * mgn_post_mission 36 | * fmap 37 | * fmap_browse 38 | """ 39 | return True 40 | 41 | 42 | def get_fn(data): 43 | """ 44 | HITS 45 | * mgn_post_mission 46 | * fmap 47 | * fmap_browse 48 | """ 49 | target = data.filename 50 | return True, target 51 | 52 | 53 | def occultation_loader(identifiers, fmtdef_dt, block, filename): 54 | """ 55 | Checks end of each row for newline character. If missing, removes 56 | extraneous newline from middle of the row and adjusts for the extra byte. 57 | Adapted from _interpret_as_ascii() 58 | 59 | HITS 60 | * mgn_occult 61 | * ddr 62 | """ 63 | import pandas as pd 64 | 65 | fmtdef, dt = fmtdef_dt 66 | record_length = block["ROW_BYTES"] 67 | 68 | # Checks end of each row for newline character. If missing, removes extraneous 69 | # newline from middle of the row and adjusts for the extra byte. 70 | with head_file(filename) as f: 71 | processed = bytearray() 72 | for row in range(0, identifiers["FILE_RECORDS"]): 73 | bytes_ = f.read(record_length) 74 | if not bytes_.endswith(b"\n"): 75 | new_bytes_ = bytes_.replace(b"\n", b"") + f.read(1) 76 | processed += new_bytes_ 77 | else: 78 | processed += bytes_ 79 | string_buffer = StringIO(processed.decode()) 80 | # adapted from _interpret_as_ascii() 81 | colspecs = [] 82 | position_records = fmtdef.to_dict("records") 83 | for record in position_records: 84 | col_length = record["BYTES"] 85 | colspecs.append((record["SB_OFFSET"], record["SB_OFFSET"] + col_length)) 86 | string_buffer.seek(0) 87 | table = pd.read_fwf(string_buffer, header=None, colspecs=colspecs) 88 | string_buffer.close() 89 | 90 | table.columns = fmtdef.NAME.tolist() 91 | return table.drop("PLACEHOLDER_0", axis=1) 92 | 93 | 94 | def gvanf_sample_type(): 95 | return ">B" 96 | -------------------------------------------------------------------------------- /pdr/formats/mgs.py: -------------------------------------------------------------------------------- 1 | from pdr.loaders.queries import read_table_structure 2 | 3 | 4 | def get_odf_structure(block, name, filename, data, identifiers): 5 | """""" 6 | from pdr.pd_utils import insert_sample_types_into_df 7 | fmtdef = read_table_structure( 8 | block, name, filename, data, identifiers 9 | ) 10 | fmtdef.at[7, "BYTES"] = 2 11 | fmtdef[f"ROW_BYTES"] = block.get(f"ROW_BYTES") 12 | 13 | fmtdef, dt = insert_sample_types_into_df(fmtdef, identifiers) 14 | return fmtdef, dt 15 | 16 | 17 | def get_ecs_structure(block, name, filename, data, identifiers): 18 | """ 19 | HITS 20 | * mgs_rss_raw 21 | * ecs 22 | """ 23 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 24 | fmtdef = read_table_structure( 25 | block, name, filename, data, identifiers 26 | ) 27 | fmtdef.at[5, "START_BYTE"] = 80 28 | fmtdef[f"ROW_BYTES"] = block.get(f"ROW_BYTES") 29 | 30 | fmtdef = compute_offsets(fmtdef) 31 | fmtdef, dt = insert_sample_types_into_df(fmtdef, 
identifiers) 32 | return fmtdef, dt 33 | 34 | 35 | def mola_pedr_special_block(data, name, identifiers): 36 | """ 37 | Fix for FILE_RECORDS = "UNK" and ROWS = "UNK" in the MOLA PEDR labels. 38 | This special case calculates ROWS using the count_from_bottom_of_file() 39 | logic in reverse. 40 | 41 | HITS 42 | * mgs_mola 43 | * pedr 44 | * mgs_sampler 45 | * pedr 46 | """ 47 | import os 48 | from pathlib import Path 49 | from pdr.loaders.queries import data_start_byte 50 | 51 | block = data.metablock_(name) 52 | target = data.metaget_("^"+name) 53 | start_byte = data_start_byte(identifiers, block, target, data.filename) 54 | 55 | table_bytes = os.path.getsize(Path(data.filename)) - start_byte 56 | block["ROWS"] = int(table_bytes / block["ROW_BYTES"]) 57 | 58 | return block 59 | -------------------------------------------------------------------------------- /pdr/formats/mro.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | 3 | from pdr.loaders.queries import read_table_structure 4 | from pdr.utils import head_file 5 | 6 | 7 | def get_structure(block, name, filename, data, identifiers): 8 | """ 9 | The first column in the MCS (EDR/RDR/DDR) format files are just named "1" 10 | which is being read as 'int'. This was causing problems in read_table 11 | during the table.drop call 12 | 13 | HITS 14 | * mro 15 | * mcs_edr 16 | * mcs_rdr 17 | """ 18 | fmtdef = read_table_structure( 19 | block, name, filename, data, identifiers 20 | ) 21 | fmtdef["NAME"] = fmtdef["NAME"].values.astype(str) 22 | return fmtdef, None 23 | 24 | 25 | def mcs_ddr_table_loader(fmtdef_dt, block, filename, start_byte): 26 | """Reads each row of the table and removes extra newline characters. 27 | Adapted from _interpret_as_ascii().""" 28 | with head_file(filename) as f: 29 | f.read(start_byte) 30 | newlines_removed = bytearray() 31 | for row in range(0, block["ROWS"]): 32 | bytes_ = f.read(block["ROW_BYTES"]) 33 | newlines_removed += bytes_.replace(b"\n", b"") + b"\n" 34 | string_buffer = StringIO(newlines_removed.decode()) 35 | import pandas as pd 36 | from pdr.pd_utils import compute_offsets 37 | 38 | # Adapted from _interpret_as_ascii() 39 | fmtdef, dt = fmtdef_dt 40 | colspecs = [] 41 | position_records = compute_offsets(fmtdef).to_dict("records") 42 | for record in position_records: 43 | col_length = record["BYTES"] 44 | colspecs.append((record["SB_OFFSET"], record["SB_OFFSET"] + col_length)) 45 | string_buffer.seek(0) 46 | table = pd.read_fwf(string_buffer, header=None, colspecs=colspecs) 47 | string_buffer.close() 48 | 49 | table.columns = fmtdef.NAME.tolist() 50 | return table 51 | 52 | def crism_mrdr_ancill_position(identifiers, block, target, name, start_byte): 53 | """ 54 | ROW_BYTES = 14 in the labels, but it should be 16 (the RECORD_BYTES) 55 | 56 | HITS 57 | * crism 58 | * ancil_mrdr 59 | """ 60 | from pdr.loaders.queries import table_position 61 | 62 | table_props = table_position(identifiers, block, target, name, start_byte) 63 | n_rows = block["ROWS"] 64 | row_bytes = identifiers["RECORD_BYTES"] 65 | table_props["length"] = n_rows * row_bytes 66 | return table_props 67 | 68 | -------------------------------------------------------------------------------- /pdr/formats/msl_apxs.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def table_loader(pointer): 5 | """ 6 | we don't support these right now, or maybe ever 7 | 8 | HITS 9 | * msl_apxs 10 | * APXS_SCIENCE_EDR 11 | """ 12 | 
warnings.warn( 13 | f"The MSL APXS {pointer} tables are not currently supported." 14 | ) 15 | return True 16 | 17 | def trivial_header_loader(): 18 | """ 19 | The HEADER pointer is just the SPREADSHEET table's header row, and it does 20 | not open because "BYTES = UNK" 21 | 22 | HITS 23 | * msl_apxs 24 | * APXS_OXIDE_RDR 25 | * APXS_SPECTRUM_RDR 26 | """ 27 | warnings.warn( 28 | f"The MSL APXS RDR HEADER pointers are not currently supported." 29 | ) 30 | return True 31 | -------------------------------------------------------------------------------- /pdr/formats/msl_ccam.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def image_reply_table_loader(): 5 | """ 6 | HITS 7 | * msl_ccam 8 | * CCAM_RMI_EDR 9 | """ 10 | warnings.warn( 11 | "MSL ChemCam IMAGE_REPLY binary tables are not supported " 12 | "due to a formatting error in label files." 13 | ) 14 | return True 15 | -------------------------------------------------------------------------------- /pdr/formats/msl_cmn.py: -------------------------------------------------------------------------------- 1 | def spreadsheet_loader(filename): 2 | """ 3 | HITS 4 | * msl_cmn 5 | * DIFFRACTION_ALL_RDR 6 | * ENERGY_SINGLE_RDR 7 | * MINERAL_TABLES 8 | * msl_sam 9 | * l0_qms 10 | * l1a_qms 11 | * l1b_qms 12 | """ 13 | import pandas as pd 14 | return pd.read_csv(filename) 15 | 16 | 17 | def trivial_header_loader(): 18 | """ 19 | HITS 20 | * msl_cmn 21 | * DIFFRACTION_ALL_RDR 22 | * ENERGY_SINGLE_RDR 23 | * MINERAL_TABLES 24 | * msl_sam 25 | * l0_hk 26 | * l0_qms 27 | * l0_gc 28 | * l0_tls 29 | * l1a_hk 30 | * l1a_qms 31 | * l1a_gc 32 | * l1a_tls 33 | * l1b_qms 34 | * l1b_gc 35 | * l2_qms 36 | * l2_gc 37 | * l2_tls 38 | """ 39 | return True 40 | 41 | 42 | def fix_mangled_name(data): 43 | """ 44 | HITS 45 | * msl_cmn 46 | * HOUSEKEEPING 47 | """ 48 | object_name = "CHMN_HSKN_HEADER_TABLE" 49 | block = data.metablock_(object_name) 50 | return block 51 | 52 | 53 | def get_offset(object_name): 54 | """ 55 | incorrectly specifies object length rather than start byte 56 | 57 | HITS 58 | * msl_cmn 59 | * DIFFRACTION_ALL_RDR 60 | * ENERGY_SINGLE_RDR 61 | * MINERAL_TABLES 62 | * CCD_FRAME 63 | * DIFFRACTION_SINGLE 64 | * DIFFRACTION_SPLIT 65 | * DIFFRACTION_ALL 66 | * ENERGY_ALL 67 | * ENERGY_SINGLE 68 | * ENERGY_SPLIT 69 | * HOUSKEEPING 70 | * TRANSMIT_RAW 71 | """ 72 | if object_name == "HISTOGRAM": 73 | return True, 300 74 | if object_name == "CHMN_HSK_HEADER_TABLE": 75 | return True, 0 76 | return False, None 77 | -------------------------------------------------------------------------------- /pdr/formats/msl_places.py: -------------------------------------------------------------------------------- 1 | def spreadsheet_loader(filename, fmtdef_dt): 2 | """ 3 | HITS 4 | * msl_places 5 | * localizations 6 | """ 7 | import pandas as pd 8 | 9 | fmtdef, dt = fmtdef_dt 10 | table = pd.read_csv(filename, sep=",") 11 | assert len(table.columns) == len(fmtdef.NAME.tolist()) 12 | table.columns = fmtdef.NAME.tolist() 13 | return table 14 | -------------------------------------------------------------------------------- /pdr/formats/msl_rems.py: -------------------------------------------------------------------------------- 1 | def edr_table_loader(filename, fmtdef_dt, block, start_byte): 2 | """ 3 | The ROW_SUFFIX_BYTES are either miscounted by a few bytes, or we don't 4 | handle them correctly. There appears to be a related issue with the tables' 5 | start bytes as well. 
This special case bypasses both issues. 6 | 7 | HITS 8 | * msl_rems 9 | * edr_SP 10 | """ 11 | import pandas as pd 12 | 13 | fmtdef, dt = fmtdef_dt 14 | 15 | # number of rows to skip (there are multiple table pointers per product) 16 | skips = int(start_byte / 399) 17 | table = pd.read_csv(filename, header=None, 18 | skiprows=skips, 19 | nrows=block["ROWS"]) 20 | 21 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 22 | assert len(table.columns) == len(col_names), "mismatched column count" 23 | table.columns = col_names 24 | return table 25 | 26 | 27 | def edr_offset(data, name): 28 | """ 29 | HITS: 30 | * msl_rems 31 | * edr_HSDEF 32 | # edr_HSREG 33 | """ 34 | start_byte = data.metaget_("^"+name)[1] - 1 35 | return True, start_byte 36 | 37 | 38 | def rdr_table_loader(filename, fmtdef_dt): 39 | """ 40 | Missing values are variations of "UNK" and "NULL", which cause mixed dtype 41 | warnings when using the default pd.read_csv() parameters. 42 | 43 | HITS 44 | * msl_rems 45 | * rdr_rmd 46 | * rdr_rnv 47 | * rdr_rtl 48 | """ 49 | import pandas as pd 50 | 51 | fmtdef, dt = fmtdef_dt 52 | 53 | missing_const = [' UNK', ' UNK', ' UNK', ' UNK', 54 | ' UNK', ' UNK', 55 | ' NULL', ' NULL'] 56 | table = pd.read_csv(filename, header=None, 57 | na_values=missing_const) 58 | 59 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 60 | assert len(table.columns) == len(col_names), "mismatched column count" 61 | table.columns = col_names 62 | return table 63 | -------------------------------------------------------------------------------- /pdr/formats/msx.py: -------------------------------------------------------------------------------- 1 | 2 | def cube_envi_header_position(identifiers, block, target, name, start_byte, fn): 3 | """ 4 | The ENVI_HEADER pointer's BYTES = "N/A" 5 | 6 | HITS 7 | * msx 8 | * cubes 9 | """ 10 | from pdr.loaders.queries import table_position 11 | import os 12 | from pathlib import Path 13 | 14 | table_props = table_position(identifiers, block, target, name, start_byte) 15 | table_props["length"] = os.path.getsize(Path(fn)) 16 | return table_props 17 | 18 | -------------------------------------------------------------------------------- /pdr/formats/nh.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | 4 | def get_fn(data): 5 | """ 6 | The PEPSSI DDRs have an extra space at the start of the SPREADSHEET 7 | pointer's filename that causes 'file not found' errors. 8 | 9 | HITS 10 | * nh_derived 11 | * atmos_comp 12 | * nh_pepssi 13 | * flux_resampled 14 | """ 15 | label = Path(data.labelname) 16 | return True, Path(label.parent, f"{label.stem}.csv") 17 | -------------------------------------------------------------------------------- /pdr/formats/odyssey.py: -------------------------------------------------------------------------------- 1 | def map_table_loader(filename, fmtdef_dt): 2 | """ 3 | A few products open fine from their labels, but most do not. Seems like 4 | a byte counting issue in the labels. 5 | 6 | HITS 7 | * mars_odyssey 8 | * maps 9 | """ 10 | import pandas as pd 11 | names = [c for c in fmtdef_dt[0]['NAME'] if 'PLACEHOLDER' not in c] 12 | # Some tables use tabs as column delimiters, others use spaces. 
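    # (Illustrative, assuming `from io import StringIO`.) sep=r"\s+"
    # treats any run of spaces or tabs as one delimiter, so both styles
    # parse identically:
    #     pd.read_csv(StringIO("1\t2  3"), sep=r"\s+", header=None)
    #     -> a single row with values [1, 2, 3]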
13 | table = pd.read_csv(filename, header=None, sep=r"\s+") 14 | assert len(table.columns) == len(names), "Mismatched column count" 15 | table.columns = names 16 | return table 17 | -------------------------------------------------------------------------------- /pdr/formats/phoenix.py: -------------------------------------------------------------------------------- 1 | def elec_em6_structure(block, name, filename, data, identifiers): 2 | """ 3 | ELEC EDR em6/TBL tables: All the START_BYTEs in TBL_0_STATE_DATA.FMT 4 | are off by 36 bytes. 5 | 6 | HITS 7 | * phoenix 8 | * elec_edr (partial) 9 | """ 10 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 11 | from pdr.loaders.queries import read_table_structure 12 | fmtdef = read_table_structure( 13 | block, name, filename, data, identifiers 14 | ) 15 | for line in range(0, len(fmtdef)): 16 | if fmtdef.at[line, "BLOCK_NAME"] == "TBL0 DATA": 17 | fmtdef.at[line, "START_BYTE"] -= 36 18 | fmtdef = compute_offsets(fmtdef) 19 | return insert_sample_types_into_df(fmtdef, identifiers) 20 | 21 | 22 | def afm_rdr_structure(block, name, filename, data, identifiers): 23 | """ 24 | AFM RDR header tables: Several columns' NAME fields start with lowercase 25 | letters, which is_an_assignment_line() in /parselabel/pds3.py evaluates as 26 | NOT an assignment statement. 27 | 28 | HITS 29 | * phoenix 30 | * afm_rdr 31 | """ 32 | from pdr.loaders.queries import read_table_structure 33 | fmtdef = read_table_structure(block, name, filename, data, identifiers) 34 | fmtdef.insert(1, 'NAME', fmtdef.pop('NAME')) 35 | for line in range(0, len(fmtdef)): 36 | col_number_text = fmtdef.at[line, "COLUMN_NUMBER"] 37 | if ( 38 | isinstance(col_number_text, str) 39 | and "NAME" in col_number_text 40 | ): 41 | fmtdef.at[ 42 | line, "COLUMN_NUMBER" 43 | ] = col_number_text.split("NAME = ")[0] 44 | fmtdef.at[line, "NAME"] = col_number_text.split("NAME = ")[1] 45 | return fmtdef, None 46 | 47 | 48 | def afm_table_loader(filename, fmtdef_dt, name): 49 | """ 50 | AFM RDR tables: Several labels miscount bytes somewhere in the tables 51 | 52 | HITS 53 | * phoenix 54 | * afm_rdr 55 | """ 56 | import pandas as pd 57 | 58 | if "HEADER_TABLE" in name: 59 | num_rows_skipped = 0 60 | num_rows = 4 61 | elif name == "AFM_F_ERROR_TABLE": 62 | num_rows_skipped = 4 63 | num_rows = 512 64 | elif name == "AFM_F_HEIGHT_TABLE": 65 | num_rows_skipped = 516 66 | num_rows = 512 67 | elif name == "AFM_B_ERROR_TABLE": 68 | num_rows_skipped = 1028 69 | num_rows = 512 70 | elif name == "AFM_B_HEIGHT_TABLE": 71 | num_rows_skipped = 1540 72 | num_rows = 512 73 | table = pd.read_csv( 74 | filename, 75 | header=None, 76 | sep=",", 77 | skiprows=num_rows_skipped, nrows=num_rows 78 | ) 79 | names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 80 | assert len(table.columns) == len(names), "mismatched column count" 81 | table.columns = names 82 | return table 83 | 84 | 85 | def phxao_header_position(identifiers, block, target, name, start_byte): 86 | """ 87 | PHXAO tables: Some table headers have lost trailing whitespace 88 | assumed to be present by the label. Treat as newline-delimited 89 | instead; the record count is correct. 
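    (As read here, returning {"as_rows": True, "start": 0, "length": n}
    asks the table reader to slice the header out by row count rather
    than byte range, which tolerates the missing padding; a sketch of the
    equivalent manual read, with a hypothetical filename and a record
    count of 3:

        with open("phxao_header.tab") as f:
            header = "".join(next(f) for _ in range(3))
    )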
90 | 91 | HITS 92 | * phoenix 93 | * atm_phxao 94 | """ 95 | from pdr.loaders.queries import _extract_table_records 96 | return { 97 | "as_rows": True, 98 | "start": 0, 99 | "length": _extract_table_records(block), 100 | } 101 | 102 | 103 | def phxao_table_offset(filename, identifiers): 104 | """ 105 | PHXAO tables: Some table headers have lost trailing whitespace 106 | assumed to be present by the label. Recalculate the table offset 107 | assuming that the table itself is still fixed-width. 108 | 109 | HITS 110 | * phoenix 111 | * atm_phxao 112 | """ 113 | from pdr.loaders._helpers import count_from_bottom_of_file 114 | rows = identifiers["ROWS"] 115 | row_bytes = identifiers["ROW_BYTES"] 116 | start_byte = count_from_bottom_of_file( 117 | filename, rows, row_bytes=row_bytes 118 | ) 119 | return True, start_byte 120 | 121 | 122 | def wcl_edr_special_block(data, name): 123 | """ 124 | WCL EDR ema/emb/emc tables: the START_BYTE for columns 13 and 14 are 125 | off by 1 and 2 bytes respectively. (The em8/em9/emf tables are fine.) 126 | 127 | HITS 128 | * phoenix 129 | * wcl_edr (partial) 130 | """ 131 | block = data.metablock_(name) 132 | 133 | for item in iter(block.items()): 134 | if "COLUMN" in item: 135 | if item[1]["COLUMN_NUMBER"] == 13: 136 | item[1]["START_BYTE"] -= 1 137 | if item[1]["COLUMN_NUMBER"] == 14: 138 | item[1]["START_BYTE"] -= 2 139 | return block 140 | 141 | 142 | def wcl_rdr_offset(data, name): 143 | """WCL RDR CP/CV tables: in the labels, each pointer's start byte is 144 | missing '' even though the units are bytes rather than file_records. 145 | This doesn't fix the header table though, they still need attention.""" 146 | target = data.metaget_("^"+name) 147 | start_byte = target[-1] - 1 148 | return True, start_byte 149 | 150 | 151 | def led_edr_structure(block, name, filename, data, identifiers): 152 | """ 153 | TEGA_LED.FMT: the CONTAINER's REPETITIONS should be 1000, not 1010 154 | 155 | HITS 156 | * phoenix 157 | * lededr 158 | """ 159 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 160 | from pdr.loaders.queries import read_table_structure 161 | 162 | fmtdef = read_table_structure( 163 | block, name, filename, data, identifiers 164 | ) 165 | real_repetitions = 1000 166 | real_fmtdef_len = 5 + (real_repetitions * 3) 167 | fmtdef = fmtdef.iloc[0:real_fmtdef_len, :] 168 | 169 | for line in range(0, len(fmtdef)): 170 | if fmtdef.at[line, "BLOCK_NAME"] == "LED_RECORDS": 171 | fmtdef.at[line, "BLOCK_REPETITIONS"] = 1000 172 | 173 | fmtdef = compute_offsets(fmtdef) 174 | return insert_sample_types_into_df(fmtdef, identifiers) 175 | 176 | 177 | def sc_rdr_structure(block, name, filename, data, identifiers): 178 | """ 179 | TEGA_SCRDR.FMT: most of the START_BYTEs are off by 4 because column 2 180 | ("TEGA_TIME") is actually 8 bytes, not 4 181 | 182 | HITS 183 | * phoenix 184 | * scrdr 185 | """ 186 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 187 | from pdr.loaders.queries import read_table_structure 188 | 189 | fmtdef = read_table_structure( 190 | block, name, filename, data, identifiers 191 | ) 192 | for line in range(0, len(fmtdef)): 193 | if fmtdef.at[line, "COLUMN_NUMBER"] == 2: 194 | fmtdef.at[line, "BYTES"] = 8 195 | if fmtdef.at[line, "COLUMN_NUMBER"] >= 3: 196 | fmtdef.at[line, "START_BYTE"] += 4 197 | 198 | fmtdef = compute_offsets(fmtdef) 199 | return insert_sample_types_into_df(fmtdef, identifiers) 200 | -------------------------------------------------------------------------------- /pdr/formats/pvo.py: 
-------------------------------------------------------------------------------- 1 | def orpa_low_res_loader(data, name): 2 | """ 3 | ORPA low resolution: labels for earlier orbits have the correct 4 | ROW_BYTES, but there is a typo introduced later that says 'ROW_BYTES = 5 | 241' instead of 243 6 | 7 | HITS 8 | * pvo 9 | * orpa_lowres 10 | """ 11 | block = data.metablock_(name) 12 | block["ROW_BYTES"] = 243 13 | return block 14 | 15 | 16 | def oims_12s_loader(data, name): 17 | """ 18 | OIMS 12 second averages: all labels say 'ROWS = 42' regardless of the 19 | data's actual length 20 | 21 | HITS 22 | * pvo 23 | * oims_12s 24 | """ 25 | block = data.metablock_(name) 26 | block["ROWS"] = data.metaget_("FILE_RECORDS") 27 | return block 28 | -------------------------------------------------------------------------------- /pdr/formats/rosetta.py: -------------------------------------------------------------------------------- 1 | def rosetta_table_loader(filename, fmtdef_dt): 2 | """ 3 | HITS 4 | * rosetta_rpc 5 | * RPCMIP 6 | """ 7 | import astropy.io.ascii 8 | 9 | table = astropy.io.ascii.read(filename).to_pandas() 10 | fmtdef, dt = fmtdef_dt 11 | table.columns = fmtdef["NAME"].to_list() 12 | return table 13 | 14 | 15 | def midas_rdr_sps_structure(block, name, filename, data, identifiers): 16 | """ 17 | SPS TIME_SERIES tables are made up of a repeated container with 4 columns 18 | followed by a non-repeated checksum column. In compute_offsets() the 19 | `block_names` list ends up out of order, so SB_OFFSET is not calculated 20 | correctly for columns in the repeated CONTAINER. 21 | 22 | TODO: This seems like a more general issue with how compute_offsets() 23 | handles a repeated container followed by a single column 24 | 25 | HITS 26 | * rosetta_dust 27 | * RDR_midas_sps 28 | """ 29 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 30 | from pdr.loaders.queries import read_table_structure 31 | import pandas as pd 32 | 33 | fmtdef = read_table_structure( 34 | block, name, filename, data, identifiers 35 | ) 36 | for end in ("_PREFIX", "_SUFFIX", ""): 37 | length = block.get(f"ROW{end}_BYTES") 38 | if length is not None: 39 | fmtdef[f"ROW{end}_BYTES"] = length 40 | 41 | # Add a placeholder row to the start of the fmtdef so that the 42 | # "block_names" list in compute_offsets() is in the right order and 43 | # SB_OFFSET is calculated correctly 44 | placeholder_row = { 45 | "NAME": "PLACEHOLDER_block", 46 | "DATA_TYPE": "VOID", 47 | "BYTES": 0, 48 | "START_BYTE": 1, 49 | "BLOCK_REPETITIONS": 1, 50 | "BLOCK_NAME": "CONTROL_DATA", # matches the checksum column's BLOCK_NAME 51 | "ROW_PREFIX_BYTES": 46, 52 | } 53 | fmtdef = pd.concat( 54 | [pd.DataFrame([placeholder_row]), fmtdef] 55 | ).reset_index(drop=True) 56 | 57 | fmtdef = compute_offsets(fmtdef) 58 | return insert_sample_types_into_df(fmtdef, identifiers) 59 | 60 | 61 | def fix_pad_length_structure(block, name, filename, data, identifiers): 62 | """ 63 | The MIDAS FSC tables and several CONSERT ptypes have ROW_PREFIX_BYTES, 64 | ROW_SUFFIX_BYTES, and a COLUMN with multiple ITEMS. compute_offsets() 65 | calculates the wrong end_byte and pad_length values from the BYTES and 66 | ROW_BYTES values in their labels. 
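    (Hypothetical illustration: for a COLUMN with ITEMS = 128 and
    ITEM_BYTES = 4, a per-item width of 4 and a total span of 512 differ
    by (128 - 1) * 4 = 508 bytes; if the label supplies one where the
    offset arithmetic expects the other, end_byte and pad_length both
    drift, which is what the reassignments below correct.)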
67 | 68 | HITS 69 | * rosetta_consert 70 | * l2_land 71 | * l2_orbit 72 | * l3_land 73 | * l3_land_fss 74 | * l3_orbit 75 | * l3_orbit_fss 76 | * l4_land 77 | * l4_orbit 78 | * l4_orbit_grnd 79 | * rosetta_dust 80 | * RDR_midas_fsc 81 | """ 82 | from pdr.pd_utils import insert_sample_types_into_df, compute_offsets 83 | from pdr.loaders.queries import read_table_structure 84 | fmtdef = read_table_structure( 85 | block, name, filename, data, identifiers 86 | ) 87 | for end in ("_PREFIX", "_SUFFIX", ""): 88 | length = block.get(f"ROW{end}_BYTES") 89 | if length is not None: 90 | fmtdef[f"ROW{end}_BYTES"] = length 91 | 92 | # to calculate end_byte correctly in compute_offsets() 93 | fmtdef["BYTES"] = fmtdef["ITEM_BYTES"] 94 | # to calculate pad_length correctly in compute_offsets() 95 | fmtdef["ROW_BYTES"] = fmtdef["ROW_BYTES"] + fmtdef["ROW_PREFIX_BYTES"] 96 | 97 | fmtdef = compute_offsets(fmtdef) 98 | return insert_sample_types_into_df(fmtdef, identifiers) 99 | -------------------------------------------------------------------------------- /pdr/formats/saturn_rpx.py: -------------------------------------------------------------------------------- 1 | def rpx_img_hdu_start_byte(name, hdulist): 2 | """ 3 | The multiple *_IMAGE pointers in these files all point at the same FITS 4 | HDU (each pointer illegally represents one band of the image). 5 | 6 | HITS 7 | * saturn_rpx 8 | * hst_raw_img 9 | * hst_raw_mask 10 | * hst_cal_img 11 | * hst_cal_mask 12 | * hst_eng_data 13 | * hst_eng_mask 14 | """ 15 | if 'HEADER' in name: 16 | return 0 17 | return hdulist.fileinfo(0)['datLoc'] 18 | -------------------------------------------------------------------------------- /pdr/formats/themis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | from dustgoggles.structures import listify 5 | 6 | from pdr.parselabel.pds3 import pointerize 7 | 8 | 9 | def get_visgeo_qube_offset(data): 10 | """""" 11 | return True, data.metaget_("^QUBE")[1] - 1 12 | 13 | 14 | def trivial_themis_geo_loader(pointer): 15 | """ 16 | HITS 17 | * themis 18 | * ir_GEO_v2 19 | * vis_GEO_v2 20 | """ 21 | warnings.warn(f"THEMIS {pointer} objects are not currently supported.") 22 | return True 23 | 24 | 25 | def check_gzip_fn(data, object_name): 26 | """ 27 | Some THEMIS QUBEs are stored in gzipped formats. The labels do not always 28 | bother to mention this. 29 | 30 | HITS 31 | * themis 32 | * BTR 33 | * ABR 34 | * PBT_v1 35 | * PBT_v2 36 | * ALB_v2 37 | * ir_GEO_v2 38 | * vis_GEO_v2 39 | * ir_EDR 40 | * vis_EDR 41 | * vis_RDR 42 | """ 43 | target = data.metaget(pointerize(object_name)) 44 | if isinstance(target, (dict, int)): 45 | return False, None 46 | filename = listify(target)[0] 47 | if filename.endswith("gz"): 48 | return filename 49 | return True, [filename, f"{filename}.gz"] 50 | 51 | 52 | def get_qube_offset(data): 53 | """ 54 | some THEMIS QUBEs mis-specify file records. 55 | 56 | HITS 57 | * themis 58 | * ir_GEO_v2 59 | * vis_GEO_v2 60 | """ 61 | if ( 62 | data.metaget_("FILE_RECORDS") 63 | >= os.stat(data.file_mapping["QUBE"]).st_size 64 | ): 65 | return True, data.metaget_("^QUBE")[-1] - 1 66 | return False, None 67 | -------------------------------------------------------------------------------- /pdr/formats/ulysses.py: -------------------------------------------------------------------------------- 1 | def gas_table_loader(filename, fmtdef_dt): 2 | """ 3 | GASDATA.FMT has the wrong START_BYTE for columns in the container. 
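    (Hypothetical illustration: in a 16-byte-wide CONTAINER, a column with
    START_BYTE = 5 should land at absolute offsets 5, 21, 37, ... across
    repetitions; a START_BYTE that never increments reads the same bytes
    every time.)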
4 | After manually changing the labels during testing, START_BYTE was still 5 | not incrementing correctly with each repetition of the container. 6 | This fixes both issues with 1 special case. 7 | 8 | HITS 9 | * ulysses 10 | * gas 11 | """ 12 | import pandas as pd 13 | fmtdef, dt = fmtdef_dt 14 | # Some tables use tabs as column deliminators, others use spaces. 15 | table = pd.read_csv(filename, skiprows=17, sep=r"\s+", header=None) 16 | assert len(table.columns) == len(fmtdef.NAME.tolist()) 17 | table.columns = fmtdef.NAME.tolist() 18 | return table 19 | 20 | 21 | def get_sample_type(base_samp_info): 22 | """ 23 | The bit column's data_type is BIT_STRING, which throws errors. Guessing 24 | this should be MSB_BIT_STRING. The tables look correct when compared to 25 | their ASCII versions. 26 | 27 | HITS 28 | * ulysses 29 | * epac_pha_bin 30 | """ 31 | from pdr.datatypes import sample_types 32 | sample_type = base_samp_info["SAMPLE_TYPE"] 33 | sample_bytes = base_samp_info["BYTES_PER_PIXEL"] 34 | 35 | if "BIT_STRING" == sample_type: 36 | sample_type = "MSB_BIT_STRING" 37 | return True, sample_types( 38 | sample_type, int(sample_bytes), for_numpy=True 39 | ) 40 | return False, None 41 | 42 | 43 | def get_special_block(data, name, identifiers): 44 | """ 45 | START_BYTE is wrong for repeated columns within the container. ITEM_BYTES 46 | is also off by 1. 47 | 48 | HITS 49 | * ulysses 50 | * epac_all_chan 51 | * epac_omni_ele 52 | * epac_omni_pro 53 | * epac_pha_asc 54 | * epac_pha_bin 55 | * epac_prtl 56 | * epac_pstl 57 | """ 58 | block = data.metablock_(name) 59 | if "ULY-J-EPAC-4-SUMM-PSTL" in identifiers["DATA_SET_ID"]: 60 | block["CONTAINER"]["COLUMN"]["ITEM_BYTES"] = 13 61 | block["CONTAINER"]["COLUMN"]["START_BYTE"] = 1 62 | elif "ULY-J-EPAC-4-SUMM-ALL-CHAN" in identifiers["DATA_SET_ID"]: 63 | block.getall('CONTAINER')[0]['COLUMN']['START_BYTE'] = 1 64 | block.getall('CONTAINER')[1]['CONTAINER']['START_BYTE'] = 1 65 | block.getall('CONTAINER')[1]['CONTAINER']['COLUMN']['START_BYTE'] = 1 66 | return block 67 | -------------------------------------------------------------------------------- /pdr/formats/vega.py: -------------------------------------------------------------------------------- 1 | def get_structure(block, name, filename, data, identifiers): 2 | """ 3 | "Encounter data" tables miscount the last column's START_BYTE by 1 4 | 5 | HITS 6 | * vega 7 | * ducma 8 | """ 9 | from pdr.loaders.queries import read_table_structure 10 | fmtdef = read_table_structure( 11 | block, name, filename, data, identifiers 12 | ) 13 | 14 | if "encounter data" in block['DESCRIPTION']: 15 | fmtdef.at[10, "START_BYTE"] = 62 16 | return fmtdef, None 17 | 18 | 19 | def fix_array_structure(name, block, fn, data, identifiers): 20 | """ 21 | HITS 22 | 23 | * giotto 24 | * pia 25 | * vega 26 | * puma_mode 27 | """ 28 | from pdr.datatypes import sample_types 29 | from pdr.loaders.queries import read_table_structure, \ 30 | check_array_for_subobject 31 | 32 | if not block.get("INTERCHANGE_FORMAT") == "BINARY": 33 | return None, None 34 | has_sub = check_array_for_subobject(block) 35 | if not has_sub: 36 | dt = sample_types(block["DATA_TYPE"], block["BYTES"], True) 37 | return None, dt 38 | fmtdef = read_table_structure(block, name, fn, data, identifiers) 39 | specbytes = block.get("COLLECTION").get("BYTES") 40 | specstart = fmtdef.loc[ 41 | fmtdef['NAME'] == 'PLACEHOLDER_SPECTRUM', "START_BYTE" 42 | ].iloc[0] 43 | fmtdef.loc[fmtdef['NAME'] == 'PLACEHOLDER_SPECTRUM', "AXIS_ITEMS"] = ( 44 | (specbytes - 
specstart + 1) 45 | / len(fmtdef.loc[fmtdef['BLOCK_NAME'].str.contains('SPECTRUM')]) 46 | ) 47 | # Sometimes arrays define start_byte, sometimes their elements do 48 | if "START_BYTE" in fmtdef.columns: 49 | fmtdef['START_BYTE'] = fmtdef['START_BYTE'].fillna(1) 50 | from pdr.pd_utils import compute_offsets, insert_sample_types_into_df 51 | 52 | return insert_sample_types_into_df(compute_offsets(fmtdef), identifiers) 53 | -------------------------------------------------------------------------------- /pdr/formats/viking.py: -------------------------------------------------------------------------------- 1 | def seis_table_loader(filepath, fmtdef_dt): 2 | """ 3 | The Viking 2 seismometer tables have mangled labels. The raw data tables 4 | are variable length CSVs, and labels for the summary tables count column 5 | bytes wrong. Half the labels define columns that do not match the data. 6 | 7 | HITS 8 | * viking 9 | * seis_raw 10 | * seis_summary 11 | """ 12 | import pandas as pd 13 | 14 | col_names = [c for c in fmtdef_dt[0].NAME if "PLACEHOLDER" not in c] 15 | filename = filepath.split("/")[-1] 16 | # The summary tables have miscounted bytes in their labels. The columns are 17 | # separated by whitespace, so can be read by read_csv() instead. Also, both 18 | # labels define a SEISMIC_TIME_SOLS column that doesn't exist in the data. 19 | if "summary" in filename.lower(): 20 | table = pd.read_csv(filepath, header=None, sep=r"\s+") 21 | col_names.remove("SEISMIC_TIME_SOLS") 22 | if "event_wind_summary" in filename.lower(): 23 | # event_wind_summary.tab has a column not included in the label. It 24 | # is listed in: https://pds-geosciences.wustl.edu/viking/vl2-m-seis-5-rdr-v1/vl_9020/document/vpds_event_winds_format.txt 25 | col_names.insert(7, "ORIGINAL_LINES_COUNT") 26 | # The raw event tables are variable-length CSVs. Their labels include a 27 | # SEISMIC_SOL column that doesn't exist in the data. 28 | elif "event" in filename.lower(): 29 | table = pd.read_csv(filepath, header=None, sep=",") 30 | col_names.remove("SEISMIC_SOL") 31 | # The raw high-rate tables are variable-length CSVs. Their labels list the 32 | # correct number of columns. 33 | elif "high" in filename.lower(): 34 | table = pd.read_csv(filepath, header=None, sep=",") 35 | else: 36 | raise ValueError("Unknown Viking 2 SEIS table format.") 37 | assert len(table.columns) == len(col_names), "mismatched column count" 38 | table.columns = col_names 39 | return table 40 | -------------------------------------------------------------------------------- /pdr/formats/voyager.py: -------------------------------------------------------------------------------- 1 | def mag_special_block(data, name): 2 | """ 3 | ROW_BYTES are listed as 144 in the labels for Uranus and Neptune MAG RDRs. 4 | Their tables look the same, but the Neptune products open wrong. Setting 5 | ROW_BYTES to 145 fixes it. 6 | 7 | HITS 8 | * vg_mag 9 | * rdr_nep 10 | """ 11 | block = data.metablock_(name) 12 | block["ROW_BYTES"] = 145 13 | return block 14 | 15 | 16 | def get_structure(block, name, filename, data, identifiers): 17 | """ 18 | The VGR_PLS_HR_2017.FMT for PLS 1-hour averages undercounts the last column 19 | by 1 byte. 
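    (One-line sketch of the repair, mirroring the code below; the label
    evidently gives BYTES = 5 where the data occupies 6:

        fmtdef.at[8, "BYTES"] = 6
    )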
20 | 21 | HITS 22 | * vg_pls 23 | * sys_1hr_avg (partial) 24 | """ 25 | from pdr.loaders.queries import read_table_structure 26 | fmtdef = read_table_structure( 27 | block, name, filename, data, identifiers 28 | ) 29 | fmtdef.at[8, "BYTES"] = 6 30 | return fmtdef, None 31 | 32 | 33 | def pls_avg_special_block(data, name): 34 | """ 35 | Because VGR_PLS_HR_2017.FMT undercounts by 1 byte, the products that 36 | reference it also undercount their ROW_BYTES by 1. 37 | 38 | HITS 39 | * vg_pls 40 | * sys_1hr_avg 41 | """ 42 | block = data.metablock_(name) 43 | if block["^STRUCTURE"] == "VGR_PLS_HR_2017.FMT": 44 | block["ROW_BYTES"] = 57 45 | return True, block 46 | return False, None 47 | 48 | 49 | def pls_fine_special_block(data, name): 50 | """ 51 | Most of the PLS FINE RES labels undercount the ROW_BYTES. The most recent 52 | product (2007-241_2018-309) is formatted differently and opens correctly. 53 | 54 | HITS 55 | * vg_pls 56 | * sys_fine_res 57 | """ 58 | block = data.metablock_(name) 59 | if block["ROW_BYTES"] == 57: 60 | block["ROW_BYTES"] = 64 61 | return True, block 62 | return False, None 63 | 64 | 65 | def pls_ionbr_special_block(data, name): 66 | """ 67 | SUMRY.LBL references the wrong format file 68 | 69 | HITS 70 | * vg_pls 71 | * ur_ionbr (partial) 72 | """ 73 | block = data.metablock_(name) 74 | block["^STRUCTURE"] = "SUMRY.FMT" 75 | return True, block 76 | 77 | 78 | def pra_special_block(data, name, identifiers): 79 | """ 80 | PRA Lowband RDRs: The Jupiter labels use the wrong START_BYTE for columns 81 | in containers. The Saturn/Uranus/Neptune labels define columns with 82 | multiple ITEMS, but ITEM_BYTES is missing and the BYTES value is wrong. 83 | 84 | HITS 85 | * vg_pra 86 | * lowband_jup 87 | * lowband_other 88 | """ 89 | block = data.metablock_(name) 90 | if identifiers["DATA_SET_ID"] in ( 91 | "VG2-S-PRA-3-RDR-LOWBAND-6SEC-V1.0", 92 | "VG2-N-PRA-3-RDR-LOWBAND-6SEC-V1.0", 93 | "VG2-U-PRA-3-RDR-LOWBAND-6SEC-V1.0" 94 | ): 95 | for item in iter(block.items()): 96 | if "COLUMN" in item and "SWEEP" in item[1]["NAME"]: 97 | item[1].add("ITEM_BYTES", 4) # The original BYTES value 98 | item[1]["BYTES"] = 284 # ITEM_BYTES * ITEMS 99 | elif identifiers["DATA_SET_ID"] == "VG2-J-PRA-3-RDR-LOWBAND-6SEC-V1.0": 100 | for item in iter(block["CONTAINER"].items()): 101 | if "COLUMN" in item: 102 | if item[1]["NAME"] == "STATUS_WORD": 103 | item[1]["START_BYTE"] = 1 104 | if item[1]["NAME"] == "DATA_CHANNELS": 105 | item[1]["START_BYTE"] = 5 106 | return True, block 107 | 108 | 109 | def lecp_table_loader(filename, fmtdef_dt): 110 | """ 111 | VG1 LECP Jupiter SUMM Sector tables reference a format file with incorrect 112 | START_BYTEs for columns within a CONTAINER. Columns are consistently 113 | separated by whitespace. 114 | The VG2 Uranus 12.8 minute step table (ascii version) was missing values 115 | from some rows, not sure why. Reusing this special case fixes it. 116 | 117 | HITS 118 | vg_lecp 119 | * j_summ_sector_vg1 120 | * u_rdr_step_12.8 (partial) 121 | """ 122 | import pandas as pd 123 | 124 | fmtdef, dt = fmtdef_dt 125 | table = pd.read_csv(filename, header=None, sep=r"\s+") 126 | 127 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 128 | assert len(table.columns) == len(col_names), "mismatched column count" 129 | table.columns = col_names 130 | return table 131 | 132 | 133 | def lecp_vg1_sat_table_loader(filename, fmtdef_dt): 134 | """ 135 | VG1 Saturn RDR step products have an extra header row partway through their 136 | tables. 
This special case skips those rows by treating them as comments. 137 | PDS volume affected: VG1-S-LECP-3-RDR-STEP-6MIN-V1.0 138 | 139 | HITS 140 | vg_lecp 141 | * s_rdr_step (partial) 142 | """ 143 | import pandas as pd 144 | 145 | fmtdef, dt = fmtdef_dt 146 | # Rows that start with "VOYAGER" are extra headers. "comment='V'" skips them 147 | table = pd.read_csv(filename, comment='V') 148 | 149 | col_names = [c for c in fmtdef_dt[0]['NAME'] if "PLACEHOLDER" not in c] 150 | assert len(table.columns) == len(col_names), "mismatched column count" 151 | table.columns = col_names 152 | return table 153 | -------------------------------------------------------------------------------- /pdr/loaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/loaders/__init__.py -------------------------------------------------------------------------------- /pdr/loaders/_helpers.py: -------------------------------------------------------------------------------- 1 | """Simple utility functions for assorted loaders and queries.""" 2 | from __future__ import annotations 3 | from functools import wraps 4 | import os 5 | from pathlib import Path 6 | import re 7 | from typing import Any, Callable, Optional, Union, TYPE_CHECKING 8 | 9 | from cytoolz import curry 10 | from multidict import MultiDict 11 | 12 | if TYPE_CHECKING: 13 | from pdr.pdrtypes import DataIdentifiers, PhysicalTarget 14 | 15 | 16 | HETERODOX_ENDING = re.compile(r"\r\n?") 17 | """Pattern for heterodox but not deeply bizarre line endings.""" 18 | _cle = curry(re.sub, HETERODOX_ENDING, "\n") 19 | """partially evaluated replacer of heterodox with orthodox line endings.""" 20 | 21 | 22 | def looks_like_ascii(block: MultiDict, name: str) -> bool: 23 | """Is this probably an ASCII table?""" 24 | return ( 25 | ("SPREADSHEET" in name) 26 | or ("ASCII" in name) 27 | or (block.get("INTERCHANGE_FORMAT") == "ASCII") 28 | ) 29 | 30 | 31 | def quantity_start_byte( 32 | quantity_dict: dict[str, Union[str, int]], record_bytes: Optional[int] 33 | ) -> Optional[int]: 34 | """ 35 | Attempt to infer an object's start byte from a dict parsed from a PVL 36 | quantity object associated with a PVL pointer parameter, along with, if 37 | known, the size of a product's records (relevant only if the quantity 38 | units are not bytes). Returns None if we can't infer it (usually meaning 39 | that the label gives the start position in records but doesn't say how 40 | big the records are). 41 | """ 42 | # TODO: are there cases in which _these_ aren't 1-indexed? 43 | if quantity_dict["units"] == "BYTES": 44 | return quantity_dict["value"] - 1 45 | if record_bytes is not None: 46 | return record_bytes * max(quantity_dict["value"] - 1, 0) 47 | 48 | 49 | def count_from_bottom_of_file( 50 | fn: Union[str, list, Path], rows: int, row_bytes: int 51 | ) -> int: 52 | """ 53 | Fallback start-byte-finding function for cases in which a label gives 54 | the length of a table in terms of number of rows and row length, but does 55 | not specify where in the file the table _starts_. In these cases, the table 56 | usually goes to the end of the file, but may be preceded by a header or 57 | whatever, which means that we can often guess its start byte by subtracting 58 | the table size in bytes from the physical size of the file. This is not 59 | guaranteed to work! 
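    For example, with hypothetical numbers: a 100,000-byte file holding a
    10-row table of 1,000-byte rows gives an inferred start byte of
    100000 - (10 * 1000) = 90000, leaving the first 90,000 bytes to
    whatever header precedes the table.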
60 | """ 61 | tab_size = rows * row_bytes 62 | if isinstance(fn, list): 63 | fn = fn[0] 64 | return os.path.getsize(Path(fn)) - tab_size 65 | 66 | 67 | def _check_delimiter_stream( 68 | identifiers: DataIdentifiers, 69 | name: str, 70 | target: PhysicalTarget, 71 | block: MultiDict, 72 | ) -> bool: 73 | """ 74 | Does it look like this object is a delimiter-separated table without an 75 | explicitly-defined row length? 76 | """ 77 | # TODO: this may be deprecated. assess against notionally-supported 78 | # products. 79 | if isinstance(target, dict): 80 | if target.get("units") == "BYTES": 81 | return False 82 | # TODO: untangle this, everywhere 83 | if isinstance(target, (list, tuple)): 84 | if isinstance(target[-1], dict): 85 | if target[-1].get("units") == "BYTES": 86 | return False 87 | # TODO: Other criteria that could appear in the block? 88 | if "BYTES" in block: 89 | return False 90 | # TODO: not sure this is a good assumption -- it is a bad assumption 91 | # for the CHEMIN RDRs, but those labels are just wrong 92 | if identifiers["RECORD_BYTES"] not in (None, ""): 93 | return False 94 | # TODO: not sure this is a good assumption 95 | if not identifiers["RECORD_TYPE"] == "STREAM": 96 | return False 97 | # Well-known object types that imply textuality, if we have nothing 98 | # else to go on 99 | if any(label in name for label in ("ASCII", "SPREADSHEET", "HEADER")): 100 | return True 101 | return False 102 | 103 | 104 | def check_explicit_delimiter(block: MultiDict) -> str: 105 | """ 106 | Check if an ASCII TABLE/SPREADSHEET definition explicitly gives a field 107 | delimiter. If it doesn't, tentatively assume it's comma-separated. 108 | """ 109 | if "FIELD_DELIMITER" in block.keys(): 110 | try: 111 | return { 112 | "COMMA": ",", 113 | "VERTICAL_BAR": "|", 114 | "SEMICOLON": ";", 115 | "TAB": "\t", 116 | }[block["FIELD_DELIMITER"]] 117 | except KeyError: 118 | raise KeyError("Unknown FIELD_DELIMITER character.") 119 | return "," 120 | 121 | 122 | def canonicalize_line_endings(text: Any) -> Any: 123 | """ 124 | Attempt to replace common 'heterodox' line endings in a string or 125 | list/tuple of strings with canonical endings (\n). Does not attempt to 126 | perform sophisticated delimiter sniffing, and will only reliably handle 127 | only \r and \r\n endings, not \n\r, EM / 0x19, \r\r\n, etc. 128 | Ignores (returns unchanged) non-strings and non-string elements of 129 | lists/tuples. 130 | """ 131 | if isinstance(text, str): 132 | return _cle(text) 133 | if isinstance(text, (list, tuple)): 134 | return type(text)([_cle(s) if isinstance(s, str) else s for s in text]) 135 | return text 136 | 137 | 138 | def canonicalized(func: Callable) -> Callable: 139 | """ 140 | Creates a version of `func` that canonicalizes line endings of any string 141 | (or top-level string elements of a list/tuple), returned by `func` 142 | """ 143 | 144 | @wraps(func) 145 | def with_canonical_endings(*args, **kwargs): 146 | return canonicalize_line_endings(func(*args, **kwargs)) 147 | 148 | return with_canonical_endings 149 | -------------------------------------------------------------------------------- /pdr/loaders/astrowrap.py: -------------------------------------------------------------------------------- 1 | try: 2 | from astropy.io import fits 3 | from astropy.io.fits import HDUList 4 | from astropy.io.fits.hdu import BinTableHDU 5 | except ImportError: 6 | raise ModuleNotFoundError( 7 | "Reading FITS files requires the optional astropy dependency." 
8 | ) 9 | -------------------------------------------------------------------------------- /pdr/loaders/datawrap.py: -------------------------------------------------------------------------------- 1 | """Classes to wrap and manage complex data-loading workflows.""" 2 | from typing import Any 3 | 4 | from dustgoggles.dynamic import exc_report 5 | from dustgoggles.func import constant 6 | 7 | from pdr.formats import ( 8 | check_special_sample_type, 9 | check_special_qube_band_storage, 10 | check_special_position, 11 | check_special_structure, 12 | check_special_table_reader, 13 | check_special_fits_start_byte 14 | ) 15 | from pdr.func import get_argnames, softquery, specialize, call_kwargfiltered 16 | from pdr.parselabel.pds3 import depointerize 17 | from pdr.pdrtypes import LoaderFunction, PDRLike 18 | from pdr.loaders.queries import ( 19 | DEFAULT_DATA_QUERIES, 20 | get_identifiers, 21 | get_file_mapping, 22 | get_fits_start_byte, 23 | get_hdulist, 24 | get_target 25 | ) 26 | 27 | 28 | def _format_exc_report(exc: Exception) -> dict: 29 | """format an exception report for inclusion in another dict""" 30 | report = exc_report(exc) 31 | for k, v in tuple(report.items()): 32 | if k != 'exception': 33 | del report[k] 34 | report[f"exception_{k}"] = v 35 | return report 36 | 37 | 38 | class Loader: 39 | """ 40 | compact wrapper for loader functions, intended principally but not solely 41 | for library-internal use. provides a common interface, adds compactness, 42 | delays imports, etc. 43 | """ 44 | 45 | def __init__(self, loader_function: LoaderFunction): 46 | self.loader_function = loader_function 47 | self.argnames = get_argnames(loader_function) 48 | 49 | def __call__( 50 | self, pdrlike: PDRLike, name: str, **kwargs 51 | ) -> dict[str, Any]: 52 | kwargdict = {"data": pdrlike, "name": depointerize(name)} | kwargs 53 | kwargdict["tracker"].set_metadata(loader=self.__class__.__name__) 54 | record_exc = {"status": "query_ok"} 55 | try: 56 | info = softquery(self.loader_function, self.queries, kwargdict) 57 | except Exception as exc: 58 | record_exc = {"status": "query_failed"} | _format_exc_report(exc) 59 | raise exc 60 | finally: 61 | kwargdict["tracker"].track(self.loader_function, **record_exc) 62 | kwargdict["tracker"].dump() 63 | load_exc = {"status": "load_ok"} 64 | try: 65 | return {name: call_kwargfiltered(self.loader_function, **info)} 66 | except Exception as exc: 67 | load_exc = {"status": "load_failed"} | _format_exc_report(exc) 68 | raise exc 69 | finally: 70 | kwargdict["tracker"].track(self.loader_function, **load_exc) 71 | kwargdict["tracker"].dump() 72 | queries = DEFAULT_DATA_QUERIES 73 | 74 | 75 | class ReadImage(Loader): 76 | """wrapper for read_image""" 77 | 78 | def __init__(self): 79 | from pdr.loaders.image import read_image 80 | from pdr.loaders.queries import ( 81 | base_sample_info, 82 | im_sample_type, 83 | check_if_qube, 84 | get_qube_band_storage_type, 85 | generic_image_properties, 86 | ) 87 | 88 | super().__init__(read_image) 89 | self.queries = DEFAULT_DATA_QUERIES | { 90 | "base_samp_info": base_sample_info, 91 | "sample_type": specialize( 92 | im_sample_type, check_special_sample_type 93 | ), 94 | "band_storage_type": specialize( 95 | get_qube_band_storage_type, check_special_qube_band_storage 96 | ), 97 | "gen_props": specialize(generic_image_properties, check_if_qube), 98 | # just modifies gen_props in place, triggers transform in load step 99 | } 100 | 101 | 102 | class ReadTable(Loader): 103 | """wrapper for read_table""" 104 | 105 | def __init__(self): 
106 | from pdr.loaders.queries import table_position, parse_table_structure 107 | from pdr.loaders.table import read_table 108 | 109 | super().__init__(specialize(read_table, check_special_table_reader)) 110 | self.queries = DEFAULT_DATA_QUERIES | { 111 | "table_props": specialize(table_position, check_special_position), 112 | "fmtdef_dt": specialize( 113 | parse_table_structure, check_special_structure 114 | ), 115 | } 116 | 117 | 118 | class ReadHeader(Loader): 119 | """wrapper for read_header""" 120 | 121 | def __init__(self): 122 | from pdr.loaders.text import read_header 123 | from pdr.loaders.queries import table_position 124 | 125 | super().__init__(read_header) 126 | self.queries = DEFAULT_DATA_QUERIES | { 127 | "table_props": specialize(table_position, check_special_position) 128 | } 129 | 130 | 131 | class ReadText(Loader): 132 | """wrapper for read_text""" 133 | 134 | def __init__(self): 135 | from pdr.loaders.text import read_text 136 | 137 | super().__init__(read_text) 138 | 139 | 140 | class ReadLabel(Loader): 141 | """wrapper for read_label""" 142 | 143 | def __init__(self): 144 | from pdr.loaders.text import read_label 145 | 146 | super().__init__(read_label) 147 | 148 | 149 | class ReadFits(Loader): 150 | """wrapper for handle_fits_file""" 151 | 152 | 153 | def __init__(self): 154 | from pdr.loaders.handlers import handle_fits_file 155 | 156 | # noinspection PyTypeChecker 157 | super().__init__(handle_fits_file) 158 | 159 | def __call__(self, pdrlike: PDRLike, name: str, **kwargs): 160 | # slightly hacky but works with how we've done dictionary construction 161 | return tuple(super().__call__(pdrlike, name, **kwargs).values())[0] 162 | 163 | queries = DEFAULT_DATA_QUERIES | { 164 | "fn": get_file_mapping, 165 | 'target': get_target, 166 | "identifiers": get_identifiers, 167 | 'hdulist': get_hdulist, 168 | "hdu_id": specialize( 169 | get_fits_start_byte, check_special_fits_start_byte 170 | ), 171 | 'hdu_id_is_index': constant(False) 172 | } 173 | 174 | 175 | class ReadCompressedImage(Loader): 176 | """wrapper for handle_compressed_image""" 177 | 178 | def __init__(self): 179 | from pdr.loaders.handlers import handle_compressed_image 180 | 181 | super().__init__(handle_compressed_image) 182 | 183 | 184 | class ReadArray(Loader): 185 | """wrapper for read_array""" 186 | 187 | def __init__(self): 188 | from pdr.loaders.table import read_array 189 | from pdr.loaders.queries import parse_array_structure 190 | 191 | super().__init__(read_array) 192 | self.queries = DEFAULT_DATA_QUERIES | { 193 | "fmtdef_dt": specialize( 194 | parse_array_structure, check_special_structure 195 | ), 196 | } 197 | 198 | 199 | class TBD(Loader): 200 | """wrapper for tbd""" 201 | 202 | def __init__(self): 203 | from pdr.loaders.utility import tbd 204 | 205 | super().__init__(tbd) 206 | 207 | 208 | class Trivial(Loader): 209 | """wrapper for trivial""" 210 | 211 | def __init__(self): 212 | from pdr.loaders.utility import trivial 213 | 214 | super().__init__(trivial) 215 | -------------------------------------------------------------------------------- /pdr/loaders/dispatch.py: -------------------------------------------------------------------------------- 1 | """Functions to select appropriate Loader subclasses for data objects.""" 2 | 3 | from __future__ import annotations 4 | 5 | import re 6 | from typing import Optional, TYPE_CHECKING 7 | 8 | from pdr.formats import check_trivial_case 9 | from pdr.loaders.utility import ( 10 | looks_like_this_kind_of_file, 11 | DESKTOP_IMAGE_EXTENSIONS, 12 | 
FITS_EXTENSIONS, 13 | IMAGE_EXTENSIONS, 14 | TABLE_EXTENSIONS, 15 | TEXT_EXTENSIONS, 16 | ) 17 | from pdr.loaders.datawrap import ( 18 | Loader, 19 | ReadArray, 20 | ReadCompressedImage, 21 | ReadFits, 22 | ReadHeader, 23 | ReadImage, 24 | ReadLabel, 25 | ReadTable, 26 | ReadText, 27 | TBD, 28 | Trivial 29 | ) 30 | 31 | if TYPE_CHECKING: 32 | from pdr import Data 33 | 34 | 35 | def image_lib_dispatch(pointer: str, data: Data) -> Optional[Loader]: 36 | """ 37 | check file extensions to see if we want to toss a file to an external 38 | library rather than using our internal raster handling. current cases are: 39 | pillow for tiff, gif, or jp2; astropy for fits 40 | """ 41 | object_filename = data._target_path(pointer) 42 | if looks_like_this_kind_of_file(object_filename, FITS_EXTENSIONS): 43 | return ReadFits() 44 | if looks_like_this_kind_of_file( 45 | object_filename, DESKTOP_IMAGE_EXTENSIONS 46 | ): 47 | return ReadCompressedImage() 48 | return None 49 | 50 | 51 | def pointer_to_loader(pointer: str, data: Data) -> Loader: 52 | """ 53 | Attempt to select an appropriate Loader subclass based on a PDS3 object 54 | name (and sometimes the file extension). 55 | 56 | The apparently-redundant sequence of conditionals is not in fact redundant; 57 | it is based on our knowledge of the most frequently used but sometimes 58 | redundant object names in the PDS3 corpus. 59 | """ 60 | if check_trivial_case(pointer, data.identifiers, data.filename): 61 | return Trivial() 62 | if pointer == "LABEL": 63 | return ReadLabel() 64 | if image_lib_dispatch(pointer, data) is not None: 65 | return image_lib_dispatch(pointer, data) 66 | if ( 67 | "TEXT" in pointer 68 | or "PDF" in pointer 69 | or "MAP_PROJECTION_CATALOG" in pointer 70 | ): 71 | return ReadText() 72 | if "DESC" in pointer: # probably points to a reference file 73 | return ReadText() 74 | if "ARRAY" in pointer: 75 | return ReadArray() 76 | table_words = ( 77 | "TABLE", "SPREADSHEET", "CONTAINER", "SERIES", "SPECTRUM", "HISTOGRAM" 78 | ) 79 | if ( 80 | any(val in pointer for val in table_words) 81 | and not any(val+"_HEADER" in pointer for val in table_words) 82 | and "HISTOGRAM_IMAGE" not in pointer 83 | ): 84 | return ReadTable() 85 | if "HEADER" in pointer: 86 | if looks_like_this_kind_of_file( 87 | data.file_mapping[pointer], FITS_EXTENSIONS 88 | ): 89 | return ReadFits() 90 | return ReadHeader() 91 | # I have moved this below "table" due to the presence of a number of 92 | # binary tables named things like "Image Time Table". If there are pictures 93 | # of tables, we will need to do something more sophisticated. 94 | if ( 95 | ("IMAGE" in pointer) 96 | or ("QUB" in pointer) 97 | or ("XDR_DOCUMENT" in pointer) 98 | ): 99 | return ReadImage() 100 | if "FILE_NAME" in pointer: 101 | return file_extension_to_loader(pointer) 102 | return TBD() 103 | 104 | 105 | def file_extension_to_loader(fn: str) -> Loader: 106 | """ 107 | Attempt to select the correct Loader subclass for an object based solely on 108 | its file extension. Used primarily for objects only specified by a PDS3 109 | FILE_NAME pointer or similar. 
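Example (a sketch of the expected dispatch for two hypothetical filenames;
these paths are illustrative, not real products):

>>> type(file_extension_to_loader("products/scene.img")).__name__
'ReadImage'
>>> type(file_extension_to_loader("products/scene.csv")).__name__
'ReadTable'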
110 | """ 111 | if looks_like_this_kind_of_file(fn, FITS_EXTENSIONS): 112 | return ReadFits() 113 | if looks_like_this_kind_of_file(fn, IMAGE_EXTENSIONS): 114 | return ReadImage() 115 | if looks_like_this_kind_of_file(fn, TEXT_EXTENSIONS): 116 | return ReadText() 117 | if looks_like_this_kind_of_file(fn, TABLE_EXTENSIONS): 118 | return ReadTable() 119 | if looks_like_this_kind_of_file(fn, DESKTOP_IMAGE_EXTENSIONS): 120 | return ReadCompressedImage() 121 | return TBD() 122 | 123 | 124 | OBJECTS_TO_IGNORE = ( 125 | "DATA_SET_MAP_PROJECT.*", ".*_DESC$", ".*DESCRIPTION(_[0-9]*)?$" 126 | ) 127 | """ 128 | PDS3 objects we do not automatically load, even when loading greedily. 129 | These are reference files, usually throwaway ones, that are usually not 130 | archived in the same place as the data products and add little, if any, context 131 | to individual products (they are the same across an entire 'product type'). 132 | This means that in almost all cases, attempting to greedily load them has no 133 | purpose but to throw irrelevant warnings at the user. 134 | """ 135 | OBJECTS_IGNORED_BY_DEFAULT = re.compile("|".join(OBJECTS_TO_IGNORE)) 136 | -------------------------------------------------------------------------------- /pdr/loaders/text.py: -------------------------------------------------------------------------------- 1 | """Pointy-end functions for text-handling Loader subclasses.""" 2 | from io import TextIOWrapper 3 | from pathlib import Path 4 | from typing import Optional, Union 5 | import warnings 6 | 7 | from pdr.loaders._helpers import canonicalized 8 | from pdr.loaders.utility import looks_like_this_kind_of_file 9 | from pdr.parselabel.utils import trim_label 10 | from pdr.utils import check_cases, decompress 11 | 12 | 13 | def read_text(target: str, fn: Union[list[str], str]) -> Union[list[str], str]: 14 | """Read text from a file or list of files.""" 15 | try: 16 | if isinstance(fn, str): 17 | return ignore_if_pdf(check_cases(fn)) 18 | elif isinstance(fn, list): 19 | return [ 20 | ignore_if_pdf(check_cases(each_file)) 21 | for each_file in fn 22 | ] 23 | except FileNotFoundError or UnicodeDecodeError: 24 | warnings.warn(f"couldn't find {target}") 25 | raise 26 | 27 | 28 | def read_header( 29 | fn: Union[str, Path], 30 | table_props: dict, 31 | name: str = "HEADER" 32 | ) -> str: 33 | """Read a text header from a file.""" 34 | return skeptically_load_header(fn, table_props, name) 35 | 36 | 37 | @canonicalized 38 | def read_label( 39 | fn: Union[str, Path], 40 | fmt: Optional[str] = "text" 41 | ) -> Union[str, "PVLModule"]: 42 | """ 43 | Read the entirety of a PDS3 label, optionally using `pvl` to parse it as 44 | completely as possible into Python objects. This is not intended for use 45 | in the primary `pdr.Metadata` initialization workflow, but rather to 46 | handle cases when the user explicitly requests the entirety of the label 47 | (typically by accessing the "LABEL" key of a `pdr.Data` object). 48 | """ 49 | if fmt == "text": 50 | return trim_label(decompress(fn)) 51 | elif fmt == "pvl": 52 | import pvl 53 | 54 | return pvl.load(fn) 55 | raise NotImplementedError(f"The {fmt} format is not yet implemented.") 56 | 57 | 58 | @canonicalized 59 | def skeptically_load_header( 60 | fn: Union[Path, str], 61 | table_props: dict, 62 | name: str = "header", # TODO: what's with this default value? 63 | fmt: Optional[str] = "text", 64 | ) -> Union[str, "PVLModule", None]: 65 | """ 66 | Attempt to read a text HEADER object from a file. 
PDS3 does not give a 67 | strict definition of the HEADER object, so there is no way to 68 | _consistently_ load HEADERs in a coherent, well-formatted fashion. However, 69 | providers generally use HEADER to denote either attached file/product-level 70 | metadata, column headers for an ASCII table, or object-level 71 | contextualizing metadata for ASCII tables. 72 | 73 | By default, simply read the designated byte range as unicode text. If 74 | `fmt` is "pvl", also attempt to parse this text as PVL. (This will fail 75 | on most products, because most HEADER objects are not PVL, but is useful 76 | for some ancillary attached labels, especially ISIS labels.) 77 | 78 | NOTE: HEADERs defined in labels very often do not actually exist and are 79 | never essential for loading primary data objects, so this function is 80 | _always_ "optional", even in debug mode. If it fails, it will simply raise 81 | a UserWarning and return None. 82 | 83 | WARNING: this function is not intended to load metadata of standard file 84 | formats (such as TIFF tags or FITS headers). These headers should always 85 | be handled by a format-specific parser. More generally, it will never work 86 | on binary files. 87 | """ 88 | # TODO: all these check_cases calls are probably unnecessary w/new file 89 | # mapping workflow 90 | # FIXME: PVL mode ignores the table_props 91 | # FIXME: Character encoding should be controlled separately from as_rows 92 | try: 93 | if fmt == "pvl": 94 | try: 95 | from pdr.pvl_utils import cached_pvl_load 96 | 97 | return cached_pvl_load(decompress(check_cases(fn))) 98 | except ValueError: 99 | pass 100 | if table_props["as_rows"] is True: 101 | # In order to take advantage of Python's universal newline 102 | # handling, we need to decode the file and _then_ split it. 103 | # Tolerate encoding errors mainly because we might have a 104 | # textual header preceded or followed by binary data, and 105 | # the decoder is going to process more of the file than 106 | # the part we actually use. 107 | lines = [] 108 | start = table_props["start"] 109 | end = start + table_props["length"] 110 | with decompress(check_cases(fn)) as f: 111 | decoded_f = TextIOWrapper(f, encoding="UTF-8", errors="replace") 112 | for i, line in enumerate(decoded_f): 113 | if i >= end: 114 | break 115 | if i >= start: 116 | lines.append(line.replace("\n", "\r\n")) 117 | text = "".join(lines) 118 | else: 119 | with decompress(check_cases(fn)) as file: 120 | file.seek(table_props["start"]) 121 | text = file.read(min(table_props["length"], 80000)).decode( 122 | "ISO-8859-1" 123 | ) 124 | return text 125 | except (ValueError, OSError) as ex: 126 | warnings.warn(f"unable to parse {name}: {ex}") 127 | 128 | 129 | @canonicalized 130 | # TODO: misleading name. Primarily a file _reader_. 
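# A context-managed variant of the file read below (a sketch of what the
# "context manager" TODO at the end of this function might look like; same
# result, but the handle is closed promptly):
#
#     with open(check_cases(fn)) as stream:
#         return stream.read()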
131 | def ignore_if_pdf(fn: Union[str, Path]) -> Optional[str]:
132 | """Read text from a file if it's not a pdf."""
133 | if looks_like_this_kind_of_file(fn, [".pdf"]):
134 | warnings.warn(f"Cannot open {fn}; PDF files are not supported.")
135 | return
136 | # TODO: should use a context manager to avoid dangling file handles
137 | return open(check_cases(fn)).read()
138 | -------------------------------------------------------------------------------- /pdr/loaders/utility.py: --------------------------------------------------------------------------------
1 | """Support objects for 'utility' Loader subclasses."""
2 |
3 | from functools import partial
4 | from itertools import chain
5 | from operator import contains
6 | from pathlib import Path
7 | from typing import Collection
8 | import warnings
9 |
10 | from multidict import MultiDict
11 |
12 |
13 | # TODO, maybe: I think we should keep these somewhere else; they're certainly
14 | # not used exclusively in loaders
15 |
16 | LABEL_EXTENSIONS = (".xml", ".lbl")
17 | IMAGE_EXTENSIONS = (".img", ".rgb")
18 | TABLE_EXTENSIONS = (".tab", ".csv")
19 | TEXT_EXTENSIONS = (".txt", ".md")
20 | FITS_EXTENSIONS = (".fits", ".fit", ".fits.gz", ".fit.gz", ".fz")
21 | BMP_EXTENSIONS = (".bmp",)
22 | TIFF_EXTENSIONS = (".tif", ".tiff")
23 | JP2_EXTENSIONS = (".jp2", ".jpf", ".jpc", ".jpx")
24 | GIF_EXTENSIONS = (".gif",)
25 | JPEG_EXTENSIONS = (".jpg", ".jpeg") # NOTE: Also extensions like MPO/MPF
26 | PNG_EXTENSIONS = (".png",)
27 | WEBP_EXTENSIONS = (".webp",)
28 | DESKTOP_IMAGE_EXTENSION_SETS = (
29 | BMP_EXTENSIONS,
30 | TIFF_EXTENSIONS,
31 | JP2_EXTENSIONS,
32 | JPEG_EXTENSIONS,
33 | GIF_EXTENSIONS,
34 | PNG_EXTENSIONS,
35 | WEBP_EXTENSIONS
36 | )
37 | # NOTE: these are codes from pillow. some sources may call them other things.
38 | DESKTOP_IMAGE_STANDARDS = (
39 | "JPEG2000",
40 | "JPEG",
41 | "MPO",
42 | "WEBP",
43 | "PNG",
44 | "TIFF",
45 | "GIF",
46 | "BMP"
47 | )
48 | DESKTOP_IMAGE_EXTENSIONS = tuple(chain(*DESKTOP_IMAGE_EXTENSION_SETS))
49 |
50 |
51 | def trivial(*_, **__):
52 | """
53 | This is a trivial loader. It does not load. It exists for any pointers we
54 | don't want to load and instead simply want ignored.
55 | """
56 | pass
57 |
58 |
59 | def tbd(name: str, block: MultiDict, *_, **__):
60 | """
61 | This is a placeholder function for objects that are not explicitly
62 | supported elsewhere. It throws a warning and
63 | returns just the label block for the pointer.
64 | """
65 | warnings.warn(f"The {name} pointer is not yet fully supported.")
66 | return block
67 |
68 |
69 | def looks_like_this_kind_of_file(
70 | filename: str, kind_extensions: Collection[str]
71 | ) -> bool:
72 | """Does this file have any of these extensions?"""
73 | is_this_kind_of_extension = partial(contains, kind_extensions)
74 | return any(map(is_this_kind_of_extension, Path(filename.lower()).suffixes))
75 |
76 |
77 | def is_trivial(pointer: str) -> bool:
78 | """
79 | Returns True if this is the name of a data object we want to handle
80 | trivially, in the sense that we never ever want to load it directly.
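Example (illustrative pointer names, not drawn from any particular label):

>>> is_trivial("TIFF_HEADER"), is_trivial("TIFF_IMAGE")
(True, False)
>>> is_trivial("TABLE_STRUCTURE")
True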
81 | """ 82 | # TIFF tags / headers should always be parsed by the TIFF parser itself 83 | if ( 84 | ("TIFF" in pointer) 85 | and ("IMAGE" not in pointer) 86 | and ("DOCUMENT" not in pointer) 87 | ): 88 | return True 89 | # we don't present STRUCTURES separately from their tables 90 | if "STRUCTURE" in pointer: 91 | return True 92 | # only in MSL CCAM products; probably for internal processing pipelines 93 | if "PDS_OBJECT" in pointer: 94 | return True 95 | return False 96 | -------------------------------------------------------------------------------- /pdr/np_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Methods for working with numpy objects, primarily intended as components of 3 | pdr's image- and table-loading routines. 4 | """ 5 | from bz2 import BZ2File 6 | from gzip import GzipFile 7 | from io import BufferedIOBase, BytesIO 8 | from numbers import Number 9 | from typing import Optional, Union 10 | from zipfile import ZipFile 11 | 12 | import numpy as np 13 | 14 | 15 | def enforce_order_and_object(array: np.ndarray, inplace=True) -> np.ndarray: 16 | """ 17 | Make an ndarray compatible for use with pandas or other similarly-strict 18 | interfaces. Determine which, if any, of the array's fields are in nonnative 19 | byteorder and swap them; also convert any void dtypes to object. 20 | """ 21 | # NOTE: doing the void conversion in this function is inelegant but 22 | # somewhat efficient. 23 | # TODO: or is it? benchmark. 24 | if inplace is False: 25 | array = array.copy() 26 | if len(array.dtype) < 2: 27 | if len(array.dtype) == 0: 28 | dtype = array.dtype 29 | void_return = array 30 | else: 31 | dtype = array.dtype[0] 32 | # if we don't slice the field out explicitly, numpy will transform 33 | # it into an array of tuples 34 | void_return = array[tuple(array.dtype.fields.keys())[0]] 35 | if "V" in str(dtype): 36 | return void_return.astype("O") 37 | if dtype.isnative: 38 | return array 39 | return array.byteswap().view(array.dtype.newbyteorder("=")) 40 | swap_targets = [] 41 | swapped_dtype = [] 42 | for name, field in array.dtype.fields.items(): 43 | if field[0].isnative is False: 44 | swap_targets.append(name) 45 | swapped_dtype.append((name, field[0].newbyteorder("="))) 46 | elif "V" not in str(field[0]): 47 | swapped_dtype.append((name, field[0])) 48 | else: 49 | swapped_dtype.append((name, "O")) 50 | # TODO: this may work unreliably for small integer types 51 | return np.array(array, dtype=swapped_dtype) 52 | 53 | 54 | def casting_to_float(array: np.ndarray, *operands: Number) -> bool: 55 | """ 56 | check: will this operation cast the array to float? 57 | return True if array is integer-valued and any operands are not integers. 58 | """ 59 | return (array.dtype.char in np.typecodes["AllInteger"]) and not all( 60 | [isinstance(operand, int) for operand in operands] 61 | ) 62 | 63 | 64 | # TODO: shake this out with a bunch of different compression type examples, 65 | # including specific compressions on band/line/single-plane/etc. images, 66 | # compressed binary tables, etc. 67 | # TODO: I'm not sure if the above TODO is still relevant. 68 | def np_from_buffered_io( 69 | buffered_io: BufferedIOBase, 70 | dtype: Union[np.dtype, str], 71 | offset: Optional[int] = None, 72 | count: Optional[int] = None, 73 | ) -> np.ndarray: 74 | """ 75 | Read a 1D numpy array of the specified dtype, size, and offset from a 76 | buffered IO object. 
77 | """ 78 | if offset is not None: 79 | buffered_io.seek(offset) 80 | if isinstance(buffered_io, (BZ2File, ZipFile, GzipFile, BytesIO)): 81 | # we need to read the appropriate amount into a new buffer, especially 82 | # if it's monolithically compressed 83 | n_bytes = None if count is None else count * dtype.itemsize 84 | stream = BytesIO(buffered_io.read(n_bytes)) 85 | return np.frombuffer(stream.getbuffer(), dtype=dtype) 86 | count = -1 if count is None else count 87 | # In this case, buffered_io is just an open file stream 88 | return np.fromfile(buffered_io, dtype=dtype, count=count) 89 | 90 | 91 | def make_c_contiguous(arr: np.ndarray) -> np.ndarray: 92 | """ 93 | If an ndarray isn't C-contiguous, reorder it as C-contiguous. If it is, 94 | don't mess with it. 95 | """ 96 | if arr.flags["C_CONTIGUOUS"] is False: 97 | return np.ascontiguousarray(arr) 98 | return arr 99 | 100 | 101 | # TODO: really all arguments but ibm/sreg are redundant for basic S/360 formats 102 | def ibm_to_np(ibm: np.ndarray, sreg: int, ereg: int, mmask: int) -> np.ndarray: 103 | """ 104 | Convert an array composed of IBM System 360-style floats (expressed as 105 | 4- or 8-byte unsigned integers, as appropriate for byte width) to numpy 106 | float64. 107 | """ 108 | # dtype conversion: this field must be signed 109 | ibm_sign = (ibm >> sreg & 0x01).astype('int8') 110 | # dtype conversion: largest values possible will overfloat int64 or float32 111 | ibm_exponent = (ibm >> ereg & 0x7f).astype('float64') 112 | ibm_mantissa = ibm & mmask 113 | mantissa = ibm_mantissa / (2 ** ereg) 114 | exponent = 16 ** (ibm_exponent - 64) 115 | sign = 1 - (2 * ibm_sign).astype('int8') 116 | return sign * mantissa * exponent 117 | 118 | 119 | def ibm32_to_np_f32(ibm): 120 | """ 121 | Convert an array of IBM System 360-style 32-bit floats (expressed as 32-bit 122 | unsigned integers) to numpy float64. 123 | """ 124 | return ibm_to_np(ibm, 31, 24, 0x00ffffff) 125 | 126 | 127 | def ibm64_to_np_f64(ibm): 128 | """ 129 | Convert an array of IBM System 360-style 64-bit floats (expressed as 64-bit 130 | unsigned integers) to numpy float64. 131 | """ 132 | return ibm_to_np(ibm, 63, 56, 0x00ffffffffffffff) 133 | -------------------------------------------------------------------------------- /pdr/parselabel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/parselabel/__init__.py -------------------------------------------------------------------------------- /pdr/parselabel/pds4.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple utilities for preprocessing pds4_tools-produced label objects for the 3 | pdr.Metadata constructor. 4 | """ 5 | from collections import OrderedDict 6 | from typing import Mapping, TYPE_CHECKING 7 | 8 | from dustgoggles.func import constant 9 | from dustgoggles.structures import dig_for_keys 10 | from multidict import MultiDict 11 | 12 | 13 | if TYPE_CHECKING: 14 | from pdr.pds4_tools.reader.label_objects import Label 15 | 16 | 17 | def unpack_to_multidict( 18 | packed: Mapping, mtypes: tuple[type, ...] = (dict,) 19 | ) -> MultiDict: 20 | """ 21 | Recursively unpack any Mapping into a MultiDict. Unpacks all list or tuple 22 | values at any level into multiple keys at that level. 
This is an unusual- 23 | sounding behavior but is generally appropriate for PDS4 labels, and 24 | specifically for the pds4_tools representation of XML labels. PDS4 types 25 | with cardinality > 1 always (?) represent multiple distinct entities / 26 | properties rather than an array of properties. The list can also always be 27 | retrieved from the resulting multidict with `MultiDict.get_all()`. 28 | 29 | Example: 30 | ``` 31 | >>> unpack_to_multidict({'a': 1, 'b': [{'c': 2}, 3]}) 32 | , 'b': 3)> 33 | ``` 34 | """ 35 | unpacked, items = MultiDict(), list(reversed(packed.items())) 36 | while len(items) > 0: 37 | k, v = items.pop() 38 | if isinstance(v, (list, tuple)): 39 | items += [(k, e) for e in reversed(v)] 40 | elif isinstance(v, mtypes): 41 | unpacked.add(k, unpack_to_multidict(v, mtypes)) 42 | else: 43 | unpacked.add(k, v) 44 | return unpacked 45 | 46 | 47 | # noinspection PyTypeChecker 48 | def reformat_pds4_tools_label(label: "Label") -> tuple[MultiDict, list[str]]: 49 | """ 50 | Convert a pds4_tools Label object into a MultiDict and a list of parameters 51 | suitable for constructing a pdr.Metadata object. This is not just a type 52 | conversion; it also rearranges some nested data structures (in particular, 53 | repeated child elements become multiple keys of a MultiDict rather than 54 | a list of OrderedDicts). 55 | """ 56 | unpacked = unpack_to_multidict(label.to_dict(), (OrderedDict, MultiDict)) 57 | # collect all keys to populate pdr.Metadata's fieldcounts attribute 58 | params = dig_for_keys( 59 | unpacked, None, base_pred=constant(True), mtypes=(MultiDict,) 60 | ) 61 | return unpacked, params 62 | -------------------------------------------------------------------------------- /pdr/parselabel/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | from typing import Union, IO 4 | 5 | 6 | KNOWN_LABEL_ENDINGS = ( 7 | re.compile(b"\nEND {0,2}(\r| {8})"), # common PVL convention 8 | re.compile(b"\x00{3}"), # just null bytes, for odder cases 9 | ) 10 | """ 11 | Fast regex patterns for generic PVL label endings. They work for almost all PVL 12 | labels in the PDS. 13 | """ 14 | 15 | DEFAULT_PVL_LIMIT = 1000 * 1024 16 | """heuristic for max label size. 
we know it's not a real rule.""" 17 | 18 | 19 | class InvalidAttachedLabel(ValueError): 20 | pass 21 | 22 | 23 | def _scan_to_end_of_label( 24 | buf: IO, max_size: int, text: bytes, raise_no_ending: bool 25 | ): 26 | """Subroutine of trim_label()""" 27 | length = 0 28 | while length < max_size: 29 | if (chunk := buf.read(50 * 1024)) == b'': 30 | break 31 | for ending in KNOWN_LABEL_ENDINGS: 32 | if (endmatch := re.search(ending, text[:-15] + chunk)) is not None: 33 | return text + chunk[: endmatch.span()[1]] 34 | text, length = text + chunk, length + 50 * 1024 35 | if raise_no_ending is True: 36 | raise InvalidAttachedLabel("Couldn't find a label ending.") 37 | return text 38 | 39 | 40 | def trim_label( 41 | fn: Union[IO, Path, str], 42 | max_size: int = DEFAULT_PVL_LIMIT, 43 | strict_decode: bool = True, 44 | raise_no_ending: bool = False 45 | ) -> str: 46 | """Look for a PVL label at the top of a file.""" 47 | target_is_fn = isinstance(fn, (Path, str)) 48 | try: 49 | if target_is_fn is True: 50 | fn = open(fn, 'rb') 51 | text = fn.read(20) 52 | if strict_decode is True: 53 | try: 54 | text.decode('ascii') 55 | except UnicodeDecodeError: 56 | raise InvalidAttachedLabel("File head appears to be binary.") 57 | text = _scan_to_end_of_label(fn, max_size, text, raise_no_ending) 58 | finally: 59 | if target_is_fn is True: 60 | fn.close() 61 | policy = "strict" if strict_decode is True else "replace" 62 | try: 63 | return text.decode("utf-8", errors=policy) 64 | except UnicodeDecodeError: 65 | raise InvalidAttachedLabel("Invalid characters in label.") 66 | -------------------------------------------------------------------------------- /pdr/pdrtypes.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import ( 4 | Callable, Literal, Optional, TypedDict, TYPE_CHECKING, Union 5 | ) 6 | # TypeAlias is new in 3.10 7 | # this is exactly how it's defined in python3.11/typing.py 8 | try: 9 | from typing import TypeAlias 10 | except ImportError: 11 | def TypeAlias(self, parameters): 12 | raise TypeError(f"{self} is not subscriptable") 13 | 14 | 15 | if TYPE_CHECKING: 16 | from multidict import MultiDict 17 | import numpy as np 18 | import pandas as pd 19 | from pdr import Data, Metadata 20 | 21 | ByteOrder: TypeAlias = Literal["<", ">"] 22 | """Most significant/least significant byteorder codes""" 23 | 24 | PDRLike: TypeAlias = Union["Data", "Metadata"] 25 | """Something with a pdr-style metadata-getting interface""" 26 | 27 | LoaderFunction: TypeAlias = Callable[ 28 | ..., Union[str, "MultiDict", "pd.DataFrame", "np.ndarray"] 29 | ] 30 | """Signature of a Loader's load function""" 31 | 32 | PhysicalTarget: TypeAlias = Union[ 33 | list[str, int], tuple[str, int], int, str, dict[str, Union[str, int]] 34 | ] 35 | """Expected formats of 'pointer' parameters, i.e. ^WHATEVER = PhysicalTarget""" 36 | 37 | BandStorageType: TypeAlias = Literal[ 38 | "BAND_SEQUENTIAL", "LINE_INTERLEAVED", "SAMPLE_INTERLEAVED", None 39 | ] 40 | """ 41 | Codes for physical storage layout of 3-D arrays. Also known as BSQ/band 42 | sequential, BIL/band interleaved by line, BIP/band interleaved by pixel. 43 | None implies either that the storage layout is unknown or that the array is 44 | not 3-D. 
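As a worked example: for a hypothetical array with 2 bands, 2 lines, and 2
samples, writing its elements as b(band, line, sample), BAND_SEQUENTIAL
stores b000 b001 b010 b011 b100 b101 b110 b111; LINE_INTERLEAVED stores
b000 b001 b100 b101 b010 b011 b110 b111; and SAMPLE_INTERLEAVED stores
b000 b100 b001 b101 b010 b110 b011 b111.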
45 | """ 46 | 47 | Axname: TypeAlias = Literal["BAND", "LINE", "SAMPLE"] 48 | """Conventional names for image axes.""" 49 | 50 | 51 | class ImageProps(TypedDict): 52 | """Standard image properties dict used in image-processing workflows.""" 53 | # Number of bytes per pixel (eventually redundant with sample_type but 54 | # populated much earlier) 55 | BYTES_PER_PIXEL: Literal[1, 2, 4, 8] 56 | # Do the elements of the array, when loaded, represent VAX reals? 57 | is_vax_real: bool 58 | # numpy dtype string 59 | sample_type: str 60 | # total number of elements 61 | pixels: int 62 | # number of elements along each dimension 63 | nrows: int 64 | ncols: int 65 | nbands: int 66 | # physical storage layout of 3D arrays (None for 2D arrays) 67 | band_storage_type: BandStorageType 68 | # total row/column/band pad elements due to ISIS-style axplanes 69 | rowpad: int 70 | colpad: int 71 | bandpad: int 72 | # number of pad elements for left/right sideplanes 73 | prefix_rows: Optional[int] 74 | suffix_rows: Optional[int] 75 | # number of pad elements for bottom/topplanes 76 | prefix_cols: Optional[int] 77 | suffix_cols: Optional[int] 78 | # number of pad elements for front/backplanes 79 | prefix_bands: Optional[int] 80 | suffix_bands: Optional[int] 81 | # total pad elements due to line prefixes/suffixes 82 | linepad: int 83 | # number of elements in line prefix and suffix 84 | line_prefix_pix: Optional[int] 85 | line_suffix_pix: Optional[int] 86 | # Order of axes expressed as a tuple of axis names, only used by ISIS qubes 87 | axnames: Optional[tuple[Axname]] 88 | 89 | 90 | class DataIdentifiers(TypedDict): 91 | """ 92 | Standard PDS3 'identifiers' Data checks its Metadata for on initialization 93 | (if it's made from a PDS3 product). Used primarily to make special case 94 | checks more compact. These are taken directly from the label, then 95 | stringified if they're sets or tuples. All keys are always present, but 96 | may be None if a parameter's not actually in the label. 97 | """ 98 | DATA_SET_ID: Union[str, None] 99 | DATA_SET_NAME: Union[str, None] 100 | FILE_NAME: Union[str, None] 101 | FILE_RECORDS: Union[int, None] 102 | INSTRUMENT_ID: Union[str, None] 103 | INSTRUMENT_HOST_NAME: Union[str, None] 104 | INSTRUMENT_NAME: Union[str, None] 105 | LABEL_RECORDS: Union[int, None] 106 | NOTE: Union[str, None] 107 | PRODUCT_ID: Union[str, None] 108 | PRODUCT_TYPE: Union[str, None] 109 | RECORD_BYTES: Union[int, None] 110 | RECORD_TYPE: Union[str, None] 111 | ROW_BYTES: Union[int, None] 112 | ROWS: Union[int, None] 113 | SPACECRAFT_NAME: Union[str, None] 114 | STANDARD_DATA_PRODUCT_ID: Union[str, None] 115 | -------------------------------------------------------------------------------- /pdr/pds4_tools/CREDITS: -------------------------------------------------------------------------------- 1 | Authors: 2 | 3 | Lev Nagdimunov 4 | 5 | Contributors: 6 | 7 | Tilden Barnes 8 | Michael S. P. Kelley 9 | Matthew Knight 10 | Mark Bentley 11 | 12 | Special Thanks: 13 | 14 | Michael F. A'Hearn 15 | Ludmilla Kolokolova 16 | Anne Raugh 17 | James M. Bauer 18 | 19 | Other Credits: 20 | 21 | NASA PDS-Small Bodies Node for funding this project. 22 | PyInstaller and Py2App for binary packaging. 23 | SAOImage DS9 and fv FITS Viewer for inspiration. 
24 | -------------------------------------------------------------------------------- /pdr/pds4_tools/__about__.py: -------------------------------------------------------------------------------- 1 | __author__ = "Lev Nagdimunov" 2 | __copyright__ = "2015 - 2021, University of Maryland" 3 | 4 | __version__ = "1.4.dev0" 5 | __email__ = "lnagdi1@astro.umd.edu" 6 | -------------------------------------------------------------------------------- /pdr/pds4_tools/__init__.py: -------------------------------------------------------------------------------- 1 | from pdr.pds4_tools.__about__ import (__version__, __author__, __email__, __copyright__) 2 | 3 | from .reader import pds4_read 4 | from .reader import pds4_read as read 5 | 6 | from .utils.logging import set_loglevel 7 | 8 | try: 9 | from .viewer import pds4_viewer 10 | from .viewer import pds4_viewer as view 11 | except ImportError as e: 12 | 13 | def _missing_optional_deps(exception, *args, **kwargs): 14 | raise exception 15 | 16 | import functools as _functools 17 | pds4_viewer = view = _functools.partial(_missing_optional_deps, e) 18 | -------------------------------------------------------------------------------- /pdr/pds4_tools/extern/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/pds4_tools/extern/__init__.py -------------------------------------------------------------------------------- /pdr/pds4_tools/extern/cached_property.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015, Daniel Greenfeld 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # * Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # * Redistributions in binary form must reproduce the above 12 | # copyright notice, this list of conditions and the following 13 | # disclaimer in the documentation and/or other materials provided 14 | # with the distribution. 15 | # 16 | # * Neither the name of cached-property nor the names of its 17 | # contributors may be used to endorse or promote products derived 18 | # from this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 30 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | # POSSIBILITY OF SUCH DAMAGE. 
32 | 33 | # -*- coding: utf-8 -*- 34 | 35 | __author__ = 'Daniel Greenfeld' 36 | __email__ = 'pydanny@gmail.com' 37 | __version__ = '1.3.0' 38 | __license__ = 'BSD' 39 | 40 | from time import time 41 | import threading 42 | 43 | 44 | class cached_property(object): 45 | """ 46 | A property that is only computed once per instance and then replaces itself 47 | with an ordinary attribute. Deleting the attribute resets the property. 48 | 49 | Source: https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76 50 | """ # noqa 51 | 52 | def __init__(self, func): 53 | self.__doc__ = getattr(func, '__doc__') 54 | self.func = func 55 | 56 | def __get__(self, obj, cls): 57 | if obj is None: 58 | return self 59 | value = obj.__dict__[self.func.__name__] = self.func(obj) 60 | return value 61 | 62 | 63 | class threaded_cached_property(object): 64 | """ 65 | A cached_property version for use in environments where multiple threads 66 | might concurrently try to access the property. 67 | """ 68 | 69 | def __init__(self, func): 70 | self.__doc__ = getattr(func, '__doc__') 71 | self.func = func 72 | self.lock = threading.RLock() 73 | 74 | def __get__(self, obj, cls): 75 | if obj is None: 76 | return self 77 | 78 | obj_dict = obj.__dict__ 79 | name = self.func.__name__ 80 | with self.lock: 81 | try: 82 | # check if the value was computed before the lock was acquired 83 | return obj_dict[name] 84 | except KeyError: 85 | # if not, do the calculation and release the lock 86 | return obj_dict.setdefault(name, self.func(obj)) -------------------------------------------------------------------------------- /pdr/pds4_tools/extern/zscale.py: -------------------------------------------------------------------------------- 1 | # This file is part of the NumDisplay tool available at the following URL: 2 | # http://stsdas.stsci.edu/numdisplay/ 3 | # 4 | # Copyright (C) 2005 Association of Universities for Research in Astronomy (AURA) 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # 1. Redistributions of source code must retain the above copyright 10 | # notice, this list of conditions and the following disclaimer. 11 | # 12 | # 2. Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following 14 | # disclaimer in the documentation and/or other materials provided 15 | # with the distribution. 16 | # 17 | # 3. The name of AURA and its representatives may not be used to 18 | # endorse or promote products derived from this software without 19 | # specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY AURA ``AS IS'' AND ANY EXPRESS OR IMPLIED 22 | # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 23 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL AURA BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 29 | # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 30 | # USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 31 | # DAMAGE. 
32 | 33 | from __future__ import division # confidence high 34 | 35 | import math 36 | import numpy 37 | 38 | MAX_REJECT = 0.5 39 | MIN_NPIXELS = 5 40 | GOOD_PIXEL = 0 41 | BAD_PIXEL = 1 42 | KREJ = 2.5 43 | MAX_ITERATIONS = 5 44 | 45 | def zscale (image, nsamples=1000, contrast=0.25, bpmask=None, zmask=None): 46 | """Implement IRAF zscale algorithm 47 | 48 | Parameters 49 | ---------- 50 | image : arr 51 | 2-d numpy array 52 | 53 | nsamples : int (Default: 1000) 54 | Number of points in array to sample for determining scaling factors 55 | 56 | contrast : float (Default: 0.25) 57 | Scaling factor for determining min and max. Larger values increase the 58 | difference between min and max values used for display. 59 | 60 | bpmask : None 61 | Not used at this time 62 | 63 | zmask : None 64 | Not used at this time 65 | 66 | Returns 67 | ------- 68 | (z1, z2) 69 | """ 70 | 71 | # Sample the image 72 | samples = zsc_sample (image, nsamples, bpmask, zmask) 73 | npix = len(samples) 74 | samples.sort() 75 | zmin = samples[0] 76 | zmax = samples[-1] 77 | # For a zero-indexed array 78 | center_pixel = (npix - 1) // 2 79 | if npix%2 == 1: 80 | median = samples[center_pixel] 81 | else: 82 | median = 0.5 * (samples[center_pixel] + samples[center_pixel + 1]) 83 | 84 | # 85 | # Fit a line to the sorted array of samples 86 | minpix = max(MIN_NPIXELS, int(npix * MAX_REJECT)) 87 | ngrow = max (1, int (npix * 0.01)) 88 | ngoodpix, zstart, zslope = zsc_fit_line (samples, npix, KREJ, ngrow, 89 | MAX_ITERATIONS) 90 | 91 | if ngoodpix < minpix: 92 | z1 = zmin 93 | z2 = zmax 94 | else: 95 | if contrast > 0: zslope = zslope / contrast 96 | z1 = max (zmin, median - (center_pixel - 1) * zslope) 97 | z2 = min (zmax, median + (npix - center_pixel) * zslope) 98 | return z1, z2 99 | 100 | 101 | def zsc_sample (image, maxpix, bpmask=None, zmask=None): 102 | 103 | # Figure out which pixels to use for the zscale algorithm 104 | # Returns the 1-d array samples 105 | # Don't worry about the bad pixel mask or zmask for the moment 106 | # Sample in a square grid, and return the first maxpix in the sample 107 | nc = image.shape[0] 108 | nl = image.shape[1] 109 | stride = max (1.0, math.sqrt((nc - 1) * (nl - 1) / float(maxpix))) 110 | stride = int (stride) 111 | samples = image[::stride,::stride].flatten() 112 | 113 | # Remove invalid values for masked arrays 114 | if isinstance(samples, numpy.ma.MaskedArray): 115 | samples = samples.compressed() 116 | 117 | # Remove invalid values from ndarrays 118 | elif isinstance(samples, numpy.ndarray): 119 | samples = samples[numpy.isfinite(samples)] 120 | 121 | return samples[:maxpix] 122 | 123 | 124 | def zsc_fit_line (samples, npix, krej, ngrow, maxiter): 125 | 126 | # 127 | # First re-map indices from -1.0 to 1.0 128 | xscale = 2.0 / (npix - 1) 129 | xnorm = numpy.arange(npix) 130 | xnorm = xnorm * xscale - 1.0 131 | 132 | ngoodpix = npix 133 | minpix = max (MIN_NPIXELS, int (npix*MAX_REJECT)) 134 | last_ngoodpix = npix + 1 135 | 136 | intercept = 0 137 | slope = 0 138 | 139 | # This is the mask used in k-sigma clipping. 
0 is good, 1 is bad 140 | badpix = numpy.zeros(npix, dtype="int32") 141 | 142 | # Iterate 143 | for niter in range(maxiter): 144 | 145 | if (ngoodpix >= last_ngoodpix) or (ngoodpix < minpix): 146 | break 147 | 148 | # Accumulate sums to calculate straight line fit 149 | goodpixels = numpy.where(badpix == GOOD_PIXEL) 150 | sumx = xnorm[goodpixels].sum() 151 | sumxx = (xnorm[goodpixels]*xnorm[goodpixels]).sum() 152 | sumxy = (xnorm[goodpixels]*samples[goodpixels]).sum() 153 | sumy = samples[goodpixels].sum() 154 | sum = len(goodpixels[0]) 155 | 156 | delta = sum * sumxx - sumx * sumx 157 | # Slope and intercept 158 | intercept = (sumxx * sumy - sumx * sumxy) / delta 159 | slope = (sum * sumxy - sumx * sumy) / delta 160 | 161 | # Subtract fitted line from the data array 162 | fitted = xnorm*slope + intercept 163 | flat = samples - fitted 164 | 165 | # Compute the k-sigma rejection threshold 166 | ngoodpix, mean, sigma = zsc_compute_sigma (flat, badpix, npix) 167 | 168 | threshold = sigma * krej 169 | 170 | # Detect and reject pixels further than k*sigma from the fitted line 171 | lcut = -threshold 172 | hcut = threshold 173 | below = numpy.where(flat < lcut) 174 | above = numpy.where(flat > hcut) 175 | 176 | badpix[below] = BAD_PIXEL 177 | badpix[above] = BAD_PIXEL 178 | 179 | # Convolve with a kernel of length ngrow 180 | kernel = numpy.ones(ngrow,dtype="int32") 181 | badpix = numpy.convolve(badpix, kernel, mode='same') 182 | 183 | ngoodpix = len(numpy.where(badpix == GOOD_PIXEL)[0]) 184 | 185 | niter += 1 186 | 187 | # Transform the line coefficients back to the X range [0:npix-1] 188 | zstart = intercept - slope 189 | zslope = slope * xscale 190 | 191 | return ngoodpix, zstart, zslope 192 | 193 | 194 | def zsc_compute_sigma (flat, badpix, npix): 195 | 196 | # Compute the rms deviation from the mean of a flattened array. 197 | # Ignore rejected pixels 198 | 199 | # Accumulate sum and sum of squares 200 | goodpixels = numpy.where(badpix == GOOD_PIXEL) 201 | sumz = flat[goodpixels].sum() 202 | sumsq = (flat[goodpixels]*flat[goodpixels]).sum() 203 | ngoodpix = len(goodpixels[0]) 204 | if ngoodpix == 0: 205 | mean = None 206 | sigma = None 207 | elif ngoodpix == 1: 208 | mean = sumz 209 | sigma = None 210 | else: 211 | mean = sumz / ngoodpix 212 | temp = sumsq / (ngoodpix - 1) - sumz*sumz / (ngoodpix * (ngoodpix - 1)) 213 | if temp < 0: 214 | sigma = 0.0 215 | else: 216 | sigma = math.sqrt (temp) 217 | 218 | return ngoodpix, mean, sigma -------------------------------------------------------------------------------- /pdr/pds4_tools/reader/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import pds4_read 2 | -------------------------------------------------------------------------------- /pdr/pds4_tools/reader/read_headers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from .header_objects import HeaderStructure 7 | 8 | 9 | def _read_header_byte_data(header_structure): 10 | """ Reads the byte data from the data file for a PDS4 Header. 11 | 12 | Determines, from the structure's meta data, the relevant start and stop bytes in the data file prior to 13 | reading. 14 | 15 | Parameters 16 | ---------- 17 | header_structure : HeaderStructure 18 | The PDS4 Header data structure for which the byte data needs to be read. 
Should have been 19 | initialized via `HeaderStructure.from_file` method, or contain the required meta data. 20 | 21 | Returns 22 | ------- 23 | str or bytes 24 | The exact byte data for the header. 25 | """ 26 | 27 | from .core import read_byte_data 28 | 29 | meta_data = header_structure.meta_data 30 | 31 | start_byte = meta_data['offset'] 32 | stop_byte = start_byte + meta_data['object_length'] 33 | 34 | return read_byte_data(header_structure.parent_filename, start_byte, stop_byte) 35 | 36 | 37 | def new_header(input, **structure_kwargs): 38 | """ Create an header structure from PDS-compliant data. 39 | 40 | Parameters 41 | ---------- 42 | input : bytes, str or unicode 43 | A string or bytes containing the data for header. 44 | structure_kwargs : dict, optional 45 | Keywords that are passed directly to the `HeaderStructure` constructor. 46 | 47 | Returns 48 | ------- 49 | HeaderStructure 50 | An object representing the PDS4 header structure. The data attribute will contain *input*. 51 | Other attributes may be specified via *structure_kwargs*. 52 | """ 53 | 54 | # Create the HeaderStructure 55 | header_structure = HeaderStructure(**structure_kwargs) 56 | header_structure.data = input 57 | 58 | return header_structure 59 | 60 | 61 | def read_header_data(header_structure): 62 | """ 63 | Reads the data for a single PDS4 header structure, modifies *header_structure* to contain said data. 64 | 65 | Parameters 66 | ---------- 67 | header_structure : HeaderStructure 68 | The PDS4 Header data structure to which the data should be added. 69 | 70 | Returns 71 | ------- 72 | None 73 | """ 74 | 75 | header_byte_data = _read_header_byte_data(header_structure) 76 | 77 | header_structure.data = new_header(header_byte_data).data 78 | 79 | 80 | def read_header(full_label, header_label, data_filename, lazy_load=False, decode_strings=False): 81 | """ Create the `HeaderStructure`, containing label, data and meta data for a PDS4 Header from a file. 82 | 83 | Headers refer to PDS4 header data structures, which typically describe a portion of the data that serves 84 | as a header for some other data format. 85 | 86 | Parameters 87 | ---------- 88 | full_label : Label 89 | The entire label for a PDS4 product, from which *header_label* originated. 90 | header_label : Label 91 | Portion of label that defines the PDS4 header data structure. 92 | data_filename : str or unicode 93 | Filename, including the full path, of the data file that contains the data for this header. 94 | lazy_load : bool, optional 95 | If True, does not read-in the data of this header until the first attempt to access it. 96 | Defaults to False. 97 | decode_strings : bool, optional 98 | If True, the header data will be decoded to the ``unicode`` type in Python 2, and to the 99 | ``str`` type in Python 3. If False, leaves said data as a byte string. Defaults to False. 100 | 101 | Returns 102 | ------- 103 | HeaderStructure 104 | An object representing the header; contains its label, data and meta data 105 | 106 | Raises 107 | ------ 108 | TypeError 109 | Raised if called on a non-header according to *header_label*. 
110 | """ 111 | 112 | # Skip over data structure if its not actually an Array 113 | if 'Header' not in header_label.tag: 114 | raise TypeError('Attempted to read_header() on a non-header: ' + header_label.tag) 115 | 116 | # Create the data structure for this array 117 | header_structure = HeaderStructure.from_file(data_filename, header_label, full_label, 118 | lazy_load=lazy_load, decode_strings=decode_strings) 119 | 120 | return header_structure 121 | -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/pds4_tools/utils/__init__.py -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/compat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import unicode_literals 5 | 6 | import inspect 7 | from xml.etree import ElementTree as ET 8 | 9 | import numpy as np 10 | 11 | from ..extern import six 12 | 13 | # OrderedDict compat (Python 2.7+ and 3.1+) 14 | try: 15 | from collections import OrderedDict 16 | except ImportError: 17 | from ..extern.ordered_dict import OrderedDict 18 | 19 | # ArgParse compat (Python 2.7+ and 3.2+) 20 | try: 21 | import argparse 22 | except ImportError: 23 | from ..extern import argparse 24 | 25 | # ElementTree compat (Python 2.7+ and 3.3+) 26 | ET_Element = ET.Element if isinstance(ET.Element, six.class_types) else ET._Element 27 | ET_Tree_iter = ET.ElementTree.iter if hasattr(ET.ElementTree, 'iter') else ET.ElementTree.getiterator 28 | ET_Element_iter = ET_Element.iter if hasattr(ET_Element, 'iter') else ET_Element.getiterator 29 | ET_ParseError = ET.ParseError if hasattr(ET, 'ParseError') else None 30 | 31 | # NumPy compat (NumPy 2.0+) 32 | NUMPY_LT_2_0 = np.__version__.startswith(('0.', '1.')) 33 | 34 | try: 35 | np_unicode = np.unicode_ 36 | except AttributeError: 37 | np_unicode = np.str_ 38 | 39 | try: 40 | np_issubclass = np.issubclass_ 41 | except AttributeError: 42 | np_issubclass = issubclass 43 | 44 | 45 | # signature.bind(...).arguments compat (Python 3.3+) 46 | def bind_arguments(func, *args, **kwargs): 47 | # Python 3.3+ 48 | try: 49 | signature = inspect.signature(func) 50 | arguments = signature.bind(*args, **kwargs).arguments 51 | except AttributeError: 52 | # Python 2.7+ 53 | try: 54 | arguments = inspect.getcallargs(func, *args, **kwargs) 55 | defaults = inspect.getcallargs(func, (), ()) 56 | for arg in arguments.keys(): 57 | if (defaults[arg] == arguments[arg]) and (arg not in kwargs): 58 | del arguments[arg] 59 | except AttributeError: 60 | arguments = kwargs 61 | 62 | return arguments 63 | -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/constants.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import unicode_literals 5 | 6 | # PDS4 namespace URIs and default corresponding prefixes. Contains only those that have required 7 | # special usage in the code, and thus must be known. 
8 | PDS4_NAMESPACES = {'pds': 'http://pds.nasa.gov/pds4/pds/v1', 9 | 'disp': 'http://pds.nasa.gov/pds4/disp/v1', 10 | 'sp': 'http://pds.nasa.gov/pds4/sp/v1'} 11 | 12 | # PDS4 root element names for labels that could contain file areas with supported data structures 13 | PDS4_DATA_ROOT_ELEMENTS = ['Product_Observational', 14 | 'Product_Ancillary', 15 | 'Product_Browse', 16 | 'Product_Collection'] 17 | 18 | # PDS4 file area names that could contain supported data structures 19 | PDS4_DATA_FILE_AREAS = ['File_Area_Observational', 20 | 'File_Area_Observational_Supplemental', 21 | 'File_Area_Ancillary', 22 | 'File_Area_Browse', 23 | 'File_Area_Inventory'] 24 | 25 | # PDS4 table types that are supported data structures, and subclasses (which should be supported by default 26 | # since are subclasses) there of 27 | PDS4_TABLE_TYPES = ['Table_Character', 'Table_Binary', 'Table_Delimited', 'Inventory'] 28 | -------------------------------------------------------------------------------- /pdr/pds4_tools/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import unicode_literals 5 | 6 | from .deprecation import PDS4ToolsDeprecationWarning 7 | 8 | 9 | class PDS4StandardsException(Exception): 10 | """ Custom exception thrown when PDS4 Standards are violated. """ 11 | pass 12 | -------------------------------------------------------------------------------- /pdr/pil_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for dealing with 'desktop'-format images using pillow. 3 | 4 | TODO: not all of this ultimately goes here. Also, we might want to use opencv 5 | for some things instead. 
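Example (a sketch; assumes a real JPEG on disk, e.g. the sample in
pdr/tests/data):

>>> meta = skim_image_data("pdr/tests/data/squirrel.jpg")  # doctest: +SKIP
>>> meta["format"], meta["mimetype"]  # doctest: +SKIP
('JPEG', 'image/jpeg')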
6 | """ 7 | from io import BytesIO 8 | import re 9 | from pathlib import Path 10 | from typing import Any, Union, Mapping 11 | from xml.etree import ElementTree 12 | 13 | from dustgoggles.func import constant 14 | from dustgoggles.structures import dig_for_keys 15 | from multidict import MultiDict 16 | 17 | try: 18 | from PIL import Image 19 | from PIL.ExifTags import GPSTAGS, TAGS 20 | from PIL.ImageCms import ImageCmsProfile 21 | from PIL.TiffTags import lookup 22 | except ImportError: 23 | raise ModuleNotFoundError 24 | 25 | NS_PATTERN = re.compile("{.*?}") 26 | 27 | 28 | def unpack_icp(icp_blob: bytes): 29 | unpacked = {} 30 | for attr in dir((icp := ImageCmsProfile(BytesIO(icp_blob)).profile)): 31 | if attr.startswith("__"): 32 | continue 33 | if callable((obj := getattr(icp, attr))): 34 | continue 35 | unpacked[attr] = obj 36 | return unpacked 37 | 38 | 39 | def add_gps_ifd(im: Image, gps_tagname: int): 40 | gpsdict = im.getexif().get_ifd(gps_tagname) 41 | return {GPSTAGS[k].replace('GPS', ''): v for k, v in gpsdict.items()} 42 | 43 | 44 | def get_image_metadata(im: Image): 45 | outdict = {} 46 | meta = list(im.getexif().items()) 47 | if hasattr(im, "mpinfo"): 48 | meta += list(im.mpinfo.items()) 49 | for tag, val in meta: 50 | if tag in TAGS.keys(): 51 | name = TAGS[tag] 52 | elif ( 53 | im.format in ("TIFF", "MPO") 54 | and (tname := lookup(tag).name) != "unknown" 55 | ): 56 | name = tname 57 | else: 58 | name = str(tag) 59 | if name == 'GPSInfo': 60 | outdict |= add_gps_ifd(im, tag) 61 | elif name == 'XMLPacket': 62 | outdict[name] = unpack_xml(ElementTree.fromstring(val)) 63 | elif name == 'InterColorProfile': 64 | outdict[name] = unpack_icp(val) 65 | else: 66 | outdict[name] = val 67 | return outdict 68 | 69 | 70 | def strip_ns(tag): 71 | return NS_PATTERN.sub("", tag) 72 | 73 | 74 | def maybestrip_ns(obj, do_remove): 75 | text = obj.tag if isinstance(obj, ElementTree.Element) else obj 76 | return text if do_remove is False else strip_ns(text) 77 | 78 | 79 | def pick_text_attrib(node, remove_ns=True): 80 | has_text = node.text is not None and node.text.strip() != '' 81 | if has_text and len(node) > 0: 82 | raise SyntaxError( 83 | f"Can't parse text-containing parent node {node.tag}" 84 | ) 85 | has_attrib = len(node.attrib) != 0 86 | if has_text is has_attrib is False: 87 | return None 88 | if has_attrib is False: 89 | return node.text.strip() 90 | attrib = { 91 | maybestrip_ns(k, remove_ns): v for k, v in node.attrib.items() 92 | } 93 | if has_text is True: 94 | return {'attrib': attrib, 'text': node.text.strip()} 95 | return attrib 96 | 97 | 98 | def paramdig(unpacked: Mapping) -> tuple[Mapping, list[str]]: 99 | return unpacked, dig_for_keys( 100 | unpacked, None, base_pred=constant(True), mtypes=(MultiDict, dict) 101 | ) 102 | 103 | 104 | # TODO: probably want more! 
105 | IMAGE_META_ATTRS = ( 106 | 'mode', 107 | 'size', 108 | 'width', 109 | 'height', 110 | 'format', 111 | 'format_description', 112 | 'n_frames', 113 | ) 114 | 115 | 116 | def unpack_xml(root: ElementTree.Element, remove_ns: bool = True) -> Any: 117 | pick = pick_text_attrib(root, remove_ns) 118 | if len(root) == 0: 119 | return pick 120 | if pick is not None: 121 | # should only ever be dict or None for a non-terminal node 122 | xmd = MultiDict(pick) 123 | else: 124 | xmd = MultiDict() 125 | for node in root: 126 | unpacked = unpack_xml(node, remove_ns) 127 | if unpacked is None or len(unpacked) == 0: 128 | continue 129 | xmd.add(maybestrip_ns(node, remove_ns), unpacked) 130 | return xmd 131 | 132 | 133 | # TODO, maybe: decode ImageResources (see kings_river_canyon.tiff) 134 | def skim_image_data(fn: Union[str, Path]) -> dict: 135 | im, meta = Image.open(fn), {'fn': str(fn)} 136 | for attr in IMAGE_META_ATTRS: 137 | if (val := getattr(im, attr, None)) is None: 138 | continue 139 | meta[attr] = val 140 | meta['mimetype'] = Image.MIME[meta['format']] 141 | if (pal := getattr(im, 'palette', None)) is not None: 142 | # TODO, maybe: I hate that they use the color as the key and the 143 | # palette index as the value, but keeping it now for compatibility 144 | meta['palette'] = pal.colors 145 | # NOTE: this looks at TIFF tags for TIFFs by default 146 | return meta | get_image_metadata(im) 147 | -------------------------------------------------------------------------------- /pdr/pvl_utils.py: -------------------------------------------------------------------------------- 1 | """utilities for working with the `pvl` library.""" 2 | from functools import cache 3 | 4 | try: 5 | import pvl 6 | import pvl.decoder 7 | import pvl.grammar 8 | except ImportError: 9 | raise ModuleNotFoundError( 10 | "pvl is not installed. Please install pvl to parse PVL labels with it." 
11 | )
12 |
13 |
14 | class TimelessOmniDecoder(pvl.decoder.OmniDecoder):
15 | """OmniDecoder variant that never decodes datetimes (leaves them as strings)."""
16 | def __init__(self, *args, **kwargs):
17 | super().__init__(*args, grammar=pvl.grammar.OmniGrammar(), **kwargs)
18 |
19 | def decode_datetime(self, value: str):
20 | raise ValueError
21 |
22 |
23 | @cache
24 | def cached_pvl_load(reference):
25 | """Memoized pvl.load() that parses with TimelessOmniDecoder (no datetime decoding)."""
26 | import pvl
27 |
28 | return pvl.load(reference, decoder=TimelessOmniDecoder())
29 | -------------------------------------------------------------------------------- /pdr/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/__init__.py -------------------------------------------------------------------------------- /pdr/tests/conftest.py: --------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Union
3 |
4 | import numpy as np
5 | import pytest
6 |
7 | from dustgoggles.tracker import Tracker
8 | from pdr.tests.objects import (
9 | STUB_IMAGE_LABEL,
10 | STUB_BINARY_TABLE_LABEL,
11 | STUB_DSV_TABLE_LABEL,
12 | )
13 |
14 |
15 | @pytest.fixture(scope="session")
16 | def tracker_factory(tmp_path_factory):
17 | tracker_log_dir = tmp_path_factory.mktemp("tracker_logs", numbered=False)
18 |
19 | def make_tracker(path):
20 | return Tracker(path.name.replace(".", "_"), outdir=tracker_log_dir)
21 |
22 | return make_tracker
23 |
24 |
25 | def make_product(
26 | dir: Path,
27 | name: str,
28 | content: Union[np.ndarray, bytes, str],
29 | label: str,
30 | **extra_label_params: Union[str, int]
31 | ):
32 | if isinstance(content, np.ndarray):
33 | content = content.tobytes()
34 | mode = "wb"
35 | elif isinstance(content, bytes):
36 | mode = "wb"
37 | else:
38 | mode = "w"
39 |
40 | label = label.format(product_name=name, **extra_label_params)
41 |
42 | fpath = dir / (name + ".QQQ")
43 | lpath = dir / (name + ".LBL")
44 |
45 | with fpath.open(mode) as stream:
46 | stream.write(content)
47 | with lpath.open("w") as stream:
48 | stream.write(label)
49 | return (name, fpath, lpath)
50 |
51 |
52 | @pytest.fixture(scope="session")
53 | def products_dir(tmp_path_factory):
54 | return tmp_path_factory.mktemp("products", numbered=False)
55 |
56 |
57 | @pytest.fixture(scope="session")
58 | def uniband_image_product(products_dir):
59 | zeros = np.zeros((100, 100), dtype=np.uint8)
60 | return make_product(
61 | products_dir, "UB-IMG-PROD", zeros, STUB_IMAGE_LABEL, bands=1
62 | )
63 |
64 |
65 | @pytest.fixture(scope="session")
66 | def multiband_image_product(products_dir):
67 | zeros = np.zeros((100, 100, 3), dtype=np.uint8)
68 | return make_product(
69 | products_dir, "MB-IMG-PROD", zeros, STUB_IMAGE_LABEL, bands=3
70 | )
71 |
72 |
73 | @pytest.fixture(scope="session")
74 | def binary_table_product(products_dir):
75 | dtype = np.dtype([("x", np.uint8), ("y", np.float32), ("z", np.float64)])
76 | row = np.array([(1, 4.4, 8.8)], dtype=dtype)
77 | table = np.tile(row, 10)
78 | return make_product(
79 | products_dir, "BIN-TBL-PROD", table, STUB_BINARY_TABLE_LABEL
80 | )
81 |
82 |
83 | @pytest.fixture(scope="session")
84 | def dsv_table_product(products_dir):
85 | table = "5.5| cat| -12\r\n" * 10
86 | return make_product(
87 | products_dir, "DSV-TBL-PROD", table, STUB_DSV_TABLE_LABEL
88 | )
89 | -------------------------------------------------------------------------------- /pdr/tests/data/F187B51_cycle_3.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/F187B51_cycle_3.gif -------------------------------------------------------------------------------- /pdr/tests/data/Simple_Animated_Clock.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/Simple_Animated_Clock.webp -------------------------------------------------------------------------------- /pdr/tests/data/catseye_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/catseye_1.png -------------------------------------------------------------------------------- /pdr/tests/data/concert.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/concert.jpeg -------------------------------------------------------------------------------- /pdr/tests/data/kings_river_canyon.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/kings_river_canyon.tiff -------------------------------------------------------------------------------- /pdr/tests/data/squirrel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/squirrel.jpg -------------------------------------------------------------------------------- /pdr/tests/data/weather.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MillionConcepts/pdr/eac20507663a738ca988cca3d695fc8199549c81/pdr/tests/data/weather.bmp -------------------------------------------------------------------------------- /pdr/tests/objects.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | def takes_a_few_things(a, b, c, *, d: Optional[int] = 1, e=5, **_): 5 | return a + b + c + d + e 6 | 7 | 8 | def takes_x_only(x): 9 | return x + 1 10 | 11 | 12 | STUB_BINARY_TABLE_LABEL = """ 13 | ^TABLE = "{product_name}.QQQ" 14 | RECORD_TYPE = STREAM 15 | FILE_RECORDS = 10 16 | RECORD_BYTES = 13 17 | OBJECT = TABLE 18 | INTERCHANGE_FORMAT = BINARY 19 | ROWS = 10 20 | ROW_BYTES = 13 21 | COLUMNS = 3 22 | OBJECT = COLUMN 23 | NAME = "X" 24 | DATA_TYPE = "UNSIGNED_INTEGER" 25 | START_BYTE = 1 26 | BYTES = 1 27 | END_OBJECT = COLUMN 28 | OBJECT = COLUMN 29 | NAME = "Y" 30 | DATA_TYPE = "PC_REAL" 31 | START_BYTE = 2 32 | BYTES = 4 33 | END_OBJECT = COLUMN 34 | OBJECT = COLUMN 35 | NAME = "X" 36 | DATA_TYPE = "PC_REAL" 37 | START_BYTE = 6 38 | BYTES = 8 39 | END_OBJECT = COLUMN 40 | END_OBJECT = TABLE 41 | END 42 | """ 43 | 44 | STUB_DSV_TABLE_LABEL = """ 45 | ^SPREADSHEET = "{product_name}.QQQ" 46 | RECORD_TYPE = STREAM 47 | FILE_RECORDS = 10 48 | RECORD_BYTES = 17 49 | OBJECT = SPREADSHEET 50 | INTERCHANGE_FORMAT = ASCII 51 | ROWS = 10 52 | FIELD_DELIMITER = VERTICAL_BAR 53 | COLUMNS = 3 54 | OBJECT = COLUMN 55 | NAME = "X" 56 | DATA_TYPE = "ASCII_INTEGER" 
57 | END_OBJECT = COLUMN 58 | OBJECT = COLUMN 59 | NAME = "Y" 60 | DATA_TYPE = "ASCII_REAL" 61 | END_OBJECT = COLUMN 62 | OBJECT = COLUMN 63 | NAME = "X" 64 | DATA_TYPE = "ASCII_REAL" 65 | END_OBJECT = COLUMN 66 | END_OBJECT = TABLE 67 | END 68 | """ 69 | 70 | 71 | STUB_IMAGE_LABEL = """ 72 | ^IMAGE = "{product_name}.QQQ" 73 | SPACECRAFT_NAME = "ORBITER" 74 | OBJECT = IMAGE 75 | INTERCHANGE_FORMAT = BINARY 76 | LINES = 100 77 | LINE_SAMPLES = 100 78 | SAMPLE_TYPE = LSB_UNSIGNED_INTEGER 79 | SAMPLE_BITS = 8 80 | BANDS = {bands} 81 | BAND_STORAGE_TYPE = BAND_SEQUENTIAL 82 | FIRST_LINE = 1 83 | FIRST_LINE_SAMPLE = 1 84 | SAMPLE_BIT_MASK = 2#0111111111111111# 85 | INVALID_CONSTANT = 0 86 | MISSING_CONSTANT = 0 87 | END_OBJECT = IMAGE 88 | END 89 | """ 90 | 91 | SILLY_LABEL = """ 92 | PDS_VERSION_ID = NO 93 | /* FILE DATA ELEMENTS */ 94 | RECORD_TYPE = ABSOLUTELY_NOT 95 | RECORD_BYTES = 1000000 96 | FILE_RECORDS = -1 97 | /* pointer to CAT */ 98 | ^CAT = "MEOW.CAT" /* 0:SPECTRUM IR; 1:IMAGE */ 99 | CAT_NAME = LILY 100 | SOME_PARAMETER = "1000" /* h h h h i! */ 101 | OTHER_CATS = { 102 | "this_one"} 103 | DESCRIPTION = "This is a really 104 | nice cat. MONTMORILLONITE = 100. 105 | Great cat" 106 | /* Misidentification Data Elements */ 107 | NOTHING:FF = "B" 108 | MEOW_SEQUENCE_NUMBERS = (1, 2, 109 | 3, 4, "5" 110 | ) 111 | /* Coordinate System State: Tail */ 112 | 113 | 114 | GROUP = TAIL_COORDINATE_SYSTEM_PARMS 115 | COORDINATE_SYSTEM_NAME = TAIL_FRAME 116 | OBJECT = TIP_OF_TAIL_FORMAT 117 | POINTINESS = 12 118 | END_OBJECT = TAIL_TIP_FORMAT 119 | COORDINATE_SYSTEM_INDEX_NAME = ("CURL", "FUR", "POSE") 120 | ARTICULATION_DEVICE_ANGLE = ( -0.000045 , -0.785042 ) 121 | END_GROUP = I_FORGOT 122 | END 123 | """ 124 | 125 | BLOCK_TEXT = """OBJECT = IMAGE 126 | INTERCHANGE_FORMAT = BINARY 127 | LINES = 650 128 | LINE_SAMPLES = 350 129 | SAMPLE_TYPE = IEEE_REAL 130 | SAMPLE_BITS = 32 131 | BANDS = 3 132 | BAND_STORAGE_TYPE = BAND_SEQUENTIAL 133 | FIRST_LINE = 375 134 | FIRST_LINE_SAMPLE = 1 135 | SAMPLE_BIT_MASK = 2#0111111111111111# 136 | INVALID_CONSTANT = (0.0,0.0,0.0) 137 | MISSING_CONSTANT = (0.0,0.0,0.0) 138 | END_OBJECT = IMAGE 139 | """ 140 | 141 | QUBE_BLOCK_TEXT = """OBJECT = SPECTRAL_QUBE 142 | AXES = 3 143 | AXIS_NAME = (SAMPLE, LINE, BAND) 144 | ISIS_STRUCTURE_VERSION_ID = "2.1" 145 | /* Core Description */ 146 | CORE_ITEMS = (100, 66, 17) 147 | CORE_NAME = "CALIBRATED SPECTRAL RADIANCE" 148 | CORE_ITEM_BYTES = 4 149 | CORE_ITEM_TYPE = IEEE_REAL 150 | CORE_BASE = 0.000000 151 | CORE_MULTIPLIER = 1.000000 152 | CORE_UNIT = "uWATT*CM**-2*SR**-1*uM**-1" 153 | CORE_NULL = -1.0 154 | CORE_VALID_MINIMUM = 0.0 155 | CORE_LOW_REPR_SATURATION = -32767.0 156 | CORE_LOW_INSTR_SATURATION = -32766.0 157 | CORE_HIGH_REPR_SATURATION = -32765.0 158 | CORE_HIGH_INSTR_SATURATION = -32764.0 159 | SUFFIX_ITEMS = (0,0,8) 160 | BAND_SUFFIX_ITEM_BYTES = 4 161 | END_OBJECT 162 | """ 163 | 164 | # TODO: Can we leave out even more stuff? 
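165 | # MINIMAL_PDS4_LABEL is the smallest label reformat_pds4_tools_label is
166 | # expected to handle: an Identification_Area holding only a
167 | # logical_identifier, plus empty Observation_Area, Reference_List, and
168 | # File_Area_Observational elements, which should unpack to None (see
169 | # test_parselabel_pds4.py).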
170 | MINIMAL_PDS4_LABEL = """
171 | <Product_Observational
172 |     xmlns="http://pds.nasa.gov/pds4/pds/v1">
173 |   <Identification_Area>
174 |     <logical_identifier>urn:nasa:pds:mc_pdr_testsuite:test_labels:test_minimal_label.dat</logical_identifier>
175 |   </Identification_Area>
176 |   <Observation_Area/>
177 |   <Reference_List/>
178 |   <File_Area_Observational/>
179 | </Product_Observational>
180 | """
181 | 
--------------------------------------------------------------------------------
/pdr/tests/test_bit_handling.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | import pandas as pd
4 | 
5 | from pdr.bit_handling import expand_bit_strings
6 | from pdr.loaders.queries import read_table_structure
7 | from pdr.parselabel.pds3 import literalize_pvl_block, parse_pvl
8 | from pdr.pdrtypes import DataIdentifiers
9 | 
10 | BIT_STUB = """
11 | OBJECT = COLUMN
12 |     NAME = BITS1
13 |     BYTES = 2
14 |     START_BYTE = 1
15 |     DATA_TYPE = "MSB_BIT_STRING"
16 |     OBJECT = BIT_COLUMN
17 |         NAME = BITS2
18 |         BIT_DATA_TYPE = "MSB_INTEGER"
19 |         BITS = 3
20 |         START_BIT = 1
21 |     END_OBJECT = BIT_COLUMN
22 |     OBJECT = BIT_COLUMN
23 |         NAME = BITS2
24 |         BIT_DATA_TYPE = "MSB_INTEGER"
25 |         BITS = 3
26 |         START_BIT = 5
27 |     END_OBJECT = BIT_COLUMN
28 |     OBJECT = BIT_COLUMN
29 |         NAME = BITS3
30 |         BIT_DATA_TYPE = "MSB_INTEGER"
31 |         BITS = 4
32 |         START_BIT = 9
33 |     END_OBJECT = BIT_COLUMN
34 |     OBJECT = BIT_COLUMN
35 |         NAME = BITS4
36 |         BIT_DATA_TYPE = "MSB_INTEGER"
37 |         BITS = 4
38 |         START_BIT = 13
39 |     END_OBJECT = BIT_COLUMN
40 | END_OBJECT = COLUMN
41 | """
42 | 
43 | NULL_IDENTIFIERS = {field: "" for field in DataIdentifiers.__required_keys__}
44 | 
45 | 
46 | def test_bit_handling():
47 |     block = parse_pvl(BIT_STUB)[0]
48 |     fmtdef = read_table_structure(block, 'TABLE', None, None, NULL_IDENTIFIERS)
49 |     bits = random.choices((0, 1), k=16)
50 |     table = pd.DataFrame(
51 |         {'BITS1': [int("".join(map(str, bits)), 2).to_bytes(2, 'big')]}
52 |     )
53 |     table = expand_bit_strings(table, fmtdef)
54 |     strings = table.loc[0, 'BITS1']
55 |     assert strings[0] == ''.join(map(str, bits[0:3]))
56 |     assert strings[1] == ''.join(map(str, bits[4:7]))
57 |     assert strings[2] == "".join(map(str, bits[8:12]))
58 |     assert strings[3] == "".join(map(str, bits[12:16]))
59 | 
--------------------------------------------------------------------------------
/pdr/tests/test_browsify.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | import numpy as np
4 | import pandas as pd
5 | 
6 | from pdr.browsify import (
7 |     find_masked_bounds,
8 |     find_unmasked_bounds,
9 |     normalize_range,
10 |     eightbit,
11 |     colorfill_maskedarray,
12 |     browsify,
13 | )
14 | 
15 | import pytest
16 | try:
17 |     from PIL import Image
18 |     pil_available = True
19 | except ImportError:
20 |     pil_available = False
21 | 
22 | RNG = np.random.default_rng()
23 | # NOTE: all these tests have minuscule chances of randomly failing.
24 | 25 | 26 | def test_find_masked_bounds(): 27 | array = np.ma.masked_outside(RNG.poisson(10, (1024, 1024)), 1, 20) 28 | bounds = find_masked_bounds(array, 0, 0) 29 | assert bounds == (1, 20) 30 | bounds2 = find_masked_bounds(array, 10, 10) 31 | assert bounds2[0] > 1 32 | assert bounds2[1] < 20 33 | 34 | 35 | def test_find_unmasked_bounds(): 36 | array, _ = np.indices((100, 100)) 37 | bounds = find_unmasked_bounds(array, 0, 0) 38 | assert bounds == (0, 99) 39 | bounds2 = find_unmasked_bounds(array, 10, 10) 40 | assert bounds2[0] == 9 41 | assert bounds2[1] == 89 42 | 43 | 44 | def test_normalize_range(): 45 | array = RNG.poisson(50, (1024, 1024)) 46 | norm = normalize_range(array) 47 | assert norm.min() == 0 48 | assert norm.max() == 1 49 | norm2 = normalize_range(array, clip=10) 50 | assert norm2.std() > norm.std() 51 | 52 | 53 | def test_eightbit(): 54 | array = RNG.poisson(100, (1024, 1024)) 55 | eight = eightbit(array, 10) 56 | assert eight.min() == 0 57 | assert eight.max() == 255 58 | assert eight.dtype == np.dtype("uint8") 59 | assert eight.std() / eight.mean() > array.std() / array.mean() 60 | 61 | 62 | def test_colorfill_maskedarray(): 63 | arr = RNG.poisson(100, (1024, 1024)) 64 | masked = np.ma.masked_outside(arr, 10, 90) 65 | filled = colorfill_maskedarray(masked) 66 | assert np.equal(filled[masked.mask], np.array([0, 255, 255])).all() 67 | 68 | 69 | def test_browsify_df(tmp_path): 70 | obj = pd.DataFrame({"a": [1, 2], "b": ["cat", "dog"]}) 71 | browsify(obj, tmp_path / "browse") 72 | df = pd.read_csv(tmp_path / "browse.csv") 73 | assert (df["a"] == [1, 2]).all() 74 | assert (df["b"] == ["cat", "dog"]).all() 75 | 76 | 77 | @pytest.mark.skipif(not pil_available, reason="PIL not available") 78 | def test_browsify_array(tmp_path): 79 | arr = np.ma.masked_outside(RNG.poisson(100, (1024, 1024)), 10, 90) 80 | 81 | browsify(arr, tmp_path / "browse") 82 | im = Image.open(tmp_path / "browse.jpg") 83 | assert im.size == (1024, 1024) 84 | # compression artifacts etc. 
mean it's not precisely equal
85 |     assert (
86 |         np.abs(
87 |             np.subtract(
88 |                 np.asarray(im)[arr.mask], np.array([0, 255, 255])
89 |             ).mean()
90 |         )
91 |         < 5
92 |     )
93 | 
--------------------------------------------------------------------------------
/pdr/tests/test_data.py:
--------------------------------------------------------------------------------
1 | import pdr
2 | 
3 | from pdr.tests.objects import STUB_IMAGE_LABEL
4 | 
5 | 
6 | def test_data_init_basic(uniband_image_product):
7 |     prod_name, fpath, lpath = uniband_image_product
8 |     expected_label = STUB_IMAGE_LABEL.format(product_name=prod_name, bands=1)
9 | 
10 |     data = pdr.read(fpath)
11 |     assert data.LABEL == expected_label
12 |     assert data._target_path('IMAGE') == str(fpath)
13 |     for k, v in data.identifiers.items():
14 |         if k == 'SPACECRAFT_NAME':
15 |             assert v == 'ORBITER'
16 |         else:
17 |             assert v == ''
18 |     assert data.keys() == ["LABEL", "IMAGE"]
19 |     assert data.metaget("^IMAGE") == prod_name + ".QQQ"
20 |     assert data.get_absolute_paths('x')[0] == (fpath.parent / 'x').absolute()
21 |     data2 = pdr.read(lpath)
22 |     assert data.LABEL == data2.LABEL
23 | 
--------------------------------------------------------------------------------
/pdr/tests/test_datatypes.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | 
3 | from pdr.datatypes import sample_types
4 | 
5 | 
6 | def test_sample_types():
7 |     pds3_data_types = (
8 |         "CHARACTER",
9 |         "IEEE_REAL",
10 |         "LSB_INTEGER",
11 |         "LSB_UNSIGNED_INTEGER",
12 |         "MSB_INTEGER",
13 |         "MSB_UNSIGNED_INTEGER",
14 |         "PC_REAL",
15 |         "UNSIGNED_INTEGER",
16 |         "VAX_UNSIGNED_INTEGER",
17 |         "ASCII_REAL",
18 |     )
19 |     bit_depths = [1, 2, 4, 8]
20 |     numpy_dtype_strings = []
21 |     for dt, depth in product(pds3_data_types, bit_depths):
22 |         try:
23 |             numpy_dtype_strings.append(sample_types(dt, depth, True))
24 |         except NotImplementedError:
25 |             assert ("REAL" in dt) and (depth in (1, 2))
26 |     expected_dtype_strings = [
27 |         # CHARACTER
28 |         "S1",
29 |         "S2",
30 |         "S4",
31 |         "S8",
32 |         # IEEE_REAL
33 |         ">f",
34 |         ">d",
35 |         # LSB_INTEGER
36 |         "<b",
37 |         "<h",
38 |         "<i4",
39 |         "<i8",
40 |         # LSB_UNSIGNED_INTEGER
41 |         "<B",
42 |         "<H",
43 |         "<u4",
44 |         "<u8",
45 |         # MSB_INTEGER
46 |         ">b",
47 |         ">h",
48 |         ">i4",
49 |         ">i8",
50 |         # MSB_UNSIGNED_INTEGER
51 |         ">B",
52 |         ">H",
53 |         ">u4",
54 |         ">u8",
55 |         # PC_REAL
56 |         "<f",
57 |         "<d",
58 |         # UNSIGNED_INTEGER
59 |         ">B",
60 |         ">H",
61 |         ">u4",
62 |         ">u8",
63 |         # VAX_UNSIGNED_INTEGER
64 |         "<B",
65 |         "<H",
66 |         "<u4",
67 |         "<u8",
68 |         # ASCII_REAL
69 |         "S1",
70 |         "S2",
71 |         "S4",
72 |         "S8",
73 |     ]
74 |     assert numpy_dtype_strings == expected_dtype_strings
75 | 
--------------------------------------------------------------------------------
/pdr/tests/test_func.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | 
3 | import pytest
4 | from dustgoggles.tracker import TrivialTracker
5 | 
6 | from pdr.func import (
7 |     get_all_argnames,
8 |     get_argnames,
9 |     get_non_optional_argnames,
10 |     softquery,
11 |     specialize,
12 | )
13 | from pdr.tests.objects import takes_a_few_things, takes_x_only
14 | 
15 | 
16 | def test_specialize():
17 |     # a check function receives the prospective call's keyword arguments;
18 |     # it returns (True, replacement result) to short-circuit the wrapped
19 |     # function, or (False, None) to let the call proceed normally
20 |     def check_big(a, **_):
21 |         if a > 5:
22 |             return True, a / 2
23 |         return False, None
24 | 
25 |     ifbig = specialize(takes_a_few_things, check_big)
26 |     # NOTE: this function cannot filter inappropriate arguments
27 |     # passed as positional.
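28 |     # (e.g., a positional call like ifbig(8, 2, 3) would not be routed
29 |     # through check_big's keyword-based inspection)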
30 |     assert ifbig(a=1, b=2, c=3, d=1, e=1) == 8
31 |     assert ifbig(a=8, b=2, c=3, d=1, e=1) == 4
32 | 
33 | 
34 | def test_get_argnames():
35 |     assert get_argnames(takes_x_only) == {"x"}
36 | 
37 | 
38 | def test_get_non_optional_argnames():
39 |     assert get_non_optional_argnames(takes_a_few_things) == {
40 |         "a",
41 |         "b",
42 |         "c",
43 |         "e",
44 |     }
45 | 
46 | 
47 | def test_get_all_argnames():
48 |     assert get_all_argnames(takes_x_only, takes_a_few_things) == {
49 |         "a",
50 |         "b",
51 |         "c",
52 |         "d",
53 |         "e",
54 |         "x",
55 |         "_",
56 |     }
57 |     assert get_all_argnames(
58 |         takes_x_only, takes_a_few_things, nonoptional=True
59 |     ) == {"a", "b", "c", "e", "x"}
60 | 
61 | 
62 | def test_softquery():
63 |     def b_gen(a):
64 |         return a + 1
65 | 
66 |     def c_gen(a, b, nothing_really: Optional[int] = None):
67 |         return a + b
68 | 
69 |     def f_gen(a, b, c, d, e):
70 |         return a + b + c + d + e
71 | 
72 |     def target(a, b, c, d, e, f, tracker):
73 |         tracker.track()
74 |         return a * b * c * d * e * f
75 | 
76 |     querydict = {'b': b_gen, 'c': c_gen, 'f': f_gen}
77 |     kwargdict = {'a': 5, 'd': 100, 'tracker': TrivialTracker()}
78 |     # this should fail because the pipeline doesn't generate an 'e' and we
79 |     # don't have one in kwargdict
80 |     with pytest.raises(TypeError):
81 |         softquery(target, querydict, kwargdict)
82 |     kwargdict['e'] = 20
83 |     # result should be:
84 |     # b = a + 1 == 5 + 1 == 6
85 |     # c = a + b == 5 + 6 == 11
86 |     # f = a + b + c + d + e == 5 + 6 + 11 + 100 + 20 == 142
87 |     # then: a * b * c * d * e * f == 5 * 6 * 11 * 100 * 20 * 142 == 93720000
88 |     assert target(**softquery(target, querydict, kwargdict)) == 93720000
89 | 
--------------------------------------------------------------------------------
/pdr/tests/test_image.py:
--------------------------------------------------------------------------------
1 | import pdr
2 | 
3 | 
4 | def test_image_simple_2d(uniband_image_product, tracker_factory):
5 |     prod_name, fpath, lpath = uniband_image_product
6 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
7 |     assert data.IMAGE.sum() == 0
8 | 
9 | 
10 | def test_image_simple_3d(multiband_image_product, tracker_factory):
11 |     prod_name, fpath, lpath = multiband_image_product
12 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
13 |     assert data.IMAGE.sum() == 0
--------------------------------------------------------------------------------
/pdr/tests/test_import.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import ast
3 | 
4 | 
5 | def test_delayed_import():
6 |     imports_to_delay = ['numpy', 'pandas']
7 |     commands = f"import sys; import pdr; " \
8 |                f"print(not any(module in sys.modules for module in {imports_to_delay}))"
9 |     out = run_isolated(commands)
10 |     assert ast.literal_eval(out)
11 | 
12 | 
13 | def run_isolated(commands_for_interpreter):
14 |     p = subprocess.run(['python', '-c', commands_for_interpreter],
15 |                        capture_output=True,
16 |                        text=True)
17 |     return p.stdout
--------------------------------------------------------------------------------
/pdr/tests/test_loader_helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | from itertools import product
3 | from pathlib import Path
4 | 
5 | import pytest
6 | 
7 | # noinspection PyProtectedMember
8 | from pdr.loaders._helpers import (
9 |     looks_like_ascii,
10 |     quantity_start_byte,
11 |     count_from_bottom_of_file,
12 |     _check_delimiter_stream,
13 |     check_explicit_delimiter
14 | )
15 | 
16 | 
17 | def test_looks_like_ascii():
18 |     names = ('SPREADSHEET', 'ASCII_TABLE', 'IMAGE')
19 |     formats = ('ASCII', 'STREAM')
20 |     expected = (True, True, True, True, True, False)
21 |     for (name, format_), value in zip(product(names, formats), expected):
22 |         assert value == looks_like_ascii(
23 |             {'INTERCHANGE_FORMAT': format_}, name
24 |         )
25 | 
26 | 
27 | def test_quantity_start_byte():
28 |     units = "BYTES", "RECORDS"
29 |     record_bytes = 100, None
30 |     expected = 99, 99, 9900, None
31 |     for (unit, rb), ex in zip(product(units, record_bytes), expected):
32 |         assert quantity_start_byte({'units': unit, 'value': 100}, rb) == ex
33 | 
34 | 
35 | def test_count_from_bottom_of_file(tmp_path):
36 |     fn = [tmp_path / 'foo.bin', tmp_path / 'FOO.bin']
37 |     rows = 100
38 |     row_bytes = 256
39 |     with fn[0].open('wb') as stream:
40 |         stream.write(b'\x00' * rows * row_bytes * 2)
41 |     assert (
42 |         count_from_bottom_of_file(fn, rows, row_bytes) == rows * row_bytes
43 |     )
44 | 
45 | 
46 | def test_check_delimiter_stream():
47 |     byte_target = {"units": "BYTES", "value": 19200}
48 |     rec_target = {"units": "RECORDS", "value": 1200}
49 |     identifiers = {
50 |         "SPACECRAFT_ID": "NOSTALGIA_FOR_INFINITY",
51 |         'RECORD_BYTES': 100,
52 |         "ETC": ...,
53 |         'RECORD_TYPE': 'BINARY'
54 |     }
55 |     empty_block = {}
56 |     bytes_block = {"BYTES": 100}
57 |     # should never say a stream with a byte quantity is delimited
58 |     assert _check_delimiter_stream(
59 |         identifiers, "TABLE", byte_target, empty_block
60 |     ) is False
61 |     assert _check_delimiter_stream(
62 |         identifiers, "TABLE", ("", byte_target), empty_block
63 |     ) is False
64 |     # should never say a stream with specified record bytes is delimited
65 |     assert _check_delimiter_stream(
66 |         identifiers, "TABLE", rec_target, empty_block
67 |     ) is False
68 |     identifiers['RECORD_BYTES'] = None
69 |     # should never say a non-STREAM stream is delimited
70 |     assert _check_delimiter_stream(
71 |         identifiers, "TABLE", rec_target, empty_block
72 |     ) is False
73 |     # should never say something that isn't ASCII/SPREADSHEET/HEADER is
74 |     # delimited
75 |     identifiers['RECORD_TYPE'] = 'STREAM'
76 |     assert _check_delimiter_stream(
77 |         identifiers, "TABLE", rec_target, empty_block
78 |     ) is False
79 |     # should never say something whose length is declared at the block
80 |     # level in bytes is delimited
81 |     assert _check_delimiter_stream(
82 |         identifiers, "SPREADSHEET", rec_target, bytes_block
83 |     ) is False
84 |     # if all the above conditions aren't satisfied, should say it's
85 |     # delimited
86 |     assert _check_delimiter_stream(
87 |         identifiers, "SPREADSHEET", rec_target, empty_block
88 |     ) is True
89 | 
90 | 
91 | def test_check_explicit_delimiter():
92 |     assert check_explicit_delimiter({'FIELD_DELIMITER': 'VERTICAL_BAR'}) == '|'
93 |     assert check_explicit_delimiter({}) == ','
94 |     # an unrecognized delimiter name should raise KeyError
95 |     with pytest.raises(KeyError):
96 |         check_explicit_delimiter({'FIELD_DELIMITER': 'FENCE'})
97 | 
--------------------------------------------------------------------------------
/pdr/tests/test_metadata.py:
--------------------------------------------------------------------------------
1 | import importlib.util
2 | 
3 | from pdr import Metadata
4 | from pdr.parselabel.pds3 import parse_pvl
5 | from pdr.tests.objects import SILLY_LABEL
6 | 
7 | import pytest
8 | if importlib.util.find_spec("Levenshtein"):
9 |     lev_available = True
10 | else:
11 |     lev_available = False
12 | 
13 | 
14 | def test_metadata_1():
15 |     meta = Metadata(parse_pvl(SILLY_LABEL), 'PDS3')
16 |     assert meta.metaget('POINTINESS') == 12
17 |     assert meta.metablock(
18 | 
'TAIL_COORDINATE_SYSTEM_PARMS' 19 | )['ARTICULATION_DEVICE_ANGLE'][0]['units'] == 'rad' 20 | assert meta.metaget_('MEOW_SEQUENCE_NUMBERS') == (1, 2, 3, 4, '5') 21 | 22 | 23 | @pytest.mark.skipif(not lev_available, reason="Levenshtein not available") 24 | def test_fuzzy_metadata(): 25 | meta = Metadata(parse_pvl(SILLY_LABEL), 'PDS3') 26 | assert meta.metaget_fuzzy('KAT') == 'MEOW.CAT' 27 | -------------------------------------------------------------------------------- /pdr/tests/test_np_utils.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | 4 | import numpy as np 5 | 6 | from pdr.np_utils import ( 7 | make_c_contiguous, 8 | casting_to_float, 9 | np_from_buffered_io, 10 | ibm32_to_np_f32, 11 | ibm64_to_np_f64, 12 | enforce_order_and_object, 13 | ) 14 | 15 | RNG = np.random.default_rng() 16 | 17 | 18 | def test_make_c_contiguous(): 19 | arr = np.arange(0, 100, 5) 20 | arr = arr[0:-1:2] 21 | assert arr.flags["C_CONTIGUOUS"] is False 22 | arr = make_c_contiguous(arr) 23 | assert arr.flags["C_CONTIGUOUS"] is True 24 | 25 | 26 | def test_casting_to_float(): 27 | uint8 = np.arange(0, 100, dtype=np.uint8) 28 | assert casting_to_float(uint8, 1.1) 29 | assert not casting_to_float(uint8, 1) 30 | 31 | 32 | def test_np_from_buffered_io(tmp_path): 33 | arr = RNG.poisson(20, (100, 100)).astype(np.uint8) 34 | fpath = tmp_path / "arr.img.gz" 35 | with gzip.open(fpath, "wb") as stream: 36 | stream.write(arr.tobytes()) 37 | with gzip.open(fpath, "rb") as buf: 38 | in1 = np_from_buffered_io(buf, np.dtype("b")) 39 | assert np.all(in1.reshape(arr.shape) == arr) 40 | in2 = np_from_buffered_io(buf, np.dtype("b"), 10, 10) 41 | assert np.all(in2 == arr.ravel()[10:20]) 42 | 43 | 44 | def test_enforce_order_and_object(): 45 | gross = np.dtype([("f1", "V4"), ("f2", "i2"), ("f3", ">i2")]) 46 | grossarray = np.array([(b"\x00\x00\x00\x01", 12, 12)], dtype=gross) 47 | enforced = enforce_order_and_object(grossarray) 48 | assert np.all(enforced == grossarray) 49 | assert enforced.dtype[0] == np.dtype("O") 50 | assert enforced.dtype[2] == np.dtype("i2") 51 | enforced2 = enforce_order_and_object(np.array([b"\x00"], dtype="V")) 52 | assert enforced2[0] == b"\x00" 53 | assert enforced2.dtype == np.dtype("O") 54 | enforced3 = enforce_order_and_object(np.array([3], dtype=">i2")) 55 | assert enforced3[0] == 3 56 | assert enforced3.dtype == np.dtype("i2") 57 | enforced4 = enforce_order_and_object(np.array([3], dtype=">i2")) 58 | assert enforced4[0] == 3 59 | assert enforced4.dtype == np.dtype("i2") 60 | 61 | 62 | def test_ibm_to_np(): 63 | assert ibm32_to_np_f32(np.frombuffer(b"\x00\x00\x01\xc2", "i4")) == -1 64 | assert ( 65 | ibm64_to_np_f64( 66 | np.frombuffer(b"\x00\x00\x00\x00\x00\x00\x01\xc2", "i8") 67 | ) 68 | == -1 69 | ) 70 | -------------------------------------------------------------------------------- /pdr/tests/test_parselabel_pds3.py: -------------------------------------------------------------------------------- 1 | from pdr.parselabel.pds3 import parse_pvl 2 | from pdr.tests.objects import SILLY_LABEL 3 | 4 | 5 | def test_parse_label(): 6 | params, _ = parse_pvl(SILLY_LABEL) 7 | assert params['^CAT'] == 'MEOW.CAT' 8 | assert params['CAT_NAME'] == 'LILY' 9 | assert params[ 10 | 'TAIL_COORDINATE_SYSTEM_PARMS' 11 | ]['TIP_OF_TAIL_FORMAT']['POINTINESS'] == 12 12 | assert params[ 13 | 'TAIL_COORDINATE_SYSTEM_PARMS' 14 | ]['ARTICULATION_DEVICE_ANGLE'][0] == \ 15 | {'value': -4.5e-05, 'units': 'rad'} 16 | 
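17 | 
18 | 
19 | # Illustrative sketch, not part of the original suite: parse_pvl should
20 | # also preserve multi-line quoted strings as single values, so the
21 | # DESCRIPTION in SILLY_LABEL comes back with its embedded text intact.
22 | def test_parse_label_multiline_string():
23 |     params, _ = parse_pvl(SILLY_LABEL)
24 |     assert 'MONTMORILLONITE = 100.' in params['DESCRIPTION']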
-------------------------------------------------------------------------------- /pdr/tests/test_parselabel_pds4.py: -------------------------------------------------------------------------------- 1 | from pdr.pds4_tools.reader.label_objects import Label 2 | from pdr.parselabel.pds4 import reformat_pds4_tools_label 3 | 4 | from pdr.tests.objects import MINIMAL_PDS4_LABEL 5 | 6 | 7 | # pds4_tools offers no obvious way to parse a Label out of a str, 8 | # nor from an open file handle (which could be a stringio instance). 9 | 10 | def test_parse_label(tmp_path): 11 | minimal_pds4_label_f = tmp_path / "minimal_pds4.xml" 12 | with open(minimal_pds4_label_f, "wt") as fp: 13 | fp.write(MINIMAL_PDS4_LABEL) 14 | unpacked, params = reformat_pds4_tools_label( 15 | Label.from_file(minimal_pds4_label_f) 16 | ) 17 | assert sorted(params) == [ 18 | 'File_Area_Observational', 19 | 'Identification_Area', 20 | 'Observation_Area', 21 | 'Product_Observational', 22 | 'Reference_List', 23 | 'logical_identifier' 24 | ] 25 | 26 | PO = unpacked["Product_Observational"] 27 | assert PO["Observation_Area"] is None 28 | assert PO["Reference_List"] is None 29 | assert PO["File_Area_Observational"] is None 30 | 31 | IA = PO["Identification_Area"] 32 | assert IA["logical_identifier"] == "urn:nasa:pds:mc_pdr_testsuite:test_labels:test_minimal_label.dat" 33 | -------------------------------------------------------------------------------- /pdr/tests/test_primary_desktop_image.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import pdr 6 | 7 | IMPATH = Path(__file__).parent / 'data' 8 | 9 | try: 10 | from PIL import Image 11 | pil_available = True 12 | except ImportError: 13 | pil_available = False 14 | 15 | 16 | # NOTE: loose value checks in this module are intended to allow for 17 | # differences in environment-level versions of libjpeg etc. 
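18 | # Each test decodes an image from pdr/tests/data and checks that the
19 | # array's mean falls within 0.5 of a hard-coded reference value, rather
20 | # than requiring byte-identical pixel output.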
21 | 
22 | 
23 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
24 | def test_simple_primary_jpeg():
25 |     im = pdr.read(IMPATH / 'squirrel.jpg')
26 |     assert abs(im.IMAGE.mean() - 125.5) < 0.5
27 |     assert im.metaget('mode') == 'RGB'
28 |     assert im.metaget('format') == 'JPEG'
29 |     assert im.standard == 'JPEG'
30 | 
31 | 
32 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
33 | def test_phone_camera_mpo():
34 |     im = pdr.read(IMPATH / 'concert.jpeg')
35 |     assert abs(im.IMAGE.mean() - 40) < 0.5
36 |     assert abs(im.Undefined_1.mean() - 5) < 0.5
37 |     assert im.metaget(
38 |         'MPEntry'
39 |     )[0]['Attribute']['MPType'] == 'Baseline MP Primary Image'
40 |     assert im.metaget('Model') == 'iPhone 13 Pro Max'
41 |     assert im.metaget('Longitude') == (82.0, 33.0, 3.61)
42 |     assert im.metaget('mode') == 'RGB'
43 |     assert im.standard == 'MPO'
44 | 
45 | 
46 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
47 | def test_simple_tiff():
48 |     im = pdr.read(IMPATH / 'kings_river_canyon.tiff')
49 |     assert abs(im.IMAGE.mean() - 152.6) < 0.5
50 |     assert im.metaget('mimetype') == 'image/tiff'
51 |     assert im.metaget('mode') == 'L'
52 |     assert im.standard == 'TIFF'
53 | 
54 | 
55 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
56 | def test_anigif():
57 |     im = pdr.read(IMPATH / 'F187B51_cycle_3.gif')
58 |     assert len(im) == 43
59 |     assert abs(im.FRAME_30.mean() - 115.5) < 0.5
60 |     assert abs(im.FRAME_5.mean() - 1.5) < 0.5
61 |     assert im.metaget('mode') == 'P'
62 |     assert im.metaget('palette')[(238, 255, 0)] == 0
63 | 
64 | 
65 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
66 | def test_png():
67 |     im = pdr.read(IMPATH / 'catseye_1.png')
68 |     assert abs(im.IMAGE.mean() - 19.4) < 0.5
69 |     assert im.metaget('mode') == 'RGB'
70 |     assert im.metaget('ExifOffset') == 168
71 | 
72 | 
73 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
74 | def test_bmp():
75 |     im = pdr.read(IMPATH / 'weather.bmp')
76 |     assert abs(im.IMAGE.mean() - 118.6) < 0.5
77 |     assert im.metaget('mode') == 'RGB'
78 |     assert im.standard == 'BMP'
79 | 
80 | 
81 | @pytest.mark.skipif(not pil_available, reason="PIL not available")
82 | def test_animated_webp():
83 |     im = pdr.read(IMPATH / 'Simple_Animated_Clock.webp')
84 |     assert len(im) == 287
85 |     assert abs(im.FRAME_286.mean() - 1.5) < 0.5
86 |     assert im.metaget('mode') == 'RGBA'
87 |     assert im.standard == 'WEBP'
88 | 
--------------------------------------------------------------------------------
/pdr/tests/test_primary_fits.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import pdr
4 | 
5 | import pytest
6 | try:
7 |     from astropy.io import fits
8 |     fits_available = True
9 | except ImportError:
10 |     fits_available = False
11 | 
12 | RNG = np.random.default_rng()
13 | 
14 | 
15 | @pytest.mark.skipif(not fits_available, reason="astropy.io.fits not available")
16 | def test_array_roundtrip(tmp_path):
17 |     arr = RNG.poisson(100, (100, 100)).astype(np.uint8)
18 |     hdul = fits.HDUList()
19 |     hdul.append(fits.ImageHDU(arr, name='POISSON'))
20 |     hdul.writeto(tmp_path / 'temp.fits')
21 |     data = pdr.read(tmp_path / 'temp.fits')
22 |     assert data.keys() == ['POISSON']
23 |     assert np.all(data.POISSON == arr)
24 | 
--------------------------------------------------------------------------------
/pdr/tests/test_queries.py:
--------------------------------------------------------------------------------
1 | from pdr.parselabel.pds3 import parse_pvl, literalize_pvl
2 | from pdr.loaders.queries import (
3 |     base_sample_info,
4 |     extract_axplane_metadata,
5 |     generic_image_properties,
6 |     generic_qube_properties,
7 |     get_qube_band_storage_type,
8 |     im_sample_type,
9 | )
10 | 
11 | from pdr.tests.objects import BLOCK_TEXT, QUBE_BLOCK_TEXT
12 | 
13 | 
14 | # helper exercising base_sample_info / im_sample_type; not named test_*,
15 | # so pytest does not collect it
16 | def basesamp():
17 |     block = literalize_pvl(parse_pvl(BLOCK_TEXT)[0]["IMAGE"])
18 |     base = base_sample_info(block)
19 |     assert base == {"BYTES_PER_PIXEL": 4, "SAMPLE_TYPE": "IEEE_REAL"}
20 |     assert im_sample_type(base) == ">"
21 | 
22 | 
23 | def test_generic_properties():
24 |     block = parse_pvl(BLOCK_TEXT)[0]["IMAGE"]
25 |     props = generic_image_properties(block, ">f")
26 |     assert props == {
27 |         "BYTES_PER_PIXEL": 4,
28 |         "is_vax_real": False,
29 |         "sample_type": ">f",
30 |         "nrows": 650,
31 |         "ncols": 350,
32 |         "nbands": 3,
33 |         "band_storage_type": "BAND_SEQUENTIAL",
34 |         "rowpad": 0,
35 |         "colpad": 0,
36 |         "bandpad": 0,
37 |         "linepad": 0,
38 |     }
39 | 
40 | 
41 | def test_qube_props():
42 |     params, _ = parse_pvl(QUBE_BLOCK_TEXT)
43 |     qube_block = params["SPECTRAL_QUBE"]
44 |     band_storage_type = get_qube_band_storage_type(qube_block)
45 |     props = generic_qube_properties(qube_block, band_storage_type)
46 |     assert props == {
47 |         "BYTES_PER_PIXEL": 4,
48 |         "sample_type": ">f",
49 |         "axnames": ("SAMPLE", "LINE", "BAND"),
50 |         "ncols": 100,
51 |         "nrows": 66,
52 |         "nbands": 17,
53 |         "band_storage_type": "BAND_SEQUENTIAL",
54 |         "rowpad": 0,
55 |         "colpad": 0,
56 |         "bandpad": 8,
57 |         "suffix_bands": 8,
58 |         "linepad": 0,
59 |         "is_vax_real": False,
60 |     }
61 |     assert extract_axplane_metadata(qube_block, props) == {
62 |         "rowpad": 0,
63 |         "colpad": 0,
64 |         "bandpad": 8,
65 |         "suffix_bands": 8,
66 |     }
67 | 
--------------------------------------------------------------------------------
/pdr/tests/test_scaling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import pdr
4 | from pdr._scaling import find_special_constants
5 | from pdr.parselabel.pds3 import parse_pvl
6 | 
7 | RNG = np.random.default_rng()
8 | 
9 | STUB = """
10 | OBJECT = IMAGE
11 |     INVALID_CONSTANT = 33
12 | END_OBJECT
13 | END
14 | """
15 | 
16 | 
17 | def test_find_special_constants():
18 |     meta = pdr.Metadata(parse_pvl(STUB), 'PDS3')
19 |     arr = RNG.choice(np.array([33, -32766, 100]), (100, 100))
20 |     specials = find_special_constants(meta, arr.astype(np.int16), 'IMAGE')
21 |     assert specials == {"INVALID_CONSTANT": 33, "ISIS_LOW_INST_SAT": -32766}
22 | 
--------------------------------------------------------------------------------
/pdr/tests/test_table.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import pdr
4 | 
5 | 
6 | def test_simple_binary_table(binary_table_product, tracker_factory):
7 |     prod_name, fpath, lpath = binary_table_product
8 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
9 |     assert list(data.TABLE.columns) == ['X_0', 'Y', 'X_1']
10 |     assert list(data.TABLE.dtypes) == [
11 |         np.dtype('uint8'), np.dtype('float32'), np.dtype('float64')
12 |     ]
13 |     assert data.TABLE.loc[0, 'X_0'] == 1
14 |     assert np.isclose(data.TABLE.loc[5, 'Y'], 4.4)
15 |     assert np.isclose(data.TABLE.loc[9, "X_1"], 8.8)
16 | 
17 | 
18 | def test_simple_dsv_table(dsv_table_product, tracker_factory):
19 |     prod_name, fpath, lpath = dsv_table_product
20 |     data = pdr.read(fpath, debug=True, tracker=tracker_factory(fpath))
21 |     assert list(data.SPREADSHEET.columns) == ['X_0', 'Y', 'X_1']
22 |     assert list(data.SPREADSHEET.dtypes) == [
23 | 
np.dtype('float64'), np.dtype('O'), np.dtype('int64') 24 | ] 25 | assert np.isclose(data.SPREADSHEET.loc[0, 'X_0'], 5.5) 26 | assert data.SPREADSHEET.loc[5, 'Y'] == 'cat' 27 | assert data.SPREADSHEET.loc[9, "X_1"] == -12 28 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # pyproject.toml documentation for reference: 2 | # https://packaging.python.org/en/latest/ 3 | # https://setuptools.pypa.io/en/latest/userguide/ 4 | 5 | [project] 6 | name = "pdr" 7 | version = "1.4.0" 8 | description = "Planetary Data Reader" 9 | readme = "README.md" 10 | license = { file = "LICENSE.md" } 11 | 12 | authors = [ 13 | { name = "Chase Million", email = "chase@millionconcepts.com" }, 14 | { name = "Michael St. Clair", email = "mstclair@millionconcepts.com" }, 15 | { name = "Sierra Brown", email = "sierra@millionconcepts.com" }, 16 | { name = "Sabrina Curtis", email = "scurtis@millionconcepts.com" }, 17 | { name = "Zack Weinberg", email = "zack@millionconcepts.com" }, 18 | ] 19 | 20 | classifiers = [ 21 | "Development Status :: 4 - Beta", 22 | "License :: OSI Approved :: BSD License", 23 | "Operating System :: OS Independent", 24 | "Programming Language :: Python :: 3", 25 | ] 26 | 27 | requires-python = ">=3.9" 28 | dependencies = [ 29 | "dustgoggles", 30 | "more_itertools", 31 | "multidict", 32 | "numpy", 33 | "pandas>=2.0.0", 34 | "rms-vax" 35 | ] 36 | 37 | [project.optional-dependencies] 38 | pillow = ["pillow"] 39 | fits = ["astropy"] 40 | notebooks = ["jupyter"] 41 | pvl = ["pvl"] 42 | tests = ["pytest"] 43 | fuzzy = ["Levenshtein"] 44 | 45 | [project.urls] 46 | Repository = "https://github.com/MillionConcepts/pdr" 47 | 48 | [build-system] 49 | requires = ["setuptools >= 64"] 50 | build-backend = "setuptools.build_meta" 51 | 52 | [tool.setuptools.packages.find] 53 | where = ["."] 54 | include = ["pdr*"] 55 | namespaces = false 56 | 57 | # Recommended for new projects by pytest manual. 58 | [tool.pytest.ini_options] 59 | addopts = [ 60 | "--import-mode=importlib" 61 | ] 62 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # All package configuration is now in pyproject.toml. This file exists 2 | # solely for backward compatibility, e.g. allowing people to continue 3 | # to run "python3 setup.py develop" instead of "pip install -e ." 4 | 5 | import setuptools 6 | setuptools.setup() 7 | --------------------------------------------------------------------------------