├── .dockerignore ├── .github └── workflows │ ├── build-easyaccess-dev.yaml │ └── build-easyaccess.yaml ├── .gitignore ├── AUTHORS.md ├── CHANGES.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── FAQ.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── TODO.md ├── bin └── easyaccess ├── config └── conda │ └── meta.yaml ├── data ├── help.gif └── help_screenshot.png ├── easyaccess ├── __init__.py ├── config_ea.py ├── eaparser.py ├── easyaccess.py ├── eautils │ ├── __init__.py │ ├── cli_utils.py │ ├── db_api.py │ ├── db_utils.py │ ├── des_logo.py │ ├── des_utils.py │ ├── dircache.py │ ├── dtypes.py │ ├── ea_utils.py │ ├── fileio.py │ ├── fun_utils.py │ ├── import_utils.py │ └── python_api.py └── version.py ├── paper ├── classes_simple.png ├── easyaccess_users.png ├── easyaccess_welcome.png ├── paper.bib ├── paper.md └── paper.pdf ├── setup.py └── tests ├── test_api.py ├── test_connection.py ├── test_interpreter.py ├── test_load_table.py └── wrapped.py /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile -------------------------------------------------------------------------------- /.github/workflows/build-easyaccess-dev.yaml: -------------------------------------------------------------------------------- 1 | name: easyaccess-build-dev 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'dev' 7 | paths: 8 | - '**' 9 | 10 | jobs: 11 | docker: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - 15 | name: Checkout 16 | uses: actions/checkout@v2 17 | with: 18 | submodules: recursive 19 | - 20 | name: Set up QEMU 21 | uses: docker/setup-qemu-action@v1 22 | - 23 | name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v1 25 | 26 | - 27 | name: Cache Docker layers 28 | uses: actions/cache@v2 29 | with: 30 | path: /tmp/.buildx-cache 31 | key: ${{ runner.os }}-buildx-${{ github.sha }} 32 | restore-keys: | 33 | ${{ runner.os }}-buildx- 34 | 35 | - 36 | name: Login to container registry 37 | uses: 
docker/login-action@v1 38 | with: 39 | registry: "hub.ncsa.illinois.edu" 40 | username: "${{ secrets.NCSA_HARBOR_USERNAME }}" 41 | password: "${{ secrets.NCSA_HARBOR_TOKEN }}" 42 | - 43 | name: Build and push 44 | id: docker_build 45 | uses: docker/build-push-action@v2 46 | with: 47 | context: . 48 | push: true 49 | tags: hub.ncsa.illinois.edu/des/easyaccess:dev 50 | # From https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#github-cache 51 | cache-from: type=local,src=/tmp/.buildx-cache 52 | cache-to: type=local,dest=/tmp/.buildx-cache-new 53 | - 54 | # Temp fix 55 | # https://github.com/docker/build-push-action/issues/252 56 | # https://github.com/moby/buildkit/issues/1896 57 | name: Move cache 58 | run: | 59 | rm -rf /tmp/.buildx-cache 60 | mv /tmp/.buildx-cache-new /tmp/.buildx-cache 61 | - 62 | name: Image digest 63 | run: echo ${{ steps.docker_build.outputs.digest }} 64 | -------------------------------------------------------------------------------- /.github/workflows/build-easyaccess.yaml: -------------------------------------------------------------------------------- 1 | name: easyaccess-build 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | paths: 8 | - '**' 9 | 10 | jobs: 11 | docker: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - 15 | name: Checkout 16 | uses: actions/checkout@v2 17 | with: 18 | submodules: recursive 19 | - 20 | name: Set up QEMU 21 | uses: docker/setup-qemu-action@v1 22 | - 23 | name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v1 25 | 26 | - 27 | name: Cache Docker layers 28 | uses: actions/cache@v2 29 | with: 30 | path: /tmp/.buildx-cache 31 | key: ${{ runner.os }}-buildx-${{ github.sha }} 32 | restore-keys: | 33 | ${{ runner.os }}-buildx- 34 | 35 | - 36 | name: Login to container registry 37 | uses: docker/login-action@v1 38 | with: 39 | registry: "hub.ncsa.illinois.edu" 40 | username: "${{ secrets.NCSA_HARBOR_USERNAME }}" 41 | password: "${{ secrets.NCSA_HARBOR_TOKEN }}" 42 | - 43 | 
name: Build and push 44 | id: docker_build 45 | uses: docker/build-push-action@v2 46 | with: 47 | context: . 48 | push: true 49 | tags: hub.ncsa.illinois.edu/des/easyaccess:latest 50 | # From https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#github-cache 51 | cache-from: type=local,src=/tmp/.buildx-cache 52 | cache-to: type=local,dest=/tmp/.buildx-cache-new 53 | - 54 | # Temp fix 55 | # https://github.com/docker/build-push-action/issues/252 56 | # https://github.com/moby/buildkit/issues/1896 57 | name: Move cache 58 | run: | 59 | rm -rf /tmp/.buildx-cache 60 | mv /tmp/.buildx-cache-new /tmp/.buildx-cache 61 | - 62 | name: Image digest 63 | run: echo ${{ steps.docker_build.outputs.digest }} 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | MANIFEST 3 | build/ 4 | .idea/ 5 | TODO 6 | easyaccess.egg-info/ 7 | HELP 8 | RELEASE_NOTES 9 | .cache/ 10 | dist/ 11 | paper/Makefile 12 | paper/dotfiles/ 13 | paper/joss-logo.png 14 | paper/latex.template 15 | .ipynb_checkpoints/ 16 | Untitled.ipynb 17 | install_oracleclient.py 18 | easyaccess/other_files/ 19 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | ## Contributors to `easyaccess` 2 | This is an alphabetical list of contributors, if you feel you have contributed to the project and we have neglected adding you to the list please accept our apologies and [let us know](mailto:mgckind@gmail.com) to correct it. 3 | 4 | - Monika Adamow 5 | - Matias Carrasco Kind 6 | - Alex Drlica-Wagner 7 | - Landon Gelman 8 | - Audrey Koziol 9 | - Donald Petravick 10 | - Eli Rykoff 11 | - Ignacio Sevilla 12 | - T. 
Andrew Manning 13 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Changes 2 | 3 | ## v.1.4.8 4 | #### XXXX-XXX-XX 5 | - Fix issues with hanging after an Oracle Error (PR #173 and issue #130) 6 | 7 | 8 | ## v1.4.7 9 | #### 2019-FEB-21 10 | - Update FAQ (PR #171) 11 | - Remove DB names restriction to allow other DBs (PR #170) 12 | 13 | ## v1.4.6 14 | #### 2019-JAN-14 15 | - No minor changes to the code, just the general documentation 16 | - Adding Contribution guidelines and Code of Conduct 17 | - Adding Authors 18 | - Adding FAQ 19 | - Updating Readme and Installation instructions 20 | - Reformatting code 21 | - Update paper and references 22 | 23 | 24 | ## v1.4.5 25 | #### 2018-OCT-04 26 | - Restructuring code structure (PR #151) 27 | - Add paper reference 28 | - Minor fixes in unit tests 29 | - Update documentation 30 | - Fix conda issues 31 | 32 | ## v1.4.4 33 | #### 2018-JUN-15 34 | - Improved version to work with public release DB 35 | - Update dtypes to match Y3 tables (PR #149) 36 | - Fix bytes to unicode issue for py3 when writing h5 files 37 | - Fix bug with Oracle types NUMBER with no scale/digits 38 | - Add desdr database and remove oldoper 39 | - Fix HDF5 output files, (see #140) 40 | - Add compression option to configuration, gzip is applied to .csv,.tab,.fits (latter only for 1 DB trip), bzip2 for hdf5 41 | fits can be opened in append mode when they are compressed, workaround is to increase prefetch 42 | 43 | ## v1.4.3 44 | #### 2017-AUG-15 45 | - Reduce the number of tries for connection 46 | - Add options to connect to new desoper and remove oldsci, passwords no longer linked (PR #136) 47 | - Allow `describe_table` to work for materialized views (see #135) 48 | 49 | ## v1.4.2 50 | #### 2017-MAY-23 51 | - Fix a minor migration bug from version 1.4.1 52 | - Add option to reset password after expiration (see #125) 53 | - 
Add support for new databases (see #126) 54 | - Update tutorial link 55 | - Fix minor bugs and update commands (see #119 #120) 56 | - Fix bugs regarding cx_Oracle (see #117) 57 | 58 | ## v1.4.0 59 | #### 2017-APR-16 60 | - Refactor and huge clean up, now bin/ folder has the startup script (see PR #113 and #111) 61 | - Fix metadata and cache lookup for autocompletion (see #92, #94, #93) 62 | - Add estimated number of rows on describe_table when available (see #75) 63 | - Toggle color mode without exiting interpreter (see PR #110) 64 | - 80 character loading bar (see PR #103) 65 | - Added more information to help command (see PR #100 and #109) 66 | - Fix config option and boolean values (see PR #115) 67 | - Justify comments to the left in describe_table (#105) 68 | - Added `change_db` option to switch between databases without logging out (see PR #90 , #86) 69 | - Fixes authentication and des service files 70 | - Added api to access descut services directly (see PR #87) 71 | - Fix readline issues related with some OS X (see #88 and #112) 72 | 73 | 74 | ## v1.3.1 75 | #### 2016-MAY-24 76 | - Adds optional purge to drop_table (see PR #74) 77 | - Fix a bug setting prefetch parameters using config set #76 78 | - Improve import functionality for inline queries (see PR #78) 79 | - Add option to upload files in chunks of memory (in addition to the chunk by rows), using --memsize option. 80 | This calculates an approximate number of rows to upload on each trip, avoiding memory issues. 
(see PR #79) 81 | 82 | ## v1.3.0 83 | #### 2016-MAY-09 84 | - Fixes a bug with python2/3 compatibility (see Pull-request #36) 85 | - Removes ; from query for python API (query_* functions) issue #37 86 | - Added execproc to run sql/pl procedures, see help execproc for more information (see PR #48) 87 | - Fix show_index and describe_table commands and queries (no more repeated rows) (see PR #42 and #43) 88 | - Case insensitive options (PR #45) 89 | - Reorganization of the structure of the code, this way is much simpler to develop (PR #49) 90 | - Conda installation 91 | - In-query python functions (beta) write your own function and call it from query #67 92 | - Query and execution information in FITS header #50 93 | - Fix bug when loading sql from file #55 94 | - Dealing better with trailing white space #52 95 | - Fix several minor issues: #37, #40 96 | - configuration options at command line (reset and configuration per session) with --config 97 | - Improved python API , added some extra functions (pandas_to_db) and improved docs #68 98 | - Load big files in chunks -- very useful to load big files in chunks without running into memory issues #66 99 | - Fix find_tables (to include owners) and describe_table (data_type) commands 100 | - Added unittest suites #65 #70 101 | - Added --no_refresh option in command line to quick start up 102 | 103 | 104 | ## v1.2.0 105 | #### 2015-OCT-01 106 | - Fixes several issues, like: #35, #34, #30, #26 #25 among others 107 | - Does not clear screen after query 108 | - Added a message when password has not been set in the last 200 days (added extra columns to whoami as well) 109 | - Fix problem with connection after entering a wrong query which used to hang in some networks 110 | - Bugfixes (show_index, change display setting without restarting, others) 111 | - Added explain option to see the execution plan, i.e. 
DESDB ~> SELECT * FROM TABLE; < explain 112 | - Added extra configuration option for column width (useful when printing large text columns) 113 | - Added optional user/password from command line (using --user and --password ) 114 | - Added size info to mytables command 115 | - Added append_tables option to append data to existing tables 116 | - Minor changes in uploading tables 117 | - **python 3 compatible** 118 | - **change config.ini folder, from .easyacess/ to .easyaccess (typo)** 119 | - move multiprocess import call depending on whether the loading bar is set 120 | 121 | ## v1.1.0 122 | #### 2015-APR-28 123 | - **Change pyfits to fitsio** 124 | - Added autocommit as configuration option to commit changes or not by default (default = yes) 125 | - Added timeout (1 min) to initial metadata cache loading 126 | 127 | ## v1.0.8 128 | #### 2015-APR-27 129 | - Fixed bug with single character for fits 130 | - Fix encoding issues (between ASCII and UTF-8) 131 | - Loading bar termination issue fixed 132 | - Added message after loading table for sharing tables 133 | - Started migration from pyfits to fitsio 134 | - Set max limit for output files in MB 135 | - Add python API features for key commands and bugfix (All commands can still be accessed from python API) 136 | 137 | ## v1.0.7 138 | #### 2015-MAR-31 139 | - Null and NaN values fixed and customizable (config nullvalue set -9999) It will print 'Null' on the screen but nullvalue on the files 140 | - Added a SQL syntax checker ( ; < check) 141 | - Reformatted help 142 | 143 | ## v1.0.6 144 | #### 2015-MAR-25 145 | - Background with Ctrl-Z, bg and fg 146 | - optional loading bar from config.ini file (default = yes) 147 | - Connection trials added and error information when not connected 148 | - print version at startup 149 | - Number of rows received when running queries into a file 150 | - Added command to change parameters from config.ini file from inside easyaccess (config) 151 | - Fix issue with describe_table 
in latest DB schemas 152 | 153 | ## v1.0.5 154 | #### 2015-MAR-20 155 | - version 1.0.4 was skipped to be in sync with pip 156 | - Added optional color in config file (default = yes) 157 | - Added quiet option for initialization (easyaccess -q) 158 | - command to open an online tutorial (online_tutorial) 159 | 160 | ## v1.0.3 161 | #### 2015-MAR-18 162 | - Fix an installation bug 163 | 164 | ## v1.0.2 165 | #### 2015-MAR-18 166 | - Added DES Logo 167 | - Loading bar 168 | - Check whether des service file is read/write only by user, otherwise it changes the access mode 169 | - Added comments from table to describe_table 170 | - Added command to add comments to tables and columns (add_comment) 171 | - Fix minor issues and improve formatting 172 | 173 | 174 | ## v1.0.1 175 | #### 2015-FEB-20 176 | - Fix a bug at exit after error 177 | - Added a checker of DES_SERVICES files and prompt user/password when file doesn't exist 178 | - Minor bugs 179 | 180 | ## v1.0.0 181 | #### 2015-FEB-17 182 | - release 183 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at mgckind@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to `easyaccess` 2 | 3 | 4 | When contributing to this repository, please first discuss the change you wish to make via [issues](https://github.com/mgckind/easyaccess/issues), 5 | [email](mailto:mgckind@gmail.com), or any other method with the owners of this repository before making a change. 6 | 7 | Please note we have a [code of conduct](CODE_OF_CONDUCT.md) for this project, please follow it in all your interactions with the project. 8 | 9 | ## Reporting Bugs 10 | 11 | When creating a bug or an issue please include as many details as possible. 
Create your issues [here](https://github.com/mgckind/easyaccess/issues) and follow these steps: 12 | 13 | - Check [previous closed issues](https://github.com/mgckind/easyaccess/issues?q=is%3Aissue+is%3Aclosed), [current open issues](https://github.com/mgckind/easyaccess/issues?q=is%3Aopen+is%3Aissue) or our FAQ for a list of common questions or previously solved problems 14 | - Use a clear and descriptive title for the issue to identify the problem. 15 | - Provide all information about your OS 16 | - Provide information about the installation process (source, pip, conda, Docker, etc) 17 | - software version including Python 18 | - Detailed information on how to reproduce the problem 19 | - Provide screenshots when useful 20 | 21 | ## Other contributions 22 | 23 | All kinds of contributions are welcome, from fixing bugs and resolving issues to suggesting features. Look at the [current issues](https://github.com/mgckind/easyaccess/issues) to see how you can contribute and provide help. 24 | 25 | ## Pull Request Process 26 | 27 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a 28 | build. 29 | 2. Update the README.md with details of changes to the interface, this includes new environment 30 | variables, exposed ports, useful file locations and container parameters. 31 | 3. Increase the version numbers in any example files and the README.md to the new version that this 32 | Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). 33 | 4. You may merge the Pull Request in once you have the sign-off of two other developers, or if you 34 | do not have permission to do that, you may request the second reviewer to merge it for you. 35 | 36 | ## Authors 37 | 38 | Please see our [list of contributors](AUTHORS.md), if you feel you have contributed to the project and we have neglected adding you to the list please accept our apologies and [let us know](mailto:mgckind@gmail.com) to correct it. 
39 | 40 | 41 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM oraclelinux:7-slim as oracle 2 | 3 | RUN curl -o /etc/yum.repos.d/public-yum-ol7.repo https://yum.oracle.com/public-yum-ol7.repo && \ 4 | yum-config-manager --enable ol7_oracle_instantclient && \ 5 | yum -y install oracle-instantclient18.3-basic 6 | 7 | FROM ubuntu:20.04 8 | 9 | # ORACLE DB Client installation (https://oracle.github.io/odpi/doc/installation.html#oracle-instant-client-zip) 10 | ENV PATH=$PATH:/usr/lib/oracle/18.3/client64/bin 11 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/oracle/18.3/client64/lib:/usr/lib 12 | COPY --from=oracle /usr/lib/oracle/ /usr/lib/oracle 13 | COPY --from=oracle /lib64/libaio.so.1 /usr/lib 14 | 15 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 16 | python3-pip \ 17 | libaio1 \ 18 | wget \ 19 | imagemagick \ 20 | stiff \ 21 | mpich \ 22 | libopenmpi-dev \ 23 | openssh-client \ 24 | unzip \ 25 | && rm -rf /var/lib/apt/lists/* 26 | 27 | ARG UID=1001 28 | RUN echo "Building image with \"worker\" user ID: ${UID}" 29 | RUN useradd --create-home --shell /bin/bash worker --uid ${UID} 30 | 31 | # Copy easyaccess and compile: 32 | WORKDIR /home/worker 33 | COPY --chown=worker:worker ./ ./easyaccess 34 | WORKDIR /home/worker/easyaccess 35 | RUN python3 setup.py install 36 | -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | ## Frequently Asked Questions 2 | 3 | Here we collect a list of FAQ related to installation, usage, etc. Please refer to the [issues page](https://github.com/mgckind/easyaccess/issues) for more information. 
4 | 5 | - **Q: Installation with `pip` went smoothly, but where was `easyaccess` installed?** 6 | - A: When installed using pip, one can use the following: `pip show -f easyaccess` to check the installation folders. 7 | - **Q: When I try to connect I get the error: `ORA-21561: OID generation failed`. How do I solve this?** 8 | - A: Most of the time this problem can be solved by adding the name of your computer in the `/etc/hosts` file, next to the line that says `127.0.0.1 localhost`. Add the name of your computer (i.e. the output of the `hostname` command) to that line, so it looks like `127.0.0.1 localhost <your_hostname>`. 9 | - **Q: Where are the configuration file and authentication file?** 10 | - A: By default, the configuration file is located at `$HOME/.easyaccess/config.ini` and the authentication file is at `$HOME/.desservices.ini` or can be set with the environment variable `$DES_SERVICES`. 11 | - **Q: I am a DES Collaborator, where can I reset my credentials?** 12 | - A: For collaborators only: Please use [this](https://des.ncsa.illinois.edu/help) form. 13 | - **Q: The client hangs after getting an Oracle error, and I need to close the window. Can I fix this?** 14 | - A: This is a long-standing [issue](https://github.com/mgckind/easyaccess/issues/130) that we haven't been able to fix or reproduce on all the systems. Please report it on [issue #130](https://github.com/mgckind/easyaccess/issues/130) and add details about your OS and easyaccess version. We'd recommend a clean conda installation, which sometimes fixes the issue. 15 | - **Q: How can I install the Oracle client myself?** 16 | - A: Please follow the instructions posted [here](https://www.oracle.com/technetwork/database/database-technologies/instant-client/overview/index.html). 17 | - **Q: Are there other requirements besides python modules and Oracle clients?** 18 | - A: Usually not, but for a new OS or Virtual Machine you'd need to install `libaio` and `libbz2`. 
19 | - **Q: How can I contribute to the project?** 20 | - A: Contributions are welcome! Please take a look at our [Code of Conduct](CODE_OF_CONDUCT.md) and [Contribution guide](CONTRIBUTING.md). 21 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | University of Illinois/NCSA Open Source License 2 | Copyright (c) 2015 University of Illinois at Urbana-Champaign 3 | All rights reserved. 4 | 5 | Developed by: Matias Carrasco Kind & DESDM group 6 | NCSA/University of Illinois 7 | https://des.ncsa.illinois.edu/ 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 10 | 11 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. 12 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. 13 | Neither the names Matias Carrasco Kind & DESDM group, NCSA/University of Illinois , nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include *.md 3 | include easyaccess 4 | recursive-include doc *.* 5 | recursive-include data *.* 6 | recursive-include tests *.* 7 | recursive-include config *.* 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # easyaccess latest release License ![](https://img.shields.io/conda/v/mgckind/easyaccess.svg) [![DOI](http://joss.theoj.org/papers/10.21105/joss.01022/status.svg)](https://doi.org/10.21105/joss.01022) 2 | 3 | Enhanced command line SQL interpreter client for astronomical surveys. 4 | ![help_screen](data/help.gif) 5 | 6 | ## Description 7 | `easyaccess` is an enhanced command line interpreter and Python package created to facilitate access to astronomical catalogs stored in SQL Databases. It provides a custom interface with custom commands and was specifically designed to access data from the Dark Energy Survey Oracle database, including autocompletion of tables, columns, users and commands, simple ways to upload and download tables using csv, fits and HDF5 formats, iterators, search and description of tables among others. It can easily be extended to other surveys or SQL databases. The package was completely written in Python and supports customized addition of commands and functionalities. 
8 | 9 | For a short tutorial check [here](http://matias-ck.com/easyaccess) 10 | 11 | **Current version = 1.4.7** 12 | 13 | #### DES DR1/DR2 users 14 | For DES public data release, you can start `easyaccess` with: 15 | 16 | easyaccess -s desdr 17 | 18 | To create an account click [here](https://des.ncsa.illinois.edu/easyweb/signup/). 19 | 20 | ## Requirements 21 | 22 | - [Oracle Client](https://www.oracle.com/technetwork/database/database-technologies/instant-client/overview/index.html) > 11g.2 (External library, no python) 23 | Check [here](https://www.oracle.com/technetwork/database/database-technologies/instant-client/overview/index.html) for instructions on how to install these libraries 24 | - [cx_Oracle](https://cx-oracle.readthedocs.io/en/latest/index.html) 25 | - Note that cx_Oracle needs libaio on some Linux systems 26 | - Note that cx_Oracle needs libbz2 on some Linux systems 27 | - [fitsio](https://github.com/esheldon/fitsio) == 1.0.5 28 | - [pandas](http://pandas.pydata.org/) >= 0.14 29 | - [numpy](https://docs.scipy.org/doc/numpy-1.15.1/reference/index.html) 30 | - [termcolor](https://pypi.python.org/pypi/termcolor) 31 | - [PyTables](http://pytables.github.io/) (optional, for hdf5 output) 32 | - [future](http://python-future.org/) (for python 2/3 compatibility) 33 | - [requests](http://docs.python-requests.org/en/master/) 34 | - [gnureadline](https://github.com/ludwigschwardt/python-gnureadline) (optional, for better console behavior in OS X) 35 | 36 | ## Installation 37 | 38 | Installing `easyaccess` can be a little bit tricky given the external libraries required, in particular the Oracle libraries which are free to use. If you are primarily interested in *using* the `easyaccess` client, we recommend building and running the Docker image as described below. 39 | 40 | ### Docker 41 | 42 | Building and running `easyaccess` in Docker is easy. 
Run the `docker build` command followed by a `docker run` command as shown here: 43 | 44 | ``` 45 | $ docker build -t easyaccessclient:latest . 46 | $ docker run -it --rm easyaccessclient:latest easyaccess -s desdr 47 | 48 | Enter username : 49 | Enter password : 50 | Connecting to DB ** desdr ** ... 51 | Loading metadata into cache... 52 | _______ 53 | \ \ 54 | // / . .\ 55 | // / . _\ 56 | // / . / // 57 | \\ \ . / // 58 | \\ \_____/ // 59 | \\_______// DARK ENERGY SURVEY 60 | `-------` DATA MANAGEMENT 61 | 62 | easyaccess 1.4.8-dev. The DESDM Database shell. 63 | 64 | _________ 65 | DESDR ~> SELECT RA, DEC, MAG_AUTO_G, TILENAME FROM DR2_MAIN sample(0.001) FETCH FIRST 5 ROWS ONLY ; 66 | 67 | 68 | RA DEC MAG_AUTO_G TILENAME 69 | 1 13.142238 -43.729656 21.594194 DES0053-4331 70 | 2 13.023884 -58.278531 24.332121 DES0053-5831 71 | 3 13.103845 -62.469223 23.710896 DES0053-6248 72 | 4 13.057452 -18.648209 27.200878 DES0051-1832 73 | 5 13.095345 -38.464359 25.606516 DES0050-3832 74 | 75 | ``` 76 | 77 | ### Source Installation 78 | 79 | `easyaccess` is based heavily on the Oracle python client `cx_Oracle`, you can follow the installation instructions from [here](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html#quick-start-cx-oracle-installation). For `cx_Oracle` to work, you will need the Oracle Instant Client packages which can be obtained from [here](https://www.oracle.com/technetwork/database/database-technologies/instant-client/overview/index.html). 80 | 81 | Make sure you have these libraries installed before proceeding to the installation of easyaccess, you can try by opening a Python interpreter and type: 82 | 83 | import cx_Oracle 84 | 85 | If you have issues, please check the [Troubleshooting page](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html#troubleshooting) or our [FAQ page](FAQ.md). 
86 | 87 | You can clone this repository and install `easyaccess` with: 88 | 89 | python setup.py install 90 | 91 | ## FAQ 92 | We have a running list of [FAQ](FAQ.md) which we will constantly update, please check [here](FAQ.md). 93 | 94 | ## Contributing 95 | Please take a look at our [Code of Conduct](CODE_OF_CONDUCT.md) and our [contribution guide](CONTRIBUTING.md). 96 | 97 | 98 | ## Citation 99 | If you use `easyaccess` in your work we would encourage you to use this reference [https://arxiv.org/abs/1810.02721](https://arxiv.org/abs/1810.02721) or copy/paste this BibTeX: 100 | ``` 101 | @ARTICLE{2018arXiv181002721C, 102 | author = {{Carrasco Kind}, M. and {Drlica-Wagner}, A. and {Koziol}, A.~M.~G. and 103 | {Petravick}, D.}, 104 | title = "{easyaccess: Enhanced SQL command line interpreter for astronomical surveys}", 105 | journal = {arXiv e-prints}, 106 | keywords = {Astrophysics - Instrumentation and Methods for Astrophysics}, 107 | year = 2018, 108 | month = Oct, 109 | eid = {arXiv:1810.02721}, 110 | pages = {arXiv:1810.02721}, 111 | archivePrefix = {arXiv}, 112 | eprint = {1810.02721}, 113 | primaryClass = {astro-ph.IM}, 114 | adsurl = {https://ui.adsabs.harvard.edu/\#abs/2018arXiv181002721C}, 115 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 116 | } 117 | ``` 118 | 119 | 120 | ## Usage 121 | 122 | For a short tutorial and documentation see [here](http://matias-ck.com/easyaccess), note that not all the features are available for public use, i.e., DR1 users.
123 | 124 | ### Some *great* features 125 | - Nice output format (using pandas) 126 | - Very flexible configuration 127 | - Smart tab autocompletion for commands, table names, column names, and file paths 128 | - Write output results to CSV, TAB, FITS, or HDF5 files 129 | - Load tables from CSV, FITS or HDF5 files directly into DB (memory friendly by using number of rows or memory limit) 130 | - Intrinsic DB commands to describe tables, schema, quota, and more 131 | - Easyaccess can be imported as a module from Python with a complete Python API 132 | - Run commands directly from command line 133 | - Load SQL queries from a file and/or from the editor 134 | - Show the execution plan of a query if needed 135 | - Python functions can be run in an inline query 136 | 137 | 138 | #### Interactive interpreter 139 | 140 | Assuming that ```easyaccess``` is in your path, you can enter the interactive interpreter by calling ```easyaccess``` without any command line arguments: 141 | 142 | easyaccess 143 | 144 | #### Running SQL commands 145 | Once inside the interpreter run SQL queries by adding a ";" at the end: 146 | 147 | DESDB ~> select ... from ... where ... ; 148 | 149 | To save the results into a file add ">" after the end of the query (after ";") and the file name at the end of the line 150 | 151 | DESDB ~> select ... from ... where ... ; > test.fits 152 | 153 | The file types supported so far are: .csv, .tab, .fits, and .h5. Any other extension is ignored. 154 | 155 | #### Load tables 156 | To load a table it needs to be in a csv format with column names in the first row; 157 | the name of the table is taken from the filename or from the optional argument --tablename 158 | 159 | DESDB ~> load_table --tablename --chunksize --memsize 160 | 161 | The --chunksize and --memsize are optional arguments to facilitate uploading big files.
162 | 163 | #### Load SQL queries 164 | To load SQL queries just run: 165 | 166 | DESDB ~> loadsql 167 | or 168 | 169 | DESDB ~> @filename.sql 170 | 171 | The query format is the same as the interpreter, SQL statement must end with ";" and to write output files the query must be followed by " > " 172 | 173 | #### Configuration 174 | 175 | The configuration file is located at ```$HOME/.easyaccess/config.ini``` but everything can be configured from inside easyaccess type: 176 | 177 | DESDB ~> help config 178 | 179 | to see the meanings of all the options, and: 180 | 181 | DESDB ~> config all show 182 | 183 | to see the current values, to modify one value, e.g., the prefetch value 184 | 185 | DESDB ~> config prefetch set 50000 186 | 187 | and to see any particular option (e.g., timeout): 188 | 189 | DESDB ~> config timeout show 190 | 191 | #### Command line usage 192 | 193 | Much of the functionality provided through the interpreter is also available directly from the command line. To see a list of command-line options, use the ```--help``` option 194 | 195 | easyaccess --help 196 | 197 | ## Architecture 198 | 199 | We have included a simplified UML diagram describing the architecture and dependencies of `easyaccess` which shows only the different methods for a given class and the name of the file hosting a given class. The main class, `easy_or()`, inherits all methods from all different subclasses, making this model flexible and extendable to other surveys or databases. These methods are then converted to command line commands and functions that can be called inside `easyaccess`. Given that there are some DES specific functions, we have moved DES methods into a separate class `DesActions()`. 
200 | 201 | ![`easyaccess` architecture diagram](paper/classes_simple.png) 202 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # To-do list 2 | 3 | - run multiple queries from a sql file 4 | - add exec like for procedures, add do_execute(self, procedure, args) 5 | - deal with date time variables 6 | - argparser argument groups 7 | - add functionality for grabbing images (SE or COADD) by object or ra,dec 8 | - parse config into easy_or object more automatically 9 | - Only catch exceptions at top level (cmdloop); raise everywhere else. 10 | - Add '--debug' command to print more output/raise exceptions 11 | - Select item from history into th buffer 12 | - Submit query to a batch job manager 13 | -------------------------------------------------------------------------------- /bin/easyaccess: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import easyaccess as ea 3 | ea.cli() 4 | -------------------------------------------------------------------------------- /config/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: easyaccess 3 | version: "1.4.7" 4 | 5 | source: 6 | git_rev: 1.4.7 7 | git_url: https://github.com/mgckind/easyaccess.git 8 | 9 | build: 10 | number: 0 11 | preserve_egg_dir: True 12 | no_link: 13 | - bin/* 14 | script: python setup.py install --single-version-externally-managed --record=record.txt 15 | #noarch_python: True 16 | 17 | 18 | requirements: 19 | build: 20 | - python 21 | - future >=0.15.0 22 | - requests 23 | - pandas 24 | - termcolor >=1.1.0 25 | - fitsio >=0.9.11 26 | - oracle-instantclient ==11.2.0.4.0 27 | - cx_oracle ==5.3 28 | - numpy >=1.13.0 29 | - python-dateutil 30 | 31 | run: 32 | - python 33 | - future >=0.15.0 34 | - requests 35 | - pandas 36 | - termcolor >=1.1.0 37 | - 
fitsio >=0.9.11 38 | - oracle-instantclient ==11.2.0.4.0 39 | - cx_oracle ==5.3 40 | - numpy >=1.13.0 41 | - python-dateutil 42 | 43 | test: 44 | # Python imports 45 | imports: 46 | - easyaccess 47 | - cx_Oracle 48 | - fitsio 49 | 50 | about: 51 | home: https://github.com/mgckind/easyaccess 52 | license: University of Illinois/NCSA Open Source License 53 | license_file: LICENSE.txt 54 | summary: 'Python command line interpreter to access DES Oracle database' 55 | 56 | # See 57 | # http://docs.continuum.io/conda/build.html for 58 | # more information about meta.yaml 59 | -------------------------------------------------------------------------------- /data/help.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mgckind/easyaccess/c1cf082123094ec6435fb989d863825d294e461e/data/help.gif -------------------------------------------------------------------------------- /data/help_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mgckind/easyaccess/c1cf082123094ec6435fb989d863825d294e461e/data/help_screenshot.png -------------------------------------------------------------------------------- /easyaccess/__init__.py: -------------------------------------------------------------------------------- 1 | from .easyaccess import * 2 | from .version import __version__ 3 | from .eautils import db_api as api 4 | from .eautils.python_api import * 5 | 6 | version = __version__ 7 | __all__ = ["eautils", "config_ea", "easyaccess", "version", "eaparser"] 8 | -------------------------------------------------------------------------------- /easyaccess/config_ea.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | # For compatibility with old python 4 | try: 5 | from builtins import input, range 6 | import configparser 7 | except ImportError: 8 | from 
__builtin__ import raw_input, range 9 | import ConfigParser as configparser 10 | import getpass 11 | import sys 12 | import cx_Oracle 13 | import os 14 | import re 15 | 16 | # raw_input only exists in python 2. This will take care of it 17 | try: 18 | input = raw_input 19 | except NameError: 20 | pass 21 | 22 | 23 | configcomment = """# 24 | # Easyaccess default parameters 25 | # 26 | # database : Default is dessci, change to desoper, destest, desdr and others 27 | # Make sure the db-"database" section is in the .desservices.ini 28 | # editor : Default editor to open from inside easyaccess if $EDITOR is not set 29 | # prefetch : Prefetch number of rows to get from oracle (not the number of total rows) 30 | # This determine the number of trips to the DB to get all results from query 31 | # (default: 30000) 32 | # histcache : The number of line in the history cache (when possible) 33 | # timeout : The time in seconds before closing a connection for a query to print on screen 34 | # If the results are redirected to a file there is not a timeout (default 20 min) 35 | # nullvalue : The value used to replace null or empty entries when printing into a file 36 | # outfile_max_mb : Max size of each fits file in MB (default 1GB) 37 | # compression : Toggles compression on output files (default no) 38 | # autocommit : Auto commit changes in DB (default yes) 39 | # trim_whitespace : Trim whitespace from strings when uploading data to the DB (default yes) 40 | # desdm_coldefs : Use DESDM DB compatible data types when uploading data (default yes) 41 | 42 | # Display default parameters 43 | # 44 | # color_terminal : Display colors in terminal (default yes) 45 | # loading_bar : Display a loading bar when querying the DB (default yes) 46 | # max_rows : Max number of rows to display on the screen. 
# (section, option, default value) for every setting easyaccess expects to
# find in its configuration file.  The order is the order in which missing
# entries are added, and therefore the order of a freshly written file.
_CONFIG_DEFAULTS = (
    ('easyaccess', 'database', 'dessci'),
    ('easyaccess', 'editor', 'nano'),
    ('easyaccess', 'prefetch', '30000'),
    ('easyaccess', 'histcache', '5000'),
    ('easyaccess', 'timeout', '1200'),
    ('easyaccess', 'nullvalue', '-9999'),
    ('easyaccess', 'outfile_max_mb', '1000'),
    ('easyaccess', 'autocommit', 'yes'),
    ('easyaccess', 'compression', 'no'),
    ('easyaccess', 'trim_whitespace', 'yes'),
    ('easyaccess', 'desdm_coldefs', 'yes'),
    ('display', 'color_terminal', 'yes'),
    ('display', 'loading_bar', 'yes'),
    ('display', 'max_rows', '2500'),
    ('display', 'width', '1000'),
    ('display', 'max_columns', '50'),
    ('display', 'max_colwidth', '500'),
)


def get_config(configfile):
    """
    Loads config file or create one if not
    Returns a configParser object

    Any section or option listed in ``_CONFIG_DEFAULTS`` that is missing from
    ``configfile`` is filled in with its default value.  When something had to
    be added (or the file could not be read at all) the completed
    configuration is written back to ``configfile`` and re-read.

    Returns None when the configuration had to be written and writing failed.
    """
    config = configparser.ConfigParser()
    configwrite = False
    # ConfigParser.read() returns the list of files it managed to parse; an
    # empty list means the file is missing or unreadable.
    if not config.read(configfile):
        configwrite = True
        print('\nCreating a configuration file... at %s\n' % configfile)

    for section, option, default in _CONFIG_DEFAULTS:
        if not config.has_section(section):
            configwrite = True
            config.add_section(section)
        if not config.has_option(section, option):
            configwrite = True
            config.set(section, option, default)

    if configwrite:
        written = write_config(configfile, config)
        config.read(configfile)
        if not written:
            return None
    return config


def write_config(configfile, config_ob):
    """
    Writes configuration file

    The explanatory header ``configcomment`` is written first, followed by the
    contents of ``config_ob``.

    Returns True on success; on an I/O error a message is printed and False
    is returned.
    """
    try:
        # The context manager closes the handle even if a write fails.
        with open(configfile, 'w') as fout:
            fout.write(configcomment + '\n')
            config_ob.write(fout)
    except (IOError, OSError):
        print("Problems writing the configuration file %s" % configfile)
        return False
    return True
def _oracle_error_code(exc):
    """Return the ORA error number carried by a cx_Oracle exception, or None."""
    details = exc.args[0] if exc.args else None
    return getattr(details, 'code', None)


def _change_expired_password(user, old_password, dsn):
    """Ask for a replacement of an expired password and apply it on connect.

    Returns ``(connection, new_password)``.  Exits the process when the new
    password is rejected locally or the database refuses the change.
    """
    print("ORA-28001: the password has expired or cannot be the default one")
    print("Need to create a new password\n")
    new_password = getpass.getpass(prompt='Enter new password:')
    # NOTE(review): \W rejects every character that is not a letter, a digit
    # or '_', which is broader than the "whitespace" the message mentions.
    if re.search(r'\W', new_password):
        print("\nPassword contains whitespace, not set\n")
        sys.exit(0)
    if not new_password:
        print("\nPassword cannot be blank\n")
        sys.exit(0)
    confirmation = getpass.getpass(prompt='Re-Enter new password:')
    if new_password != confirmation:
        print("Passwords don't match, not set\n")
        sys.exit(0)
    try:
        connection = cx_Oracle.connect(user, old_password, dsn=dsn,
                                       newpassword=new_password)
    except cx_Oracle.Error:
        print('\n Check your credentials and/or database access\n')
        sys.exit(0)
    return connection, new_password


def _prompt_credentials(dsn, db, verbose, attempts=3):
    """Interactively ask for a username/password and verify them against dsn.

    Returns ``(user, password)`` once a connection succeeds.  A rejected
    login (ORA-01017) is retried up to ``attempts`` times; any other database
    error, or running out of attempts, exits the process.
    """
    for _ in range(attempts):
        user = input('Enter username : ')
        password = getpass.getpass(prompt='Enter password : ')
        try:
            connection = cx_Oracle.connect(user, password, dsn=dsn)
        except cx_Oracle.Error as exc:
            code = _oracle_error_code(exc)
            print()
            if code == 28001:
                connection, password = _change_expired_password(user, password, dsn)
            else:
                print(exc)
                if code == 1017:
                    # Invalid username/password: give the user another try.
                    continue
                if code == 12514:
                    print("Check that database '%s' exists\n" % db)
                sys.exit(0)
        connection.close()
        return user, password
    if verbose:
        print('\n Check your credentials and/or database access\n')
    sys.exit(0)


def get_desconfig(desfile, db, verbose=True, user=None, pw1=None):
    """
    Loads des config file or create one if it does not exist.

    desfile : path of the DES services file.
    db      : database name, with or without the 'db-' section prefix.
    verbose : print progress and diagnostic messages.
    user    : username to store when the section has to be created; when None
              the credentials are asked for interactively and checked against
              the database before being stored.
    pw1     : password stored together with ``user``.

    Missing options of the section (user, passwd, name, server, port) are
    filled in and the file is rewritten when anything changed.

    Returns the ConfigParser object, or None when the file had to be written
    and writing failed.  Exits the process when the user declines to continue
    with an unknown database or the credentials cannot be verified.
    """
    server_desoper = 'desdb-oper.ncsa.illinois.edu'
    server_dessci = 'desdb-sci.ncsa.illinois.edu'
    server_public = 'desdb-dr.ncsa.illinois.edu'
    port_n = '1521'

    if not db.startswith('db-'):
        db = 'db-' + db

    config = configparser.ConfigParser()
    configwrite = False
    # An empty result from read() means the file is missing or unreadable.
    if not config.read(desfile):
        configwrite = True
        if verbose:
            print('\nError in DES_SERVICES config file, creating a new one...')
            print('File might not exists or is not configured correctly')
            print()

    databases = ['db-dessci', 'db-desdr', 'db-desoper']
    # section -> (service name, host); unknown databases fall back to the
    # desoper host with the section suffix as service name.
    known = {
        'db-dessci': ('dessci', server_dessci),
        'db-desdr': ('desdr', server_public),
        'db-desoper': ('desoper', server_desoper),
    }
    service_name, server = known.get(db, (db[3:], server_desoper))

    if db not in databases and not config.has_section(db):
        msg = '\nDatabase entered is not in %s ' % databases
        msg += 'or in DES_SERVICE file, continue anyway? [y]/n\n'
        if input(msg).lower() in ('n', 'no'):
            sys.exit(0)

    # Add the default databases
    if not config.has_section(db):
        if verbose:
            print('\nAdding section %s to DES_SERVICES file\n' % db)
        configwrite = True
        dsn = cx_Oracle.makedsn(host=server, port=port_n,
                                service_name=service_name)
        if user is None:
            user, pw1 = _prompt_credentials(dsn, db, verbose)
        config.add_section(db)

    section_defaults = (
        ('user', user),
        ('passwd', pw1),
        ('name', service_name),
        ('server', server),
        ('port', port_n),
    )
    for option, value in section_defaults:
        if not config.has_option(db, option):
            configwrite = True
            config.set(db, option, value)

    if configwrite:
        written = write_desconfig(desfile, config)
        config.read(desfile)
        if not written:
            return None
    return config


def write_desconfig(configfile, config_ob):
    """
    Writes configuration file

    The file holds database passwords, so it is created readable and
    writable by the owner only, and an existing file is reset to that mode.

    Returns True on success; on an I/O error a message is printed and False
    is returned.
    """
    try:
        # Create with mode 0600 up front so the passwords are never on disk
        # with wider permissions, not even briefly.
        fd = os.open(configfile, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as fout:
            fout.write(descomment + '\n')
            config_ob.write(fout)
        os.chmod(configfile, 0o600)  # rw-------
    except (IOError, OSError):
        print("Problems writing the configuration file %s" % configfile)
        return False
    return True
There is a configuration file " 22 | "located in %s for more customizable options" % config_file 23 | ) 24 | parser.add_argument( 25 | "-v", "--version", action="store_true", help="print version number and exit" 26 | ) 27 | parser.add_argument( 28 | "-c", "--command", dest="command", help="Executes command and exit" 29 | ) 30 | parser.add_argument( 31 | "-l", 32 | "--loadsql", 33 | dest="loadsql", 34 | help="Loads a sql command, execute it and exit", 35 | ) 36 | parser.add_argument( 37 | "-lt", 38 | "--load_table", 39 | dest="loadtable", 40 | help="Loads data from a csv, tab, or fits formatted file \ 41 | into a DB table using the filename as the table name or a custom \ 42 | name with --tablename MYTABLE. Not available in desdr.", 43 | ) 44 | parser.add_argument( 45 | "-at", 46 | "--append_table", 47 | dest="appendtable", 48 | help="Appends data from a csv, tab, or fits formatted file \ 49 | into a DB table using the filename as the table name or a custom \ 50 | name with --tablename MYABLE. Not available in desdr.", 51 | ) 52 | parser.add_argument( 53 | "--tablename", 54 | dest="tablename", 55 | help="Custom table name to be used with --load_table\ 56 | or --append_table. Not available in desdr.", 57 | ) 58 | parser.add_argument( 59 | "--chunksize", 60 | dest="chunksize", 61 | type=int, 62 | default=None, 63 | help="Number of rows to be inserted at a time. Useful for large files " 64 | "that do not fit in memory. Use with --load_table or --append_table", 65 | ) 66 | parser.add_argument( 67 | "--memsize", 68 | dest="memsize", 69 | type=int, 70 | default=None, 71 | help=" Size of chunk to be read at a time in Mb. 
Use with --load_table or " 72 | "--append_table", 73 | ) 74 | parser.add_argument( 75 | "-s", 76 | "--db", 77 | dest="db", 78 | help="Override database name [e.g., dessci, desoper, destest, desdr]", 79 | ) 80 | parser.add_argument( 81 | "-q", 82 | "--quiet", 83 | action="store_true", 84 | dest="quiet", 85 | help="Silence initialization, no loading bar", 86 | ) 87 | parser.add_argument("-u", "--user", dest="user") 88 | parser.add_argument("-p", "--password", dest="password") 89 | parser.add_argument( 90 | "-nr", 91 | "--no_refresh", 92 | dest="norefresh", 93 | action="store_true", 94 | help="Do not refresh metadata at starting up to speed initialization. " 95 | "Metadata can always be refreshed from inside using the " 96 | "refresh_metadata command", 97 | ) 98 | parser.add_argument( 99 | "--config", 100 | help="--config show, will print content of " 101 | "config file\n" 102 | "--config reset will reset config to default " 103 | "values\n" 104 | "--config set param1=val1 param2=val2 will " 105 | "modify parameters for the session only", 106 | nargs="+", 107 | ) 108 | args = parser.parse_args() 109 | 110 | if args.version: 111 | print("\nCurrent : easyaccess {:} \n".format(__version__)) 112 | sys.exit() 113 | 114 | if args.config: 115 | if args.config[0] == "show": 116 | print("\n Showing content of the config file (%s) :\n" % config_file) 117 | file_temp = open(config_file, "r") 118 | for line in file_temp.readlines(): 119 | print(line.strip()) 120 | file_temp.close() 121 | sys.exit() 122 | elif args.config[0] == "reset": 123 | print("\n ** Reset config file (%s) to its default!! **:\n" % config_file) 124 | check = input(" Proceed? 
(y/[n]) : ") 125 | if check.lower() == "y": 126 | os.remove(config_file) 127 | conf = config_mod.get_config(config_file) 128 | sys.exit() 129 | elif args.config[0] == "set": 130 | if len(args.config) == 1: 131 | parser.print_help() 132 | sys.exit() 133 | entries = ",".join(args.config[1:]) 134 | entries = entries.replace(",,", ",") 135 | entries = entries.split(",") 136 | for e in entries: 137 | if e == "": 138 | continue 139 | updated = False 140 | try: 141 | key, value = e.split("=") 142 | for section in conf.sections(): 143 | if conf.has_option(section, key): 144 | conf.set(section, key, str(value)) 145 | updated = True 146 | if not updated: 147 | raise 148 | except: 149 | print( 150 | "Check the key exists or that you included the '=' for the " 151 | "parameter\nFor more info use --help." 152 | ) 153 | sys.exit() 154 | else: 155 | parser.print_help() 156 | sys.exit() 157 | return args 158 | -------------------------------------------------------------------------------- /easyaccess/eautils/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["db_api", "des_logo", "dircache", "dtypes", 2 | "fileio", "fun_utils", "import_utils", "ea_utils", 3 | "cli_utils", "db_utils", "python_api", "des_utils"] 4 | -------------------------------------------------------------------------------- /easyaccess/eautils/cli_utils.py: -------------------------------------------------------------------------------- 1 | from easyaccess.eautils.ea_utils import * 2 | from easyaccess.version import last_pip_version 3 | from easyaccess.version import __version__ 4 | import os 5 | import sys 6 | import webbrowser 7 | 8 | try: 9 | from builtins import input, str, range 10 | except ImportError: 11 | from __builtin__ import input, str, range 12 | 13 | try: 14 | import readline 15 | readline_present = True 16 | try: 17 | import gnureadline as readline 18 | except ImportError: 19 | pass 20 | except ImportError: 21 | readline_present = False 
class CommandActions(object):
    # Generic shell commands (history, shell escape, clear, version, prefetch,
    # exit aliases) and tab-completion helpers of the interpreter.
    #
    # The methods rely on attributes and methods supplied by the class this
    # one is combined with: prefetch, config, writeconfig, ct,
    # cache_table_names, cache_column_names, do_exit() and default().

    def do_history(self, arg):
        """
        Print the history buffer to the screen, oldest to most recent.
        IF argument n is present print the most recent N items.

        Usage: history [n]
        """
        if not readline_present:
            return
        nall = readline.get_current_history_length()
        firstprint = 0
        if arg.strip():
            try:
                firstprint = max(nall - int(arg), 0)
            except ValueError:
                print('\nUsage: history [n]\n')
                return
        # readline numbers history items from 1 up to the history length.
        for index in range(firstprint + 1, nall + 1):
            print(index, readline.get_history_item(index))

    def do_shell(self, line):
        """
        Execute shell commands, ex. shell pwd
        You can also use ! like !ls, or !pwd to access the shell

        Uses autocompletion after first command
        """
        os.system(line)

    def do_clear(self, line):
        """
        Clear screen. There is a shortcut by typing . on the interpreter
        """
        sys.stdout.flush()
        if line is None:
            return
        # os.system() reports failure through its return value, it does not
        # raise, so the right command has to be picked up front.
        os.system('cls' if os.name == 'nt' else 'clear')

    def do_version(self, line):
        """
        Print current and latest pip version of easyaccess
        """
        last_version = last_pip_version()
        print()
        print(colored("Current version  : easyaccess {}".format(
            __version__), "green", self.ct))
        print(colored("Last pip version : easyaccess {}".format(
            last_version), "green", self.ct))
        print()
        return

    def do_prefetch(self, line):
        """
        Shows, sets or sets to default the number of prefetch rows from Oracle
        The default is 30000, increasing this number uses more memory but return
        data faster. Decreasing this number reduce memory but increases
        communication trips with database thus slowing the process.

        Usage:
            - prefetch show         : Shows current value
            - prefetch set <number> : Sets the prefetch to <number>
            - prefetch default      : Sets value to 30000
        """
        # Drop all whitespace so 'set 500' and 'set500' are handled alike.
        line = "".join(line.split())
        if 'show' in line:
            print('\nPrefetch value = {:}\n'.format(self.prefetch))
        elif 'set' in line:
            val = line.split('set')[-1]
            if val != '':
                try:
                    new_value = int(val)
                except ValueError:
                    print('\nPrefetch value must be an integer\n')
                    return
                self.prefetch = new_value
                self.config.set('easyaccess', 'prefetch', str(new_value))
                self.writeconfig = True
                print('\nPrefetch value set to {:}\n'.format(self.prefetch))
        elif 'default' in line:
            self.prefetch = 30000
            self.config.set('easyaccess', 'prefetch', '30000')
            self.writeconfig = True
            print('\nPrefetch value set to default (30000) \n')
        else:
            print('\nPrefetch value = {:}\n'.format(self.prefetch))

    def do_EOF(self, line):
        # Exit program on ^D (Ctrl+D)
        print()  # For some reason this is missing...
        self.do_exit(line)

    def do_quit(self, line):
        # Alias of the exit command.
        self.do_exit(line)

    def do_select(self, line):
        # The command name was split off as the first word of the query;
        # put it back and hand the full statement to default().
        self.default('select ' + line)

    def do_SELECT(self, line):
        # Upper-case variant of do_select.
        self.default('SELECT ' + line)

    def do_clear_history(self, line):
        # Forget every line stored in the readline history, when available.
        if readline_present:
            readline.clear_history()

    def do_online_tutorial(self, line):
        # Open the online tutorial in a new browser tab.
        webbrowser.open_new_tab('http://matias-ck.com/easyaccess/')

    def _complete_tables(self, text):
        """Return the cached table names starting with text (upper-cased)."""
        options_tables = self.cache_table_names
        if not text:
            return options_tables
        prefix = text.upper()
        return [option for option in options_tables if option.startswith(prefix)]

    def _complete_colnames(self, text):
        """Return the cached column names starting with text (upper-cased)."""
        options_colnames = self.cache_column_names
        if not text:
            return options_colnames
        prefix = text.upper()
        return [option for option in options_colnames if option.startswith(prefix)]

    def complete_prefetch(self, text, line, start_index, end_index):
        """Complete the sub-commands of 'prefetch'."""
        if not text:
            return options_prefetch
        return [option for option in options_prefetch if option.startswith(text)]

    def complete_shell(self, text, line, start_idx, end_idx):
        """Complete file paths for everything typed after the command word."""
        return complete_path(' '.join(line.split()[1:]))

    def complete_config(self, text, line, start_index, end_index):
        """Complete the arguments of 'config'.

        The first argument is completed from options_config, later ones from
        options_config2.
        """
        args = line.split()
        if text:
            # The word being completed is already counted in args.
            options = options_config2 if len(args) > 2 else options_config
            return [option for option in options if option.startswith(text)]
        return options_config2 if len(args) > 1 else options_config
class Token(object):
    """
    Token object that keeps track of tokens created to make requests.

    Attributes:
        value : the token string
        url   : root url of the service that issued the token
    """

    def __init__(self, token, url):
        self.value = token
        self.url = url
        self._active = True

    def __repr__(self):
        return 'Token(token=%s, url=%s)' % (self.value, self.url)

    def __str__(self):
        return self.value

    def _check(self):
        """Query the token endpoint once, update ``_active``, return the reply."""
        # ``params`` lets requests URL-encode the token value.
        reply = requests.get(self.url + '/api/token/',
                             params={'token': self.value}).json()
        self._active = reply['status'] == 'ok'
        return reply

    def ttl(self):
        """
        Print the message returned by the token endpoint for this token
        and refresh the cached active flag.
        """
        print(self._check()['message'])

    @property
    def active(self):
        """
        Checks whether the token is still active and valid
        """
        self._check()
        return self._active


class Job(object):
    """
    Handle on one job submitted to the service.

    Parameters:
        jobid : job identifier returned by the service
        user  : user name owning the job
        token : token (object or string) used to authenticate the requests
        url   : root url of the service
    """

    def __init__(self, jobid, user, token, url):
        self._jobid = jobid
        self._user = user
        self._token = token
        self._url = url
        # Filled in by ``status``; defined up front so they can always be read.
        self._status = None
        self.req_status = None
        self.message = None
        self.links = []

    def __repr__(self):
        return 'Job(jobid=%s, user=%s, token=%s, url=%s)' % (self._jobid,
                                                             self._user, self._token, self._url)

    def __str__(self):
        return self._jobid

    def _query(self):
        """Request parameters identifying this job."""
        return {'token': str(self._token), 'jobid': self._jobid}

    @property
    def status(self):
        """
        Poll the service and return the job status.

        Returns the ``job_status`` reported by the service, or 'Error' when
        the request itself was rejected.  ``links`` is updated once the job
        reports 'SUCCESS'.
        """
        reply = requests.get(self._url + '/api/jobs/', params=self._query()).json()
        self.req_status = reply['status']
        self.message = reply['message']
        if self.req_status == 'ok':
            self._status = reply['job_status']
            if self._status == 'SUCCESS':
                self.links = reply['links']
        else:
            self._status = 'Error'
        return self._status

    def delete(self):
        """Delete the job on the server and report the outcome."""
        reply = requests.delete(self._url + '/api/jobs/', params=self._query())
        if reply.json()['status'] == 'ok':
            print('Job %s was deleted from the DB' % self._jobid)
        else:
            print(reply.text)

    def __delete__(self):
        # Kept only for backward compatibility with callers of the old name.
        return self.delete()
class MyJobs(object):
    """
    Collection of the jobs the service lists for a user.

    Parameters:
        user (optional)  : DB username; when given, the password is prompted
                           for instead of being read from the services file
        token (optional) : accepted for compatibility, currently unused
        root_url         : the url of the API (required)
        db (optional)    : DB section to read credentials from
        verbose (optional) : print extra information

    Raises:
        ValueError : when ``root_url`` is not given
    """

    def __init__(self, user=None, token=None, root_url=None, db='desoper', verbose=False):
        # Fail before prompting for a password or touching the network.
        if root_url is None:
            raise ValueError('root_url is required: pass the base url of the API')
        self.desconf = config_mod.get_desconfig(DESFILE, db)
        self._db = db
        self.verbose = verbose
        self.jobid = None
        self.token = None
        self.submit = None
        self._status = None
        self.job = None
        self.links = None
        self.files = []
        if user is not None:
            if self.verbose:
                print('Bypassing .desservices file with user : %s' % user)
            passwd = getpass.getpass(prompt='Enter password : ')
            self.desconf.set('db-' + self._db, 'user', user)
            self.desconf.set('db-' + self._db, 'passwd', passwd)
        self.user = self.desconf.get('db-' + self._db, 'user')
        self._passwd = self.desconf.get('db-' + self._db, 'passwd')
        self.root_url = root_url.strip('/')
        self.get_token()
        self.jobs_info = requests.get(
            self.root_url + '/api/jobs/?token=%s&list_jobs' % self.token).json()
        # An error reply carries none of these keys; fall back to empty lists.
        self._jobs = [Job(j, self.user, self.token, self.root_url)
                      for j in self.jobs_info.get('list_jobs', [])]
        self.creation_times = self.jobs_info.get('creation_time', [])
        self.job_types = self.jobs_info.get('job_type', [])

    def get_token(self):
        """Generate a new token using user and password in the API."""
        reply = requests.post(self.root_url + '/api/token/',
                              data={'username': self.user, 'password': self._passwd}).json()
        if reply['status'] == 'ok':
            self.token = Token(reply['token'], self.root_url)
        else:
            self.token = None

    def __len__(self):
        return len(self._jobs)

    def __repr__(self):
        return 'My Jobs (%d in total)' % len(self._jobs)

    def __getitem__(self, index):
        return self._jobs[index]

    def __delitem__(self, index):
        # Only removes the local handle; the job is not deleted on the server.
        del self._jobs[index]

    @property
    def list(self):
        """The list of Job handles."""
        return self._jobs
class DesCoaddCuts(object):
    """
    This Class handles the object for the cutouts

    Parameters:
    -----------

    user (optional)     : DB username
    root_url            : The url for the cutouts API (required)
    db (optional)       : DB to be used (default: desoper)
    verbose (optional)  : print extra information

    When ``user`` is given the password is prompted for interactively.
    """

    def __init__(self, user=None, root_url=None, db='desoper', verbose=True):
        # Fail before prompting for a password.
        if root_url is None:
            raise ValueError('root_url is required: pass the base url of the cutouts API')
        self.desconf = config_mod.get_desconfig(DESFILE, db)
        self._db = db
        self.verbose = verbose
        self.jobid = None
        self.token = None
        self.submit = None
        self._status = None
        self.job = None
        self.links = None
        self.files = []
        if user is not None:
            if self.verbose:
                print('Bypassing .desservices file with user : %s' % user)
            passwd = getpass.getpass(prompt='Enter password : ')
            self.desconf.set('db-' + self._db, 'user', user)
            self.desconf.set('db-' + self._db, 'passwd', passwd)
        self.user = self.desconf.get('db-' + self._db, 'user')
        self._passwd = self.desconf.get('db-' + self._db, 'passwd')
        self.root_url = root_url.strip('/')

    def get_token(self):
        """
        Generate a new token using user and password in the API.

        Returns the token string, or None when the service rejects the
        credentials.
        """
        reply = requests.post(self.root_url + '/api/token/',
                              data={'username': self.user, 'password': self._passwd}).json()
        if reply['status'] == 'ok':
            self.token = Token(reply['token'], self.root_url)
        else:
            self.token = None
        if self.verbose:
            print(reply['message'])
        return self.token.value if self.token is not None else None

    def _refresh(self):
        """Ask the service to refresh its record of the current job."""
        requests.get(self.root_url +
                     '/api/refresh/?user={}&jid={}'.format(self.user, self.jobid))

    def _wait_for_job(self, timeout):
        """Poll the job every 0.5 s until it succeeds or ``timeout`` seconds pass."""
        started = time.time()
        for _ in range(100000):
            if self.job.status == 'SUCCESS':
                self._refresh()
                self._status = self.job.req_status
                break
            if time.time() - started > timeout:
                break
            time.sleep(0.5)
        if self._status != 'ok':
            print('Job is taking longer than expected, '
                  'will continue running but check status later')

    def make_cuts(self, ra=None, dec=None, csvfile=None, xsize=None,
                  ysize=None, email=None, list_only=False, wait=False, timeout=3600):
        """
        Submit a job to generate the cuts on the server side, if wait keyword is
        True the function waits until the job is completed

        ra, dec, xsize and ysize may be scalars or iterables.  When ``csvfile``
        is given the file is uploaded and ra/dec are sent as placeholders.

        Raises RuntimeError when no token can be obtained and AssertionError
        when the service reply has an unexpected status.
        """
        if self.token is None:
            self.get_token()
        if self.token is None:
            raise RuntimeError('Could not obtain a token, check user and password')
        req = self.root_url + '/api/jobs/'
        self.body = {'token': self.token.value, 'list_only': 'false', 'job_type': 'coadd'}
        if ra is not None:
            # Both coordinates are converted the same way: as lists when both
            # are iterable, as scalars otherwise.
            try:
                ra_text, dec_text = str(list(ra)), str(list(dec))
            except TypeError:
                ra_text, dec_text = str(ra), str(dec)
            self.body['ra'] = ra_text
            self.body['dec'] = dec_text
        for key, value in (('xsize', xsize), ('ysize', ysize)):
            if value is None:
                continue
            try:
                self.body[key] = str(list(value))
            except TypeError:
                self.body[key] = str(value)
        if email is not None:
            self.body['email'] = email
        if list_only:
            self.body['list_only'] = 'true'
        if csvfile is not None:
            self.body['ra'] = '0,0'
            self.body['dec'] = '0,0'
            # Close the upload handle as soon as the request is done.
            with open(csvfile, 'rb') as handle:
                self.body_files = {'csvfile': handle}
                self.submit = requests.post(req, data=self.body, files=self.body_files)
        else:
            self.submit = requests.post(req, data=self.body)
        self._status = 'Submitted'
        reply = self.submit.json()
        if self.verbose:
            print(reply['message'])
        if reply['status'] == 'ok':
            # Remember the id: the refresh endpoint needs it.
            self.jobid = reply['job']
            self.job = Job(self.jobid, self.user, self.token, self.root_url)
        elif reply['status'] == 'error':
            self.job = None
            if not self.verbose:
                print(reply['message'])
        else:
            # Explicit raise so the check survives ``python -O``.
            raise AssertionError(self.submit.text)
        if wait and self.job is not None:
            self._wait_for_job(timeout)

    @property
    def status(self):
        """Return the status of the submitted job (if any)."""
        if self.job is None:
            return 'No jobs has been submitted'
        try:
            status = self.job.status
            if status == 'SUCCESS':
                self._status = 'ok'
                self._refresh()
            return status
        except Exception:
            self._status = 'Error!'
            return self._status

    def get_files(self, folder=None, print_only=False, force=True):
        """
        Copy all files generated to local folder.

        folder     : destination directory (created when missing); current
                     directory when None
        print_only : only print the local names, download nothing
        force      : when False, files already present locally are skipped
        """
        if self.job is None or self.job.status != 'SUCCESS':
            print('Job is not completed yet or there was an error, check job status')
            return
        self._status = 'ok'
        self.files = []
        self.links = self.job.links
        if folder is not None:
            if not os.path.exists(folder):
                os.makedirs(folder)
        else:
            folder = ''
        copied = 0
        for link in self.links:
            if not (link.endswith('png') or link.endswith('fits')):
                continue
            local_name = os.path.join(folder, os.path.basename(link))
            self.files.append(local_name)
            if print_only:
                print(local_name)
                continue
            if not force and os.path.exists(local_name):
                continue
            req = requests.get(link, stream=True)
            if req.status_code == 200:
                with open(local_name, 'wb') as out:
                    for chunk in req:
                        out.write(chunk)
                copied += 1
        if self.verbose:
            print('%d files copied to local server' % copied)

    def show_pngs(self, folder=None, limit=100):
        """Display all pngs generated after copying files in local directory."""
        from IPython.display import Image, display
        displayed = 0
        for file_png in self.files:
            if not file_png.endswith('.png'):
                continue
            if displayed == limit:
                break
            # ``self.files`` already holds the download folder; only re-root
            # the file when a folder is given explicitly.
            if folder:
                path = os.path.join(folder, os.path.basename(file_png))
            else:
                path = file_png
            display(Image(filename=path))
            displayed += 1
class DesSingleCuts(DesCoaddCuts):
    """Cutout client for single-epoch images; only the job submission differs."""

    def make_cuts(self, ra=None, dec=None, csvfile=None, band=None, blacklist=True,
                  xsize=None, ysize=None, email=None, list_only=False, wait=False, timeout=3600):
        """
        Submit a job to generate the cuts on the server side, if wait keyword is
        True the function waits until the job is completed

        band      : iterable of band names, sent as a list
        blacklist : when False the request asks the service to skip the blacklist

        Raises RuntimeError when no token can be obtained and AssertionError
        when the service reply has an unexpected status.
        """
        if self.token is None:
            self.get_token()
        if self.token is None:
            raise RuntimeError('Could not obtain a token, check user and password')
        req = self.root_url + '/api/jobs/'
        self.body = {'token': self.token.value, 'list_only': 'false', 'job_type': 'single'}
        if ra is not None:
            # Both coordinates are converted the same way: as lists when both
            # are iterable, as scalars otherwise.
            try:
                ra_text, dec_text = str(list(ra)), str(list(dec))
            except TypeError:
                ra_text, dec_text = str(ra), str(dec)
            self.body['ra'] = ra_text
            self.body['dec'] = dec_text
        for key, value in (('xsize', xsize), ('ysize', ysize)):
            if value is None:
                continue
            try:
                self.body[key] = str(list(value))
            except TypeError:
                self.body[key] = str(value)
        if email is not None:
            self.body['email'] = email
        if list_only:
            self.body['list_only'] = 'true'
        if not blacklist:
            self.body['no_blacklist'] = 'true'
        if band is not None:
            self.body['band'] = str(list(band))
        if csvfile is not None:
            self.body['ra'] = '0,0'
            self.body['dec'] = '0,0'
            # Close the upload handle as soon as the request is done.
            with open(csvfile, 'rb') as handle:
                self.body_files = {'csvfile': handle}
                self.submit = requests.post(req, data=self.body, files=self.body_files)
        else:
            self.submit = requests.post(req, data=self.body)
        self._status = 'Submitted'
        reply = self.submit.json()
        if self.verbose:
            print(reply['message'])
        if reply['status'] == 'ok':
            # Remember the id: the refresh endpoint needs it.
            self.jobid = reply['job']
            self.job = Job(self.jobid, self.user, self.token, self.root_url)
        elif reply['status'] == 'error':
            self.job = None
            if not self.verbose:
                print(reply['message'])
        else:
            # Explicit raise so the check survives ``python -O``.
            raise AssertionError(self.submit.text)
        if wait and self.job is not None:
            started = time.time()
            for _ in range(100000):
                if self.job.status == 'SUCCESS':
                    requests.get(self.root_url +
                                 '/api/refresh/?user={}&jid={}'.format(self.user, self.jobid))
                    self._status = self.job.req_status
                    break
                if time.time() - started > timeout:
                    break
                time.sleep(0.5)
            if self._status != 'ok':
                print('Job is taking longer than expected, '
                      'will continue running but check status later')
class DesSingleExposure(object):
    """
    This Class handles the object for Single Exposures and individual CCDs

    Parameters:
    -----------

    user (optional)     : DB username
    root_url (optional) : The url of the file archive
    db (optional)       : DB to be used (default: desoper)
    verbose (optional)  : print extra information

    When ``user`` is given the password is prompted for interactively.
    """

    def __init__(self, user=None,
                 root_url='https://desar2.cosmology.illinois.edu/DESFiles/desarchive/',
                 db='desoper', verbose=True):
        self.desconf = config_mod.get_desconfig(DESFILE, db)
        self._db = db
        self.verbose = verbose
        self.links = []
        self.files = []
        if user is not None:
            if self.verbose:
                print('Bypassing .desservices file with user : %s' % user)
            passwd = getpass.getpass(prompt='Enter password : ')
            self.desconf.set('db-' + self._db, 'user', user)
            self.desconf.set('db-' + self._db, 'passwd', passwd)
        self.user = self.desconf.get('db-' + self._db, 'user')
        self._passwd = self.desconf.get('db-' + self._db, 'passwd')
        self.root_url = root_url
        # Template only: get_paths() fills it in without modifying it.
        self.base_query = """
        SELECT
        file_archive_info.PATH || '/' || file_archive_info.FILENAME || file_archive_info.COMPRESSION
        as path,
        image.PFW_ATTEMPT_ID,
        image.BAND,
        image.CCDNUM,
        image.NITE,
        image.EXPNUM
        FROM
        ops_proctag, image, file_archive_info
        WHERE
        file_archive_info.FILENAME = image.FILENAME AND
        image.PFW_ATTEMPT_ID = ops_proctag.PFW_ATTEMPT_ID AND
        image.FILETYPE = 'red_immask' AND
        ops_proctag.TAG = '{tag}' AND
        image.EXPNUM = {expnum} AND image.CCDNUM in ({ccd});
        """

    def get_paths(self, expnum, ccd, tag='Y3A1_FINALCUT'):
        """
        Query the DB for the archive paths of the given exposure/CCDs and
        append the resulting urls to ``self.links``.

        expnum : exposure number
        ccd    : a CCD number or an iterable of them
        tag    : processing tag to select
        """
        try:
            ccd = ','.join(map(str, ccd))
        except TypeError:
            # A single, non-iterable CCD number is used as is.
            pass
        # Format into a separate attribute so the template stays reusable.
        self.query = self.base_query.format(expnum=expnum, ccd=ccd, tag=tag)
        if self.verbose:
            print(self.query)
        con = connect(self._db, user=self.user, passwd=self._passwd)
        self.data = con.query_to_pandas(self.query)
        if self.verbose:
            print(self.data)
        for path in self.data['PATH']:
            self.links.append(self.root_url + path)

    def get_files(self, folder=None, print_only=False, force=True):
        """
        Copy all files to local folder.

        folder     : destination directory (created when missing); current
                     directory when None
        print_only : only print the local names, download nothing
        force      : when False, files already present locally are skipped
        """
        if folder is not None:
            if not os.path.exists(folder):
                os.makedirs(folder)
        else:
            folder = ''
        copied = 0
        for link in self.links:
            local_name = os.path.join(folder, os.path.basename(link))
            self.files.append(local_name)
            if print_only:
                print(local_name)
                continue
            if not force and os.path.exists(local_name):
                continue
            req = requests.get(link, stream=True, auth=(self.user, self._passwd))
            if req.status_code == 200:
                with open(local_name, 'wb') as out:
                    for chunk in req:
                        out.write(chunk)
                copied += 1
        if self.verbose:
            print('%d files copied to local server' % copied)
def noc(line, color):
    """Colour function used when colours are disabled: return ``line`` unchanged."""
    return line


def print_deslogo(color=True):
    """
    Print the DES logo to stdout.

    color : when True the logo is passed through ``c`` for colouring,
            otherwise it is printed plain.

    Overline/acute characters are used only when stdout reports a UTF-8
    encoding; otherwise ASCII stand-ins and an extra top line are printed.
    """
    # Compare case-insensitively and tolerate a missing encoding
    # (e.g. when stdout is redirected to an in-memory stream).
    encoding = getattr(sys.stdout, 'encoding', None) or ''
    ascii_only = encoding.upper().replace('-', '') != 'UTF8'
    if ascii_only:
        char0, char1, char2 = ' ', '-', '`'
    else:
        char0, char1, char2 = u"\u203E", u"\u203E", u"\u00B4"
    paint = c if color else noc
    # NOTE(review): runs of spaces inside these literals could not be
    # recovered from the collapsed source; confirm the logo alignment.
    rows = []
    if ascii_only:
        rows.append(" _______ ")
    rows.append(" \\" + char0 * 6 + "\\ ")
    rows.append(" " + paint("//", "red") + " / . .\\ ")
    rows.append(" " + paint("//", "red") + " / . _\\ ")
    rows.append(paint("//", "red") + " / . / " + paint("//", "red") + " ")
    rows.append(paint("\\\\", "red") + " \\ . / " + paint("//", "red") + " ")
    rows.append(paint(" \\\\", "red") + " \\_____/ " + paint("//", "red") + " ")
    rows.append(paint(" \\\\_______//", "red") + " DARK ENERGY SURVEY")
    rows.append(paint(" `" + char1 * 7 + char2, "red") + " DATA MANAGEMENT")

    print()
    for row in rows:
        print(row)
class DesActions(object):
    """
    DES-specific interpreter commands.

    The methods rely on attributes and helpers provided by the object they
    are mixed into (``self.con``, ``self.cur``, ``self.desconfig``,
    ``self.query_and_print``, ``self.do_help`` ...), none of which is
    defined here.
    """

    _KNOWN_DBS = ('dessci', 'desoper', 'destest')

    @staticmethod
    def _quote(value):
        """Escape single quotes so ``value`` can sit inside an SQL literal."""
        return value.replace("'", "''")

    def do_set_password(self, arg):
        """
        DB:Set a new password on this DES instance

        Usage: set_password
        """
        print()
        pw1 = getpass.getpass(prompt='Enter new password:')
        # Anything outside [A-Za-z0-9_] is refused; this also keeps quotes
        # out of the ALTER USER statement built below.
        if re.search(r'\W', pw1):
            print(colored("\nPassword contains whitespace or special characters, not set\n",
                          "red", self.ct))
            return
        if not pw1:
            print(colored("\nPassword cannot be blank\n", "red", self.ct))
            return
        pw2 = getpass.getpass(prompt='Re-Enter new password:')
        print()
        if pw1 != pw2:
            print(colored("Passwords don't match, not set\n", "red", self.ct))
            return

        query = """alter user %s identified by "%s" """ % (self.user, pw1)
        confirm = 'Password changed in %s' % self.dbname.upper()
        try:
            self.query_and_print(query, print_time=False, suc_arg=confirm)
            self.desconfig.set('db-' + self.dbname, 'passwd', pw1)
            config_mod.write_desconfig(desfile, self.desconfig)
        except Exception:
            confirm = 'Password could not be changed in %s\n' % self.dbname.upper()
            print(colored(confirm, "red", self.ct))
            print(sys.exc_info())

    def do_change_db(self, line):
        """
        DB: Change to another database, namely dessci, desoper, destest

        Usage:
            change_db DB  # Changes to DB, it does not refresh metadata, e.g.: change_db desoper

        """
        import time  # local import: used only for the retry delay

        if line == '':
            return self.do_help('change_db')
        key_db = line.split()[0]
        if key_db not in self._KNOWN_DBS:
            print(colored("DB {} does not exist or you don't have access to it".format(
                key_db), "red", self.ct))
            return
        if key_db == self.dbname:
            print(colored("Already connected to : %s" % key_db, "green", self.ct))
            return
        # Read the whole section first so a missing entry leaves the current
        # connection settings untouched.
        section = 'db-' + key_db
        try:
            user = self.desconfig.get(section, 'user')
            password = self.desconfig.get(section, 'passwd')
            dbhost = self.desconfig.get(section, 'server')
            service_name = self.desconfig.get(section, 'name')
        except Exception:
            print(colored("DB {} does not exist in your desservices file".format(
                key_db), "red", self.ct))
            return
        self.dbname = key_db
        self.user = user
        self.password = password
        self.dbhost = dbhost
        self.service_name = service_name
        kwargs = {'host': self.dbhost, 'port': self.port, 'service_name': self.service_name}
        self.dsn = cx_Oracle.makedsn(**kwargs)
        if not self.quiet:
            print('Connecting to DB ** %s ** ...' % self.dbname)
        self.con.close()
        max_tries = 1
        connected = False
        for attempt in range(max_tries):
            try:
                self.con = cx_Oracle.connect(
                    self.user, self.password, dsn=self.dsn)
                if self.autocommit:
                    self.con.autocommit = True
                connected = True
                break
            except Exception as e:
                lasterr = str(e).strip()
                print(colored(
                    "Error when trying to connect to database: %s" % lasterr, "red", self.ct))
                # Only announce and wait when another attempt will follow.
                if attempt + 1 < max_tries:
                    print("\n Retrying...\n")
                    time.sleep(5)
        if not connected:
            print(
                '\n ** Could not successfully connect to DB. Try again later. Aborting. ** \n')
            os._exit(0)
        self.cur = self.con.cursor()
        self.cur.arraysize = int(self.prefetch)
        print()
        print("Run refresh_metadata_cache to reload the auto-completion metatada")
        self.set_messages()
        return

    def complete_change_db(self, text, line, start_index, end_index):
        """Complete the database name for ``change_db``."""
        options_db = ['desoper', 'dessci', 'destest']
        if text:
            return [option for option in options_db if option.startswith(text.lower())]
        return options_db

    def do_find_user(self, line):
        """
        DB:Finds users given 1 criteria (either first name or last name)

        Usage:
            - find_user Doe     # Finds all users with Doe in their names
            - find_user John%   # Finds all users with John IN their names (John, Johnson, etc...)
            - find_user P%      # Finds all users with first, lastname or username starting with P

        """
        if line == '':
            return self.do_help('find_user')
        keys = line.split()
        if self.dbname in ('dessci', 'desoper'):
            query = 'select * from des_users where '
        elif self.dbname in ('destest',):
            query = 'select * from dba_users where '
        else:
            print(colored('find_user is not available in %s' % self.dbname, 'red', self.ct))
            return
        # The pattern comes straight from the prompt: escape quotes.
        pattern = self._quote(keys[0])
        query += 'upper(firstname) like upper(\'' + pattern + '\') or '
        query += 'upper(lastname) like upper(\'' + pattern + '\') or '
        query += 'upper(username) like upper (\'' + pattern + '\')'
        self.query_and_print(query, print_time=False, clear=True)

    def complete_find_user(self, text, line, start_index, end_index):
        """Complete user names for ``find_user``."""
        options_users = self.cache_usernames
        if text:
            return [option for option in options_users if option.startswith(text.lower())]
        return options_users

    def do_find_tables_with_column(self, arg):
        """
        DB:Finds tables having a column name matching column-name-string.

        Usage: find_tables_with_column
        Example: find_tables_with_column %MAG%  # hunt for columns with MAG
        """
        if arg == '':
            return self.do_help('find_tables_with_column')
        query = """
            SELECT t.owner || '.' || t.table_name as table_name, t.column_name
            FROM all_tab_cols t, DES_ADMIN.CACHE_TABLES d
            WHERE t.column_name LIKE '%s'
            AND t.owner || '.' || t.table_name = d.table_name
            """ % self._quote(arg.upper())

        self.query_and_print(query)
        return

    def complete_find_tables_with_column(self, text, line, begidx, lastidx):
        """Complete column names for ``find_tables_with_column``."""
        return self._complete_colnames(text)

    def do_whoami(self, arg):
        """
        DB:Print information about the user's details.

        Usage: whoami
        """
        # It might be useful to print user roles as well
        # select GRANTED_ROLE from USER_ROLE_PRIVS

        if self.dbname in ('dessci', 'desoper'):
            sql_getUserDetails = """
            select d.username, d.email, d.firstname as first, d.lastname as last,
            trunc(sysdate-t.ptime,0)||' days ago' last_passwd_change,
            trunc(sysdate-t.ctime,0)||' days ago' created
            from des_users d, sys.user$ t where
            d.username = '""" + self.user + """' and t.name=upper(d.username)"""
        elif self.dbname in ('destest',):
            print(
                colored('\nThis function is not implemented in destest\n', 'red', self.ct))
            sql_getUserDetails = "select * from dba_users where username = '" + self.user + "'"
        else:
            print(colored('whoami is not available in %s' % self.dbname, 'red', self.ct))
            return
        self.query_and_print(sql_getUserDetails, print_time=False, clear=True)

    def get_tables_names_user(self, user):
        """
        Print the tables owned by ``user``; when there are none, report
        whether the user exists at all.
        """
        if user == "":
            return self.do_help('tables_names_user')
        user = user.replace(";", "")
        # Bind variables keep the user-supplied name out of the SQL text.
        temp = self.cur.execute(
            """
            select distinct table_name from all_tables
            where owner = :owner order by table_name""", owner=user.upper())
        tnames = pd.DataFrame(temp.fetchall())
        self.do_clear(None)
        if len(tnames) > 0:
            print(colored('\nPublic tables from %s' %
                          user.upper(), "cyan", self.ct))
            print(tnames)
            return
        users_table = 'des_users' if self.dbname in ('dessci', 'desoper') else 'dba_users'
        temp = self.cur.execute(
            """
            select count(username) as cc from %s
            where upper(username) = upper(:username)""" % users_table, username=user)
        count = temp.fetchall()
        if count[0][0] == 0:
            print(colored('User %s does not exist in DB' %
                          user.upper(), 'red', self.ct))
        else:
            print(colored('User %s has no tables' %
                          user.upper(), 'cyan', self.ct))

    def complete_user_tables(self, text, line, start_index, end_index):
        """Complete user names for ``user_tables``."""
        options_users = self.cache_usernames
        if text:
            return [option for option in options_users if option.startswith(text.lower())]
        return options_users
# Maps a directory path to (mtime, sorted list of entries).
cache = {}


def reset():
    """Reset the cache completely."""
    global cache
    cache = {}


def listdir(path):
    """
    List directory contents, using cache.

    Returns the sorted entry names of ``path``; an empty list when the
    directory cannot be stat'ed or read.  The returned list is the cached
    object itself, so callers that mutate it (see ``annotate``) change what
    later calls return until the directory is modified.
    """
    try:
        cached_mtime, entries = cache[path]
        del cache[path]
    except KeyError:
        cached_mtime, entries = -1, []
    try:
        # Full-resolution mtime: whole seconds can hide a change made
        # right after the previous listing.
        mtime = os.stat(path).st_mtime
    except os.error:
        return []
    if mtime != cached_mtime:
        try:
            entries = os.listdir(path)
        except os.error:
            return []
        entries.sort()
    cache[path] = mtime, entries
    return entries


opendir = listdir  # XXX backward compatibility


def annotate(head, list):
    """Add '/' suffixes to directories, modifying ``list`` in place."""
    # ``list`` shadows the builtin but is part of the public signature.
    for index, name in enumerate(list):
        if os.path.isdir(os.path.join(head, name)):
            list[index] = name + '/'
def oracle2numpy(desc):
    """Takes an Oracle data type and converts to a numpy dtype string.

    TODO: Vectorize?

    Parameters:
    ----------
    desc: Oracle column descriptor; items 1, 3, 4 and 5 are read as the
          type, size, digits (precision) and scale

    Returns:
    --------
    dtype: Numpy dtype string, "" for Oracle types that are not handled,
           None for a native float whose size is neither 4 nor 8
    """
    otype = desc[1]
    size = desc[3]
    digits = desc[4]
    scale = desc[5]

    if otype == or_n:
        # Without a precision nothing can be said about the width: use a
        # double.  This also covers scale == 0 with no digits, which used
        # to fail comparing None with an int.
        if digits is None:
            return "f8"
        if scale == 0 and digits != 0:
            # Nothing after the decimal; integers
            if digits <= 4:
                return "i2"
            elif digits <= 9:
                return "i4"
            # This is sloppy...
            return "i8"
        # Otherwise, floats
        if digits <= 6:
            return "f4"
        elif digits <= 15:
            return "f8"
        return "f16"
    elif otype == or_f:
        # Native floats
        if size == 4:
            return "f4"
        elif size == 8:
            return "f8"
        return None
    elif otype == or_s:
        return "S" + str(size)
    else:
        # Ignore other Oracle types for now
        return ""
def oracle2fitsio(desc):
    """Takes an Oracle data type and converts to a numpy dtype
    suitable for writing with fitsio.

    Parameters:
    ----------
    desc: Oracle column descriptor

    Returns:
    --------
    dtype: Numpy dtype string; date and timestamp columns are written as
           50-character strings, everything else follows oracle2numpy
    """
    otype = desc[1]
    if otype == or_dt or otype == or_ts:
        return "S50"
    return oracle2numpy(desc)


def numpy2oracle(dtype):
    """Takes a numpy dtype object and converts to an Oracle data type
    string.

    TODO: Vectorize?

    Parameters:
    ----------
    dtype: Numpy dtype object

    Returns:
    --------
    otype: Oracle data type string, "" for kinds that are not handled

    Raises:
    -------
    ValueError: for float types that are neither 4 nor 8 bytes wide
    """
    kind = dtype.kind
    size = dtype.itemsize

    if kind == 'S':
        # string type
        return 'VARCHAR2(%d)' % size
    elif kind == 'i' or kind == 'u':
        if size == 1:
            # 1-byte (8 bit) integer
            return 'NUMBER(3,0)'
        elif size == 2:
            # 2-byte (16 bit) integer
            return 'NUMBER(5,0)'
        elif size == 4:
            # 4-byte (32 bit) integer
            return 'NUMBER(10,0)'
        # 8-byte (64 bit) integer
        # This is sloppy...
        # 'i8' is 19 digits; 'u8' is 20 digits
        return 'NUMBER(20,0)'
    elif kind == 'f':
        if size == 4:
            # 4-byte (32 bit) float
            return 'BINARY_FLOAT'
        elif size == 8:
            # 8-byte (64 bit) double
            return 'BINARY_DOUBLE'
        # Report the dtype itself; the kind letter alone is always 'f'.
        raise ValueError("Unsupported float type: %s" % dtype)
    elif kind == 'M':
        # Should test on CREATED_DATE from PROD.PROCTAG@DESOPER
        return 'DATETIME'
    elif kind == 'O':
        # Careful pandas creates objects for strings...
        return 'OBJECT'
    # Other kinds (e.g. bool, unicode) are not mapped.
    return ""
def numpy2desdm(desc):
    """
    Impose DESDM typing conventions based on column name.

    This is an experimental function for imposing some of the DESDM
    'conventions' for defining column types. The 'conventions' come
    mostly from the existing Y1A1 and PROD tables.

    This function is NOT comprehensive.

    Parameters:
    ----------
    desc : numpy dtype descriptor (i.e., np.dtype.descr); item 0 is the
           column name, item 1 the dtype

    Returns:
    --------
    otype: Oracle data type string; names without a convention fall back
           to numpy2oracle on the dtype
    """
    name = desc[0].upper()
    dtype = np.dtype(desc[1])

    # Name with an optional leading 'WAVG_' removed.  str.strip() cannot be
    # used for this: it removes any of the characters W, A, V, G and _ from
    # both ends, which mangles names such as 'MAG_G' or 'SPREAD_G'.
    prefix = 'WAVG_'
    base = name[len(prefix):] if name.startswith(prefix) else name

    # It would be better to do this with a lookup dictionary rather
    # than 'if/elif' clauses. However, it is hard to do 'startswith'
    # when searching dictionary keys. It would also be more flexible
    # to use regexs.

    # Integer values
    if name.startswith('CCDNUM') or name in ['ATTNUM']:
        return "NUMBER(2,0)"
    elif name.startswith(('FLAGS_', 'OBSERVED_', 'MODEST_CLASS')):
        return "NUMBER(3,0)"
    elif name.startswith('NEPOCHS'):
        return "NUMBER(4,0)"
    elif name in ['REQNUM']:
        return "NUMBER(7,0)"
    elif name.startswith(('HPIX', 'EXPNUM')):
        return "NUMBER(10,0)"
    # Temporary adjustment to deal with large object numbers
    elif name in ['COADD_OBJECTS_ID', 'COADD_OBJECT_ID', 'OBJECT_NUMBER', 'OBJECT_ID']:
        return "NUMBER(11,0)"
    elif name in ['QUICK_OBJECT_ID']:
        return "NUMBER(15,0)"
    # Floating point values
    elif base.startswith(("CLASS_STAR", "SPREAD_", "SPREADERR_")):
        return 'BINARY_FLOAT'
    elif base.startswith(("MAG_", "MAGERR_", "CALIB_MAG_")):
        return 'BINARY_FLOAT'
    # ADW: Y3A2 tables currently implement the MOF_/SOF_ CM_MAG_ and
    # PSF_MAG_ columns as doubles for no apparent reason, so they are
    # left to the generic fallback.
    elif name.startswith(('SLR_SHIFT', 'DESDM_ZP', 'DESDM_ZPERR')):
        # DEPRECATED: ADW 2018-06-07
        return "NUMBER(6,4)"
    elif name in ['RA', 'DEC', 'RADEG', 'DECDEG', 'L', 'B']:
        return "NUMBER(9,6)"
    elif name.startswith(('ALPHAWIN', 'DELTAWIN')):
        return "BINARY_DOUBLE"
    # String values
    elif name in ['BAND']:
        # Needs to fit 'VR' and 'block'
        return "VARCHAR2(5)"
    elif name in ['UNITNAME']:
        # Why is this so large? Usually "D%8d" = VARCHAR2(9)
        return "VARCHAR2(20)"
    elif name in ['TAG']:
        return "VARCHAR2(30)"
    elif name in ['FILENAME']:
        # This is VARCHAR2(60) in prod.se_object, but seems like overkill
        # This is VARCHAR2(100) in Y3A2_COADD_OBJECT_BAND, but seems like overkill
        return "VARCHAR2(50)"
    else:
        return numpy2oracle(dtype)
Usually "D%8d" = VARCHAR2(9) 234 | return "VARCHAR2(20)" 235 | elif name in ['TAG']: 236 | return "VARCHAR2(30)" 237 | elif name in ['FILENAME']: 238 | # This is VARCHAR2(60) in prod.se_object, but seems like overkill 239 | # This is VARCHAR2(100) in Y3A2_COADD_OBJECT_BAND, but seems like overkill 240 | return "VARCHAR2(50)" 241 | else: 242 | return numpy2oracle(dtype) 243 | 244 | 245 | if __name__ == "__main__": 246 | import argparse 247 | description = __doc__ 248 | parser = argparse.ArgumentParser(description=description) 249 | args = parser.parse_args() 250 | -------------------------------------------------------------------------------- /easyaccess/eautils/ea_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import itertools 4 | import sys 5 | import os 6 | import time 7 | import signal 8 | import stat 9 | import easyaccess.eautils.fileio as eafile 10 | import easyaccess.eautils.dircache as dircache 11 | 12 | 13 | def without_color(line, color, mode=0): 14 | return line 15 | 16 | 17 | try: 18 | from termcolor import colored as with_color 19 | 20 | def colored(line, color, mode=0): 21 | if mode == 1: 22 | return with_color(line, color) 23 | else: 24 | return line 25 | except ImportError: 26 | colored = without_color 27 | 28 | desfile = os.getenv("DES_SERVICES") 29 | if not desfile: 30 | desfile = os.path.join(os.getenv("HOME"), ".desservices.ini") 31 | if os.path.exists(desfile): 32 | amode = stat.S_IMODE(os.stat(desfile).st_mode) 33 | if amode != 2 ** 8 + 2 ** 7: 34 | print('Changing permissions to des_service file to read/write by user') 35 | os.chmod(desfile, 2 ** 8 + 2 ** 7) # rw by user owner only 36 | 37 | 38 | def print_exception(pload=None, mode=1): 39 | (type, value, traceback) = sys.exc_info() 40 | if pload and (pload.pid is not None): 41 | os.kill(pload.pid, signal.SIGKILL) 42 | print() 43 | print(colored(type, "red", mode)) 44 | 
def read_buf(fbuf):
    """
    Read SQL files, sql statement should end with ';' if parsing to a file to write.

    Lines that start with '--' are dropped, and anything after a '--' inside
    a line is discarded. The remaining pieces are concatenated, each one
    preceded by a single space.

    Parameters:
    -----------
    fbuf : Path of the SQL file to read

    Returns:
    --------
    The query as a single string, or "" (after printing a message) when the
    file cannot be read.
    """
    try:
        with open(fbuf) as f:
            content = f.read()
    except Exception:
        # Best effort: any failure to open/read/decode the file is reported
        # and an empty query is returned. KeyboardInterrupt and SystemExit
        # are deliberately no longer swallowed here.
        print('\n' + 'Fail to load the file "{:}"'.format(fbuf))
        return ""
    pieces = []
    for line in content.split('\n'):
        if line[0:2] == '--':
            # whole-line comment
            continue
        # keep only the text in front of a trailing '--' comment
        pieces.append(' ' + line.split('--')[0])
    return ''.join(pieces)
def complete_path(line):
    """
    Return path completions for the second whitespace-separated token of
    `line` (the first token is presumably the command name).

    Parameters:
    -----------
    line : The command line typed so far

    Returns:
    --------
    The annotated listing of the directory part of the token, restricted to
    entries starting with the partial file name (all entries when there is
    no partial name).
    """
    tokens = line.split()
    if len(tokens) < 2:
        filename = ''
        path = './'
    else:
        path = tokens[1]
        if '/' in path:
            i = path.rfind('/')
            filename = path[i + 1:]
            # For a root-level token such as '/etc' the slice is empty;
            # list '/' in that case instead of handing '' to listdir.
            path = path[:i] or '/'
        else:
            filename = path
            path = './'
    # Work on a copy: the annotate() result is read back from `ls`, so it is
    # presumably modified in place and the list returned by listdir() should
    # not be touched.
    ls = dircache.listdir(path)[:]
    dircache.annotate(path, ls)
    if filename == '':
        return ls
    return [f for f in ls if f.startswith(filename)]
def get_chunksize(filename, memory=500):
    """
    Get the approximate number of rows to be read given memory constraints.

    The size of a row is estimated from (up to) the first 100 rows of the
    file.

    Parameters:
    -----------
    filename : File name
    memory   : Memory in MB to compute the approximate number of rows

    Returns:
    --------
    The number of rows that need to be read for each chunk of memory

    Raises:
    -------
    IOError : Unsupported file extension, or an HDF5 file (reading HDF5
              files by chunks is not supported)
    """
    base, ext = os.path.splitext(filename)
    check_filetype(ext, FILE_EXTS)

    if ext in PANDAS_EXTS:
        if ext == '.h5':
            # This used to *return* the exception object instead of raising it
            raise IOError('\nReading HDF5 files by chunks is not supported yet\n')
        sepa = ',' if ext == '.csv' else None
        temp = pd.read_csv(filename, sep=sepa, nrows=100)
        # Divide by the rows actually sampled: files shorter than 100 rows
        # would otherwise under-estimate the row size.
        bytes_per_row = temp.memory_usage(index=True).sum() / float(max(len(temp), 1))
        del temp
    elif ext in FITS_EXTS:
        temp = fitsio.FITS(filename)
        try:
            temp_data = temp[1][0:100]
            bytes_per_row = temp_data.nbytes / float(max(len(temp_data), 1))
        finally:
            # release the file handle even when reading the sample fails
            temp.close()
        del temp_data

    return int(memory * 1024**2 / bytes_per_row)
def check_filetype(filename, types=None):
    """
    Validate the extension of a file against a collection of allowed ones.

    Parameters:
    -----------
    filename : Name (or bare extension, e.g. '.csv') of the file
    types    : Allowed extensions; FILE_EXTS is used when omitted

    Returns:
    --------
    True when the extension is allowed

    Raises:
    -------
    IOError : The extension is not one of the allowed types
    """
    allowed = FILE_EXTS if types is None else types
    root, suffix = os.path.splitext(filename)
    # A bare extension comes back from splitext() as the root with an
    # empty suffix, so fall back to the root in that case.
    candidate = root if suffix == '' else suffix

    if candidate in allowed:
        return True
    raise IOError(unrecognized_filetype(candidate, allowed))
def write_pandas(filename, df, fileindex, mode='w', header=True, query='', comp=False):
    """
    Write a pandas DataFrame to a file. Accepted file extension are
    defined by 'PANDAS_EXTS'.

    Note that `df` is modified in place: on Python 3 byte strings in object
    columns are decoded to text, and for HDF5 output the index is shifted
    by the number of rows already stored.

    Parameters:
    -----------
    filename : Output filename: '.csv','.tab','.h5' (optionally with '.gz')
    df       : DataFrame object
    fileindex: Index of this file (not used here; kept for a uniform
               signature with write_fitsio)
    mode     : Write mode: 'w'=write, 'a'=append
    header   : Write header information
    query    : Query used to create the file (not used here)
    comp     : Use compression (gzip for text files, bzip2 for HDF5)

    Returns:
    --------
    None
    """
    plain_name = filename.replace('.gz', '')
    base, ext = os.path.splitext(plain_name)
    check_filetype(plain_name, PANDAS_EXTS)
    # convert b to unicode (python 3) for convenience
    if sys.version_info[0] == 3:
        for col in df:
            # 'object' is what the removed 'np.object' alias used to point to
            if df[col].dtype == object:
                # Decode only real byte strings; values that are already
                # text (or missing) are left untouched.
                df[col] = df[col].apply(
                    lambda v: v.decode('utf-8') if isinstance(v, bytes) else v)
    if ext in ('.csv', '.tab'):
        sep = ',' if ext == '.csv' else ' '
        if comp:
            df.to_csv(filename, index=False, float_format='%.8f', sep=sep,
                      mode=mode, header=header, compression='gzip')
        else:
            df.to_csv(filename, index=False, float_format='%.8f', sep=sep,
                      mode=mode, header=header, encoding='utf-8')
    if ext == '.h5':
        append = (mode != 'w')
        nrows = 0
        if append:
            # Continue the index after the rows that are already stored.
            # When overwriting, the old row count is irrelevant.
            with pd.HDFStore(filename) as storage:
                try:
                    nrows = storage.get_storer('data').nrows
                except (KeyError, AttributeError):
                    # no 'data' table in the store yet
                    nrows = 0
        df.index = pd.Series(df.index) + nrows
        if comp:
            df.to_hdf(filename, key='data', mode=mode, format='t', append=append,
                      data_columns=True, complevel=9, complib='bzip2')
        else:
            df.to_hdf(filename, key='data', mode=mode, format='t', append=append,
                      data_columns=True)
288 | 289 | It is necessary to convert the pandas.DataFrame to a numpy.array 290 | before writing, which leads to some hit in performance. 291 | 292 | Parameters: 293 | ----------- 294 | filename: Base output FITS filename (over-write if already exists). 295 | df : DataFrame object 296 | desc : Oracle descriptor object 297 | fileindex: Index of this file (modifies filename based on maxfilesize) 298 | mode : Write mode: 'w'=write, 'a'=append 299 | query : Query used to create file 300 | comp : Compression 301 | 302 | Returns: 303 | -------- 304 | None 305 | """ 306 | check_filetype(filename.replace('.gz',''), FITS_EXTS) 307 | # Create the proper recarray dtypes 308 | dtypes = [] 309 | for d in desc: 310 | name, otype = d[0:2] 311 | if otype == eatypes.or_ov: 312 | # Assume that Oracle OBJECTVARs are 'f8' 313 | # Could this be better addressed elsewhere? 314 | dtypes.append((name, 'f8', len(df[name].values[0]))) 315 | print(d, dtypes[-1]) 316 | elif otype == 'updated': 317 | dtypes.append((name, df[name].dtype.kind + str(df[name].dtype.itemsize))) 318 | else: 319 | dtypes.append((name, eatypes.oracle2fitsio(d))) 320 | 321 | # Create numpy array to write 322 | arr = np.zeros(len(df.index), dtype=dtypes) 323 | 324 | # fill array 325 | for d in desc: 326 | name, otype = d[0:2] 327 | if otype == eatypes.or_ov: 328 | arr[name] = np.array(df[name].values.tolist()) 329 | else: 330 | arr[name][:] = df[name].values 331 | 332 | # write or append... 333 | if mode == 'w': 334 | # assume that this is smaller than the max size! 
def get_dtypes(df):
    """
    Get dtypes from pandas DataFrame or fitsio.FITS

    The kind of container is read from the `file_type` attribute
    ('pandas' or 'fits') that the read_* functions attach to their result.

    Parameters:
    -----------
    df : pandas.DataFrame or fitsio.FITS object carrying `file_type`

    Returns:
    --------
    dtypes : List with one numpy dtype per column. Object (string) columns
             of a DataFrame are reported as fixed-width byte strings wide
             enough for the longest value.

    Raises:
    -------
    ValueError : `file_type` is neither 'pandas' nor 'fits'
    """
    if df.file_type == 'pandas':
        dtypes = []
        for col in df:
            col_dtype = df[col].dtype
            if col_dtype.kind == 'O':
                # Series.max() skips missing values, which the builtin max()
                # does not; fall back to width 1 when nothing has a length.
                longest = df[col].str.len().max()
                width = 1 if pd.isna(longest) else int(longest)
                col_dtype = np.dtype('S' + str(width))
            dtypes.append(col_dtype)
        return dtypes

    if df.file_type == 'fits':
        dtype = df[1].get_rec_dtype(vstorage='fixed')[0]
        return [dtype[i] for i in range(len(dtype.descr))]

    # Previously this fell through to an UnboundLocalError
    raise ValueError("Unknown file_type: %r" % (df.file_type,))
402 | 403 | Parameters: 404 | ---------- 405 | filename : Input filename 406 | 407 | Returns: 408 | -------- 409 | df : pandas.DataFrame object 410 | """ 411 | # ADW: Pandas does a pretty terrible job of automatic typing 412 | base, ext = os.path.splitext(filename) 413 | check_filetype(filename, PANDAS_EXTS) 414 | 415 | try: 416 | if ext in ('.csv', '.tab'): 417 | if ext == '.csv': 418 | sepa = ',' 419 | if ext == '.tab': 420 | sepa = None 421 | df = pd.read_csv(filename, sep=sepa, iterator=True) 422 | iterator = True 423 | elif ext in ('.h5'): 424 | df = pd.read_hdf(filename, key='data') # iterator for hdf in padnas 0.18 425 | iterator = False 426 | except: 427 | msg = 'Problem reading %s\n' % filename 428 | raise IOError(msg) 429 | 430 | # Monkey patch to grab columns and values 431 | # List comprehension is faster but less readable 432 | # dtypes = [df[c].dtype if df[c].dtype.kind != 'O' 433 | # else np.dtype('S' + str(max(df[c].str.len()))) 434 | # for i, c in enumerate(df)] 435 | 436 | # df.ea_get_columns = df.columns.values.tolist 437 | # df.ea_get_values = df.values.tolist 438 | # df.ea_get_dtypes = lambda: dtypes 439 | df.file_type = 'pandas' 440 | 441 | return df, iterator 442 | 443 | 444 | def read_fitsio(filename): 445 | """Read an input FITS file into a numpy recarray. Accepted file 446 | extensions defined by 'FITS_EXTS'. 
def toeasyaccess(custom):
    """
    Decorator that marks a function as callable from inline queries.

    The returned wrapper simply forwards its arguments to `custom` and
    carries three extra attributes:

    in_easyaccess : True, used to recognise wrapped functions
    __doc1__      : The call signature as text, e.g. '(a, b=1)'
    __doc__       : The docstring of `custom`

    Parameters:
    -----------
    custom : The function to wrap

    Returns:
    --------
    The wrapped function
    """
    @wraps(custom)
    def easy_function(*args, **kwargs):
        return custom(*args, **kwargs)

    # inspect.getargspec was removed in Python 3.11; getfullargspec exposes
    # the same .args/.defaults fields. Fall back to getargspec on Python 2.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    check = get_spec(custom)
    defaults = check.defaults if check.defaults is not None else ()
    nargs_p = len(check.args) - len(defaults)
    head = []
    for j, ag in enumerate(check.args):
        if j < nargs_p:
            # positional argument without default
            head.append(ag)
        else:
            head.append(ag + '=' + str(defaults[j - nargs_p]))
    easy_function.__doc1__ = '(' + ', '.join(head) + ')'
    easy_function.in_easyaccess = True
    easy_function.__doc__ = custom.__doc__

    return easy_function
class Import(object):
    # Mix-in with the 'import' and 'help_function' interpreter commands.
    # The do_* docstrings below are shown to the user as command help.

    def do_import(self, line):
        """
        Use to import modules to call functions inline query,
        similar to the import module in python.

        Use only to import modules directly, like:
            DESDB ~> import module
            or
            DESDB ~> import module as name

        Functions inside module need to be wrapped for easyaccess, like

        from eautils.fun_utils import toeasyaccess

        @toeasyaccess
        def my_func(a,b):
            ...
            return column
        """
        # Local import: importlib is not imported at the top of this module
        import importlib

        # str.replace returns a new string; the result used to be discarded
        line = line.replace(';', '')
        tokens = line.split()
        if len(tokens) == 3 and tokens[1] == 'as':
            mod = tokens[0]
            modname = tokens[-1]
        elif len(tokens) == 1:
            mod = tokens[0]
            modname = tokens[0]
        else:
            print(colored('Use: import module OR import module as name', "red"))
            return
        try:
            # Import directly instead of exec'ing a generated statement
            module = importlib.import_module(mod)
            if modname in globals():
                # Name already bound: reload so edits to the module are seen
                reload_module = getattr(importlib, 'reload', None)
                if reload_module is None:
                    reload_module = reload  # noqa: F821 (Python 2 builtin)
                module = reload_module(module)
            # Keep the module reachable under the requested name
            globals()[modname] = module
            func_list = [f for f in getmembers(module) if (
                isfunction(f[1]) and hasattr(f[1], 'in_easyaccess'))]
            if len(func_list) > 0:
                print(colored("The following functions are accessible by easyaccess", "green"))
                print(colored("i.e., they are wrapped with @toeasyaccess", "green"))
                print('')
                for f in func_list:
                    print('    ' + modname + '.' + f[0] + '()')
                    fun_utils.ea_func_dictionary[modname + '.' + f[0]] = f[1]
            else:
                print(colored("No function wrapped for easyaccess was found in " + modname, "red"))
                print(colored("See documentation to see how to wrap functions", "red"))
        except Exception:
            print_exception()
            return

    def do_help_function(self, line):
        """
        Print help from a loaded external function wrapped by @toeasyaccess
        It uses autocompletion

        Use: DESDB ~> help_function function

        Use: DESDB ~> help_function all
        To list all loaded functions
        """
        line = line.replace(';', '')
        line = line.replace('()', '')
        words = line.split()
        if words == []:
            return self.do_help('help_function')
        function = words[0]
        if function.lower() == 'all':
            print("\nThese are the loaded functions for easyaccess:\n")
            for k in fun_utils.ea_func_dictionary:
                print('    ' + k)
            return
        if function not in fun_utils.ea_func_dictionary:
            print(colored("\nFunction {0} is not loaded, please import module "
                          "(check help import for more info)\n".format(function), "red"))
            return
        else:
            print("\nHelp for {0}:\n".format(function))
            func = fun_utils.ea_func_dictionary[function]
            print(function + func.__doc1__)
            print(func.__doc__)

    def complete_help_function(self, text, line, start_index, end_index):
        # Completion candidates for 'help_function': the names of the loaded
        # functions. Always return a list; a dict_keys view (Python 3)
        # cannot be indexed.
        if text:
            return [function for function in fun_utils.ea_func_dictionary
                    if function.startswith(text)]
        else:
            return list(fun_utils.ea_func_dictionary)
class IterData(object):
    """
    Iterator class for cx_oracle

    Wraps a cursor on which a query has already been executed and yields
    its rows as pandas DataFrames, one `cursor.fetchmany()` batch at a
    time. The next batch is always fetched ahead, and the cursor is closed
    when an empty batch is reached.

    Parameters:
    -----------
    cursor     : Cursor with an executed query
    extra_func : None, or a (funs, args, names) triple of inline functions
                 that is applied to every batch with fun_utils.updateDF
    """

    def __init__(self, cursor, extra_func=None):
        self.rows_count = 0  # number of rows handed out so far
        self.cursor = cursor
        self.extra_func = extra_func
        self.data = self._fetch_chunk()

    def _fetch_chunk(self):
        # Fetch the next batch and apply the inline functions (if any) to it
        chunk = pd.DataFrame(self.cursor.fetchmany(), columns=[
            rec[0] for rec in self.cursor.description])
        if self.extra_func is not None and not chunk.empty:
            funs, args, names = self.extra_func
            for kf in range(len(funs)):
                chunk = fun_utils.updateDF(chunk, funs, args, names, kf)
        return chunk

    def __iter__(self):
        return self

    def next(self):
        if self.data.empty:
            self.cursor.close()
            raise StopIteration('No more data in the DB')
        data = self.data
        self.rows_count += len(data)
        self.data = self._fetch_chunk()
        return data

    # Python 3 iterator protocol; without it 'for chunk in IterData(...)'
    # raises TypeError. The explicit .next() keeps working as before.
    __next__ = next
def ping(self, quiet=None):
    """
    Check whether the connection to the DB is still alive.

    Parameters:
    -----------
    quiet : Don't print the outcome; defaults to the `quiet` setting of
            the connection

    Returns:
    --------
    True if the DB answered the ping, False otherwise
    """
    if quiet is None:
        quiet = self.quiet
    try:
        self.con.ping()
    except Exception:
        # Any driver error counts as a lost connection. KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        if not quiet:
            print('Connection with DB lost')
        return False
    if not quiet:
        print('Still connected to DB')
    return True
whether function is wrapped @toeasyaccess and add module to library. 136 | 137 | Parameters: 138 | ----------- 139 | import_line : the usual line after import. 140 | help : Print current loaded functions wrapped for easyaccess 141 | 142 | 143 | Use: 144 | ---- 145 | ea_import('module as name') 146 | ea_import('my_module') 147 | 148 | Returns: 149 | -------- 150 | 151 | Add functions from module to internal library to be used inline queries 152 | """ 153 | 154 | if help: 155 | self.do_help_function('all') 156 | return True 157 | if import_line != '': 158 | self.do_import(' ' + import_line) 159 | return True 160 | 161 | def query_to_pandas(self, query, prefetch='', iterator=False): 162 | """ 163 | Executes a query and return the results in pandas DataFrame. If result is too big 164 | it is better to save results to a file 165 | 166 | Parameters: 167 | ----------- 168 | query : The SQL query to be executed 169 | prefetch : Number of rows to retrieve at each trip to the DB 170 | iterator : Return interator, get data with .next() method (to avoid get all data at once) 171 | 172 | Returns: 173 | -------- 174 | If iterator is False (default) the function returns a pandas DataFrame 175 | with the result of the query. If the iterator is True, it will return an iterator 176 | to retrieve data one piece at a time. 
177 | """ 178 | cursor = self.con.cursor() 179 | cursor.arraysize = int(self.prefetch) 180 | if prefetch != '': 181 | cursor.arraysize = int(prefetch) 182 | query = query.replace(';', '') 183 | query, funs, args, names = fun_utils.parseQ(query, myglobals=globals()) 184 | extra_func = [funs, args, names] 185 | if funs is None: 186 | extra_func = None 187 | temp = cursor.execute(query) 188 | if temp.description is not None: 189 | if iterator: 190 | data = IterData(temp, extra_func) 191 | else: 192 | data = pd.DataFrame(temp.fetchall(), columns=[rec[0] for rec in temp.description]) 193 | if extra_func is not None: 194 | for kf in range(len(funs)): 195 | data = fun_utils.updateDF(data, funs, args, names, kf) 196 | else: 197 | data = "" 198 | if not iterator: 199 | cursor.close() 200 | return data 201 | 202 | def describe_table(self, tablename): 203 | """ 204 | Describes a table from the DB 205 | """ 206 | return self.do_describe_table(tablename, False, return_df=True) 207 | 208 | def loadsql(self, filename): 209 | """ 210 | Reads sql statement from a file, returns query to be parsed in 211 | query_and_save, query_to_pandas, etc. 212 | """ 213 | query = read_buf(filename) 214 | if query.find(';') > -1: 215 | query = query.split(';')[0] 216 | return query 217 | 218 | def mytables(self): 219 | """ 220 | List tables in own schema 221 | 222 | Returns: 223 | -------- 224 | A pandas dataframe with a list of owner's tables 225 | """ 226 | return self.do_mytables('', return_df=True, extra='') 227 | 228 | def myquota(self): 229 | """ 230 | Show quota in current database 231 | """ 232 | self.do_myquota('') 233 | 234 | def load_table(self, table_file, name=None, chunksize=None, memsize=None): 235 | """ 236 | Loads and create a table in the DB. If name is not passed, is taken from 237 | the filename. 
Formats supported are 'fits', 'csv' and 'tab' files 238 | 239 | Parameters: 240 | ----------- 241 | table_file : Filename to be uploaded as table (.csv, .fits, .tab) 242 | name : Name of the table to be created 243 | chunksize : Number of rows to upload at a time to avoid memory issues 244 | memsize : Size of chunk to be read. In Mb. 245 | If both specified, the lower number of rows is selected 246 | 247 | Returns: 248 | -------- 249 | True if success otherwise False 250 | 251 | """ 252 | try: 253 | self.do_load_table(table_file, name=name, chunksize=chunksize, memsize=memsize) 254 | return True 255 | except: 256 | # exception 257 | return False 258 | 259 | def append_table(self, table_file, name=None, chunksize=None, memsize=None): 260 | """ 261 | Appends data to a table in the DB. If name is not passed, is taken from 262 | the filename. Formats supported are 'fits', 'csv' and 'tab' files 263 | 264 | Parameters: 265 | ----------- 266 | table_file : Filename to be uploaded as table (.csv, .fits, .tab) 267 | name : Name of the table to be created 268 | chunksize : Number of rows to upload at a time to avoid memory issues 269 | memsize : Size of chunk to be read. In Mb. 270 | If both specified, the lower number of rows is selected 271 | 272 | Returns: 273 | -------- 274 | True if success otherwise False 275 | """ 276 | try: 277 | self.do_append_table(table_file, name=name, chunksize=chunksize, memsize=memsize) 278 | return True 279 | except: 280 | return False 281 | 282 | def find_tables(self, pattern=''): 283 | """ 284 | Lists tables and views matching an oracle pattern. 285 | 286 | Parameters: 287 | ----------- 288 | pattern : The patter to search tables for, e.g. Y1A1_GOLD 289 | 290 | Returns: 291 | -------- 292 | A pandas DataFram with the owners and table names. 
To select from a table use 293 | owner.table_name, is owner is DES_ADMIN just use table_name 294 | """ 295 | pattern = pattern.replace('%', '') 296 | pattern = ''.join(pattern.split()) 297 | pattern = "%" + pattern + "%" 298 | return self.do_find_tables(pattern, extra='', return_df=True) 299 | 300 | def pandas_to_db(self, df, tablename=None, append=False): 301 | """ Writes a pandas DataFrame directly to the DB 302 | 303 | Parameters: 304 | ----------- 305 | df : The DataFrame to be loaded to the DB 306 | tablename : The name of the table to be created 307 | append : Set True if appending to existing table, if table doesn't exists it is created 308 | 309 | 310 | Returns: 311 | -------- 312 | True or False depending on the success 313 | """ 314 | if tablename is None: 315 | print("Please indicate a tablename to be ingested in the DB") 316 | return False 317 | if self.check_table_exists(tablename) and not append: 318 | print( 319 | colored('\n Table already exists. Table can be removed with:', 'red', self.ct)) 320 | print(colored(' DESDB ~> DROP TABLE %s;\n' % 321 | tablename.upper(), 'red', self.ct)) 322 | return False 323 | df.file_type = 'pandas' 324 | if len(df) == 0: 325 | print('DataFrame is empty') 326 | return False 327 | dtypes = eafile.get_dtypes(df) 328 | columns = df.columns.values.tolist() 329 | values = df.values.tolist() 330 | if not self.check_table_exists(tablename): 331 | if append: 332 | print('Table does not exist. 
Creating table\n') 333 | self.create_table(tablename, columns, dtypes) 334 | self.insert_data(tablename, columns, values, dtypes) 335 | return True 336 | -------------------------------------------------------------------------------- /easyaccess/version.py: -------------------------------------------------------------------------------- 1 | """easyaccess version""" 2 | 3 | import logging 4 | import warnings 5 | from datetime import datetime 6 | 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | def last_pip_version(): 11 | import requests 12 | 13 | logging.getLogger("requests").setLevel(logging.WARNING) 14 | """ 15 | Return last available version of easyaccess from pypi 16 | """ 17 | url = "https://pypi.python.org/pypi/%s/json" % ("easyaccess",) 18 | data = requests.get(url, verify=False).json() 19 | uploads = [] 20 | for k in data["releases"].keys(): 21 | try: 22 | up_time = data["releases"][k][0]["upload_time"] 23 | uploads.append([k, datetime.strptime(up_time, "%Y-%m-%dT%H:%M:%S")]) 24 | except: 25 | pass 26 | return sorted(uploads, key=lambda x: x[1])[-1][0] 27 | 28 | 29 | version_tag = (1, 4, 8, 'dev') 30 | __version__ = ".".join(map(str, version_tag[:3])) 31 | 32 | if len(version_tag) > 3: 33 | __version__ = "%s-%s" % (__version__, version_tag[3]) 34 | -------------------------------------------------------------------------------- /paper/classes_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mgckind/easyaccess/c1cf082123094ec6435fb989d863825d294e461e/paper/classes_simple.png -------------------------------------------------------------------------------- /paper/easyaccess_users.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mgckind/easyaccess/c1cf082123094ec6435fb989d863825d294e461e/paper/easyaccess_users.png -------------------------------------------------------------------------------- 
/paper/easyaccess_welcome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mgckind/easyaccess/c1cf082123094ec6435fb989d863825d294e461e/paper/easyaccess_welcome.png -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @book{NumPy, 2 | place={USA}, 3 | title={A guide to NumPy}, 4 | publisher={Trelgol Publishing}, 5 | author={Oliphant, Travis E}, 6 | year={2006} 7 | } 8 | 9 | %pandas citation (per their instructions) 10 | @InProceedings{pandas, 11 | author = {Wes McKinney}, 12 | title = {Data Structures for Statistical Computing in Python}, 13 | booktitle = {Proceedings of the 9th Python in Science Conference}, 14 | pages = {51 - 56}, 15 | year = {2010}, 16 | editor= {St\'efan van der Walt and Jarrod Millman} 17 | } 18 | 19 | @Book{h5py, 20 | keywords = {python, hdf5}, 21 | year = {2013}, 22 | publisher = {O'Reilly}, 23 | title = {Python and HDF5}, 24 | author = {Andrew Collette} 25 | } 26 | 27 | @misc{PyTables, 28 | author = {PyTables Developers Team}, 29 | title = {{PyTables}: Hierarchical Datasets in {Python}}, 30 | year = {2002-2018}, 31 | url = "http://www.pytables.org/" 32 | } 33 | 34 | @misc{requests, 35 | author = {Kenneth Reitz}, 36 | title = {Requests: HTTP for Humans}, 37 | year = {2012-2018}, 38 | url = "http://docs.python-requests.org/" 39 | } 40 | 41 | @misc{fitsio, 42 | author = {Sheldon, Erin}, 43 | title = {fitsio}, 44 | year = {2018}, 45 | publisher = {GitHub}, 46 | journal = {GitHub repository}, 47 | howpublished = {\url{https://github.com/esheldon/fitsio}} 48 | } 49 | 50 | @conference{jupyter, 51 | Author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando P{\'e}rez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica 52 | Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Dami{\'a}n Avila and 
Safia Abdalla and Carol Willing}, 53 | Booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, 54 | Editor = {F. Loizides and B. Schmidt}, 55 | Organization = {IOS Press}, 56 | Pages = {87 - 90}, 57 | Title = {Jupyter Notebooks -- a publishing format for reproducible computational workflows}, 58 | Year = {2016}} 59 | 60 | 61 | 62 | @ONLINE{hdf5, 63 | author = {{The HDF Group}}, 64 | title = "{Hierarchical Data Format, version 5}", 65 | year = {1997-2018}, 66 | note = {http://www.hdfgroup.org/HDF5/} 67 | } 68 | 69 | @ARTICLE{FITS, 70 | author = {{Wells}, D.~C. and {Greisen}, E.~W. and {Harten}, R.~H.}, 71 | title = "{FITS - a Flexible Image Transport System}", 72 | journal = {\aaps}, 73 | year = 1981, 74 | month = jun, 75 | volume = 44, 76 | pages = {363}, 77 | adsurl = {http://adsabs.harvard.edu/abs/1981A%26AS...44..363W}, 78 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 79 | } 80 | 81 | 82 | @misc{termcolor, 83 | author = {Lepa, Konstantin}, 84 | title = {termcolor}, 85 | year = {2018}, 86 | publisher = {PyPi}, 87 | journal = {PyPi Repository}, 88 | howpublished = {\url{https://pypi.org/project/termcolor/}} 89 | } 90 | 91 | @misc{cxoracle, 92 | author = {{Oracle Corp.}}, 93 | title = {cxOracle}, 94 | year = {2018}, 95 | publisher = {GitHub}, 96 | journal = {GitHub repository}, 97 | howpublished = {\url{https://github.com/oracle/python-cx_Oracle}} 98 | } 99 | 100 | 101 | @ARTICLE{DES2005, 102 | author = {{DES Collaboration}}, 103 | title = "{The Dark Energy Survey}", 104 | journal = {ArXiv Astrophysics e-prints}, 105 | eprint = {astro-ph/0510346}, 106 | keywords = {Astrophysics}, 107 | year = 2005, 108 | month = oct, 109 | adsurl = {http://adsabs.harvard.edu/abs/2005astro.ph.10346T}, 110 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 111 | } 112 | 113 | @ARTICLE{DES2016, 114 | author = {{DES Collaboration}}, 115 | title = "{The Dark Energy Survey: more than dark energy - an overview}", 116 |
journal = {MNRAS}, 117 | archivePrefix = "arXiv", 118 | eprint = {1601.00329}, 119 | keywords = {surveys, minor planets, asteroids: general, supernovae: general, Galaxy: general, galaxies: general, quasars: general}, 120 | year = 2016, 121 | month = aug, 122 | volume = 460, 123 | pages = {1270-1299}, 124 | doi = {10.1093/mnras/stw641}, 125 | adsurl = {http://adsabs.harvard.edu/abs/2016MNRAS.460.1270D}, 126 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 127 | } 128 | 129 | @ARTICLE{DR1, 130 | author = {{DES Collaboration}}, 131 | title = "{The Dark Energy Survey: Data Release 1}", 132 | journal = {\apjs}, 133 | archivePrefix = "arXiv", 134 | eprint = {1801.03181}, 135 | primaryClass = "astro-ph.IM", 136 | keywords = {astronomical databases: miscellaneous, catalogs, cosmology: observations, surveys, techniques: image processing, techniques: photometric }, 137 | year = 2018, 138 | month = dec, 139 | volume = 239, 140 | eid = {18}, 141 | pages = {18}, 142 | doi = {10.3847/1538-4365/aae9f0}, 143 | adsurl = {http://adsabs.harvard.edu/abs/2018ApJS..239...18A}, 144 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 145 | } 146 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'easyaccess: Enhanced SQL command line interpreter for astronomical surveys' 3 | pubnumber: FERMILAB-PUB-18-520-AE 4 | tags: 5 | - Python 6 | - Astronomy 7 | - SQL 8 | - Surveys 9 | authors: 10 | - name: Matias Carrasco Kind 11 | orcid: 0000-0002-4802-3194 12 | affiliation: 1 13 | - name: Alex Drlica-Wagner 14 | orcid: 0000-0001-8251-933X 15 | affiliation: 2 16 | - name: Audrey Koziol 17 | orcid: 0000-0001-8234-2116 18 | affiliation: 1 19 | - name: Don Petravick 20 | orcid: 0000-0002-3685-2497 21 | affiliation: 1 22 | affiliations: 23 | - name: National Center for Supercomputing Applications, University of Illinois at 
Urbana-Champaign. 1205 W Clark St, Urbana, IL USA 61801 24 | index: 1 25 | - name: Fermi National Accelerator Laboratory, P. O. Box 500, Batavia, IL 60510, USA 26 | index: 2 27 | date: 27 Sep 2018 28 | bibliography: paper.bib 29 | --- 30 | 31 | 32 | # Summary 33 | 34 | `easyaccess` is an enhanced command line interpreter and Python package created to facilitate access to astronomical catalogs stored in SQL Databases. It provides a custom interface with custom commands and was specifically designed to access data from the [Dark Energy Survey](https://www.darkenergysurvey.org/) Oracle database, although it can easily be extended to another survey or SQL database. The package was completely written in [Python](https://www.python.org/) and supports customized addition of commands and functionalities. 35 | Visit [https://github.com/mgckind/easyaccess](https://github.com/mgckind/easyaccess) to view installation instructions, tutorials, and the Python source code for `easyaccess`. 36 | 37 | # The Dark Energy Survey 38 | 39 | The Dark Energy Survey (DES) [@DES2005; @DES2016] is an international, collaborative effort of over 500 scientists from 26 institutions in seven countries. The primary goals of DES are to reveal the nature of the mysterious dark energy and dark matter by mapping hundreds of millions of galaxies, detecting thousands of supernovae, and finding patterns in the large-scale structure of the Universe. Survey operations began on August 31, 2013 and will conclude in early 2019. For about 500 nights, DES has been taking thousands of deep images of the southern sky, which are transferred and processed at the National Center for Supercomputing Applications ([NCSA](http://www.ncsa.illinois.edu/)). The images are processed to produce catalogs of astronomical sources with hundreds of millions of entries (billions in the case of individual detections), describing the sources found within the images and other relevant metadata.
40 | A significant subset of the DES data was recently [made public](https://des.ncsa.illinois.edu/releases/dr1) [@DR1] and can be accessed through several mechanisms including `easyaccess` and [web interfaces](https://des.ncsa.illinois.edu/easyweb/) that run `easyaccess` as a backend. This public release includes information for almost 400 million astrophysical sources and complementary tables to allow scientific analysis. 41 | 42 | ## DES users 43 | 44 | The first release of `easyaccess` was on February 17th, 2015 and since then, over 300 DES Collaborators have used it to access the DES databases (Figure 1). We note that roughly 800 DES accounts exist, but this includes all database users including those that created accounts before the release of `easyaccess`. In August 2018 (version 1.4.4), we added support for the public DES data release, and since then we have increased the number of public users. 45 | 46 | ![Number of users since the first version](easyaccess_users.png) 47 | 48 | # `easyaccess` 49 | 50 | `easyaccess` is a command line interpreter that is heavily based on `termcolor` [@termcolor] and the [`cmd`](https://docs.python.org/3/library/cmd.html) Python core module. It interfaces with `cx_Oracle` [@cxoracle] to communicate with Oracle, `requests` [@requests] for external URL requests, and other external open source libraries, including NumPy [@NumPy], `pandas` [@pandas], `fitsio` [@fitsio] and `h5py` [@h5py] to handle and transform array data. 51 | Figure 2 shows an example of the welcome screen as seen by a DES user. 52 | 53 | ![Welcome screenshot](easyaccess_welcome.png) 54 | 55 | ## Features 56 | 57 | `easyaccess` has a variety of features including a history of past commands and smart tab auto-completion for commands, functions, columns, users, tables, and paths. Tables can be written directly into comma-separated-value (CSV) or white-space separated text files, FITS [@FITS] files, and HDF5 [@hdf5] files.
It provides an iteration scheme to avoid memory constraints when retrieving large tables. Tables can also be displayed on the command line and most of the formatting is done using `pandas`. Similarly, privileged users can easily upload tables to the database from any of the file formats described above in order to share data with other users. The uploading mechanism is done chunk-wise, allowing large tables to be loaded while keeping memory usage low. 58 | 59 | In addition, there are a variety of customized functions to search and describe the tables, search for users and user tables, check quota usage, check the Oracle execution plan, and soon the ability to run asynchronous jobs through a dedicated server. There are dozens of other minor features that allow for a seamless experience while exploring and discovering data within the hundreds of tables inside the DB. 60 | 61 | One can also load SQL queries from a file into the database, or run SQL queries inside the `easyaccess` python module in another IDE. Most of the features are also exposed through a Python API and can be run inside a Jupyter [@jupyter] notebook or similar tool alongside scientific analyses. 62 | 63 | Users can also use `easyaccess` to submit and request cutouts around specific positions or objects which are generated from the images. This allows better integration with other data services for a richer scientific workflow. 64 | 65 | 66 | ## Installation 67 | 68 | API documentation and installation instructions are available in the [online documentation](http://matias-ck.com/easyaccess/) and the [Readme](https://github.com/mgckind/easyaccess) file in the repository. 69 | 70 | 71 | # Acknowledgments 72 | 73 | The DES Data Management System is supported by the National Science Foundation under Grant NSF AST 07-15036 and NSF AST 08-13543.
74 | 75 | # References 76 | -------------------------------------------------------------------------------- /paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mgckind/easyaccess/c1cf082123094ec6435fb989d863825d294e461e/paper/paper.pdf -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | try: 4 | from setuptools import setup, find_packages 5 | except ImportError: 6 | from distutils.core import setup 7 | 8 | prjdir = os.path.dirname(__file__) 9 | __version__ = '' 10 | 11 | 12 | def read(filename): 13 | return open(os.path.join(prjdir, filename)).read() 14 | 15 | 16 | exec(open('easyaccess/version.py').read()) 17 | 18 | 19 | if sys.argv[-1] == 'publish': 20 | os.system("python setup.py sdist upload") 21 | os.system("python setup.py bdist_wheel --universal upload ") 22 | print("You probably want to also tag the version now:") 23 | print(" git tag -a %s -m 'version %s'" % (__version__, __version__)) 24 | print(" git push --tags") 25 | sys.exit() 26 | 27 | extra_link_args = [] 28 | libraries = [] 29 | library_dirs = [] 30 | include_dirs = [] 31 | try: 32 | pkgs = find_packages() 33 | except NameError: 34 | pkgs = ['easyaccess', 'easyaccess.eautils', 'tests'] 35 | setup( 36 | name='easyaccess', 37 | version=__version__, 38 | author='Matias Carrasco Kind', 39 | author_email='mcarras2@illinois.edu', 40 | scripts=['bin/easyaccess'], 41 | packages=pkgs, 42 | license='LICENSE.txt', 43 | description='Easy access to the DES DB. 
Enhanced command line SQL interpreter client for DES', 44 | long_description=read('README.md'), 45 | url='https://github.com/des-labs/easyaccess', 46 | install_requires=['pandas >= 0.14', 'termcolor', 'fitsio == 1.0.5', 'setuptools', 47 | 'cx_Oracle == 8.0.1', 'numpy == 1.19.2', 'future >= 0.15.0', 'requests'], 48 | ) 49 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import unittest 3 | import easyaccess as ea 4 | import numpy as np 5 | import pandas as pd 6 | import os 7 | import fitsio 8 | 9 | 10 | def create_test_data(): 11 | r = np.linspace(0, 360, 100) 12 | d = np.linspace(-90, 90, 100) 13 | ra, dec = np.meshgrid(r, d) 14 | dtype = [('RA', float), ('DEC', float)] 15 | return np.rec.fromarrays([ra.flat, dec.flat], dtype=dtype) 16 | 17 | 18 | class TestApi(unittest.TestCase): 19 | 20 | con = ea.connect(quiet=True) 21 | tablename = 'testtable' 22 | nrows = 10000 23 | prefetch = 4000 24 | chunk = 1000 25 | memsize = 1 26 | sqlfile = 'temp.sql' 27 | csvfile = 'temp.csv' 28 | fitsfile = 'temp.fits' 29 | h5file = 'temp.h5' 30 | 31 | def test_ea_import(self): 32 | print('\n*** test_ea_import ***\n') 33 | test1 = self.con.ea_import('wrapped') 34 | if test1 is not None: 35 | self.assertTrue(test1) 36 | test2 = self.con.ea_import('wrapped', help=True) 37 | if test2 is not None: 38 | self.assertTrue(test2) 39 | 40 | def test_pandas_to_db(self): 41 | print('\n*** test_pandas_to_db ***\n') 42 | data = create_test_data() 43 | df = pd.DataFrame(data) 44 | self.assertEqual(len(df), self.nrows) 45 | try: 46 | self.con.drop_table(self.tablename) 47 | except: 48 | pass 49 | self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename)) 50 | cursor = self.con.cursor() 51 | self.assertTrue(self.con.ping()) 52 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 53 | fetch = 
temp.fetchall() 54 | self.assertEqual(len(fetch), self.nrows) 55 | # appending 56 | self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename, append=True)) 57 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 58 | fetch = temp.fetchall() 59 | self.assertEqual(len(fetch), self.nrows * 2) 60 | self.con.drop_table(self.tablename) 61 | self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename, append=True)) 62 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 63 | fetch = temp.fetchall() 64 | self.assertEqual(len(fetch), self.nrows) 65 | self.con.drop_table(self.tablename) 66 | cursor.close() 67 | 68 | def test_query_to_pandas(self): 69 | print('\n*** test_query_to_pandas ***\n') 70 | data = create_test_data() 71 | df = pd.DataFrame(data) 72 | self.assertEqual(len(df), self.nrows) 73 | try: 74 | self.con.drop_table(self.tablename) 75 | except: 76 | pass 77 | self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename)) 78 | query = 'select RA,DEC from {:}'.format(self.tablename.upper()) 79 | df2 = self.con.query_to_pandas(query) 80 | self.assertEqual(len(df), len(df2)) 81 | self.assertEqual(df.columns.values.tolist().sort(), df2.columns.values.tolist().sort()) 82 | # iterator 83 | df3 = self.con.query_to_pandas(query, prefetch=4000, iterator=True) 84 | self.assertEqual(len(df3.next()), 4000) 85 | self.assertEqual(df3.next().columns.values.tolist().sort(), 86 | df.columns.values.tolist().sort()) 87 | self.assertEqual(len(df3.next()), 2000) 88 | self.con.drop_table(self.tablename) 89 | 90 | def test_describe_table(self): 91 | print('\n*** test_describe_table ***\n') 92 | data = create_test_data() 93 | df = pd.DataFrame(data) 94 | self.assertEqual(len(df), self.nrows) 95 | try: 96 | self.con.drop_table(self.tablename) 97 | except: 98 | pass 99 | self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename)) 100 | self.assertEqual(len(self.con.describe_table(self.tablename)), 2) 101 | 
self.con.drop_table(self.tablename) 102 | 103 | def test_loadsql(self): 104 | print('\n*** test_loadsql ***\n') 105 | data = create_test_data() 106 | df = pd.DataFrame(data) 107 | self.assertEqual(len(df), self.nrows) 108 | try: 109 | self.con.drop_table(self.tablename) 110 | except: 111 | pass 112 | self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename)) 113 | query = """ 114 | -- This is a comment 115 | select RA, DEC from %s -- this is another comment 116 | """ % self.tablename 117 | with open(self.sqlfile, 'w') as F: 118 | F.write(query) 119 | df2 = self.con.query_to_pandas(self.con.loadsql(self.sqlfile)) 120 | self.assertEqual(len(df), len(df2)) 121 | self.assertEqual(df.columns.values.tolist().sort(), df2.columns.values.tolist().sort()) 122 | query = """ 123 | -- This is a comment 124 | select RA, DEC from %s ; -- this is another comment 125 | """ % self.tablename 126 | with open(self.sqlfile, 'w') as F: 127 | F.write(query) 128 | df2 = self.con.query_to_pandas(self.con.loadsql(self.sqlfile)) 129 | self.assertEqual(len(df), len(df2)) 130 | self.assertEqual(df.columns.values.tolist().sort(), df2.columns.values.tolist().sort()) 131 | self.con.drop_table(self.tablename) 132 | os.remove(self.sqlfile) 133 | 134 | @unittest.skip("need to change table name") 135 | def test_mytables(self): 136 | print('\n*** test_mytables ***\n') 137 | df = self.con.mytables() 138 | self.assertTrue('FGOTTENMETADATA' in df['TABLE_NAME'].values.tolist()) 139 | 140 | def test_load_table_csv(self): 141 | print('\n*** test_load_table_csv ***\n') 142 | data = create_test_data() 143 | df = pd.DataFrame(data) 144 | self.assertEqual(len(df), self.nrows) 145 | df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') 146 | self.assertTrue(os.path.exists(self.csvfile)) 147 | self.con.drop_table(os.path.splitext(self.csvfile)[0].upper()) 148 | # name from filename 149 | self.assertTrue(self.con.load_table(self.csvfile)) 150 | cursor = self.con.cursor() 151 | temp = 
cursor.execute('select RA,DEC from %s' % os.path.splitext(self.csvfile)[0].upper()) 152 | fetch = temp.fetchall() 153 | self.assertEqual(len(fetch), self.nrows) 154 | # appending 155 | self.assertTrue(self.con.append_table(self.csvfile)) 156 | cursor = self.con.cursor() 157 | temp = cursor.execute('select RA,DEC from %s' % os.path.splitext(self.csvfile)[0].upper()) 158 | fetch = temp.fetchall() 159 | self.assertEqual(len(fetch), self.nrows * 2) 160 | self.con.drop_table(os.path.splitext(self.csvfile)[0].upper()) 161 | # name from tablename 162 | self.con.drop_table(self.tablename) 163 | self.assertTrue(self.con.load_table(self.csvfile, name=self.tablename)) 164 | cursor = self.con.cursor() 165 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 166 | fetch = temp.fetchall() 167 | self.assertEqual(len(fetch), self.nrows) 168 | # appending 169 | self.assertTrue(self.con.append_table(self.csvfile, name=self.tablename)) 170 | cursor = self.con.cursor() 171 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 172 | fetch = temp.fetchall() 173 | self.assertEqual(len(fetch), self.nrows * 2) 174 | # chunksize 175 | self.con.drop_table(self.tablename) 176 | self.assertTrue(self.con.load_table( 177 | self.csvfile, name=self.tablename, chunksize=self.chunk)) 178 | cursor = self.con.cursor() 179 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 180 | fetch = temp.fetchall() 181 | self.assertEqual(len(fetch), self.nrows) 182 | # appending 183 | self.assertTrue(self.con.append_table( 184 | self.csvfile, name=self.tablename, chunksize=self.chunk)) 185 | cursor = self.con.cursor() 186 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 187 | fetch = temp.fetchall() 188 | self.assertEqual(len(fetch), self.nrows * 2) 189 | self.con.drop_table(self.tablename) 190 | try: 191 | os.remove(self.csvfile) 192 | except: 193 | pass 194 | 195 | def test_load_append_table_memory_csv(self): 196 | 
print('\n*** test_load_append_table_memory_csv ***\n') 197 | data = create_test_data() 198 | df = pd.DataFrame(data) 199 | self.assertEqual(len(df), self.nrows) 200 | df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') 201 | for i in range(9): 202 | df.to_csv(self.csvfile, index=False, float_format='%.8f', 203 | sep=',', mode='a', header=False) 204 | self.assertTrue(os.path.exists(self.csvfile)) 205 | # memsize 206 | self.con.drop_table(self.tablename) 207 | self.assertTrue(self.con.load_table( 208 | self.csvfile, name=self.tablename, memsize=self.memsize)) 209 | cursor = self.con.cursor() 210 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 211 | fetch = temp.fetchall() 212 | self.assertEqual(len(fetch), self.nrows * 10) 213 | # appending 214 | self.assertTrue(self.con.append_table( 215 | self.csvfile, name=self.tablename, memsize=self.memsize)) 216 | cursor = self.con.cursor() 217 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 218 | fetch = temp.fetchall() 219 | self.assertEqual(len(fetch), self.nrows * 20) 220 | # end 221 | try: 222 | os.remove(self.csvfile) 223 | except: 224 | pass 225 | self.con.drop_table(self.tablename) 226 | 227 | def test_load_append_table_memory_chunk_csv(self): 228 | print('\n*** test_load_append_table_memory_chunk_csv ***\n') 229 | data = create_test_data() 230 | df = pd.DataFrame(data) 231 | self.assertEqual(len(df), self.nrows) 232 | df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') 233 | for i in range(9): 234 | df.to_csv(self.csvfile, index=False, float_format='%.8f', 235 | sep=',', mode='a', header=False) 236 | self.assertTrue(os.path.exists(self.csvfile)) 237 | # memsize 238 | self.con.drop_table(self.tablename) 239 | self.assertTrue(self.con.load_table(self.csvfile, name=self.tablename, 240 | memsize=self.memsize, chunksize=self.chunk * 10)) 241 | cursor = self.con.cursor() 242 | temp = cursor.execute('select RA,DEC from %s' % 
self.tablename.upper()) 243 | fetch = temp.fetchall() 244 | self.assertEqual(len(fetch), self.nrows * 10) 245 | # appending 246 | self.assertTrue(self.con.append_table(self.csvfile, name=self.tablename, 247 | memsize=self.memsize, chunksize=self.chunk * 200)) 248 | cursor = self.con.cursor() 249 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 250 | fetch = temp.fetchall() 251 | self.assertEqual(len(fetch), self.nrows * 20) 252 | # end 253 | os.remove(self.csvfile) 254 | self.con.drop_table(self.tablename) 255 | 256 | def test_load_table_fits(self): 257 | print('\n*** test_load_table_fits ***\n') 258 | data = create_test_data() 259 | fitsio.write(self.fitsfile, data, clobber=True) 260 | self.assertTrue(os.path.exists(self.fitsfile)) 261 | self.con.drop_table(os.path.splitext(self.fitsfile)[0].upper()) 262 | # name from filename 263 | self.assertTrue(self.con.load_table(self.fitsfile)) 264 | cursor = self.con.cursor() 265 | temp = cursor.execute('select RA,DEC from %s' % os.path.splitext(self.fitsfile)[0].upper()) 266 | fetch = temp.fetchall() 267 | self.assertEqual(len(fetch), self.nrows) 268 | # appending 269 | self.assertTrue(self.con.append_table(self.fitsfile)) 270 | cursor = self.con.cursor() 271 | temp = cursor.execute('select RA,DEC from %s' % os.path.splitext(self.fitsfile)[0].upper()) 272 | fetch = temp.fetchall() 273 | self.assertEqual(len(fetch), self.nrows * 2) 274 | self.con.drop_table(os.path.splitext(self.fitsfile)[0].upper()) 275 | # name from tablename 276 | self.con.drop_table(self.tablename) 277 | self.assertTrue(self.con.load_table(self.fitsfile, name=self.tablename)) 278 | cursor = self.con.cursor() 279 | temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) 280 | fetch = temp.fetchall() 281 | self.assertEqual(len(fetch), self.nrows) 282 | # appending 283 | self.assertTrue(self.con.append_table(self.fitsfile, name=self.tablename)) 284 | cursor = self.con.cursor() 285 | temp = cursor.execute('select 
def test_load_append_table_memory_chunk_fits(self):
    """Load, then append, a 16-fold FITS file using ``memsize`` and ``chunksize``.

    The table must hold ``nrows * 16`` rows after the load and
    ``nrows * 2 * 16`` after the append.
    """
    print('\n*** test_load_append_table_memory_chunk_fits ***\n')
    records = create_test_data()
    # Four doublings give 16 copies of the base data set.
    for _ in range(4):
        records = np.concatenate((records, records))
    fitsio.write(self.fitsfile, records, clobber=True)
    self.assertTrue(os.path.exists(self.fitsfile))

    # memsize
    self.con.drop_table(self.tablename)
    loaded = self.con.load_table(self.fitsfile, name=self.tablename,
                                 memsize=self.memsize, chunksize=self.chunk * 10)
    self.assertTrue(loaded)
    rows = self.con.cursor().execute(
        'select RA,DEC from %s' % self.tablename.upper()).fetchall()
    self.assertEqual(len(rows), self.nrows * 16)

    # appending
    appended = self.con.append_table(self.fitsfile, name=self.tablename,
                                     memsize=self.memsize, chunksize=self.chunk * 200)
    self.assertTrue(appended)
    rows = self.con.cursor().execute(
        'select RA,DEC from %s' % self.tablename.upper()).fetchall()
    self.assertEqual(len(rows), self.nrows * 2 * 16)

    # end
    os.remove(self.fitsfile)
    self.con.drop_table(self.tablename)
def test_query_and_save(self):
    """Exercise ``query_and_save`` for csv/fits/h5 output and for split files.

    First the test table is written once to each of the three formats. Then
    the table is grown to 35 copies of the data, ``outfile_max_mb`` is
    lowered to 1 and the csv/fits outputs are expected to be split into at
    least four numbered parts (``<stem>_000001.<ext>`` ...).
    """
    print('\n*** test_query_and_save ***\n')
    data = create_test_data()
    df = pd.DataFrame(data)
    self.assertEqual(len(df), self.nrows)
    cursor = self.con.cursor()
    try:
        self.con.drop_table(self.tablename)
    except Exception:
        # Best effort: a failed drop must not abort the test, but do not
        # swallow KeyboardInterrupt/SystemExit as a bare ``except`` would.
        pass
    self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename))
    query = 'select RA,DEC from %s' % self.tablename.upper()

    single_outputs = (self.csvfile, self.fitsfile, self.h5file)
    for outfile in single_outputs:
        self.con.query_and_save(query, outfile, print_time=False)
        self.assertTrue(os.path.exists(outfile))
    for outfile in single_outputs:
        os.remove(outfile)

    for _ in range(34):
        self.assertTrue(self.con.pandas_to_db(df, tablename=self.tablename, append=True))
    fetch = cursor.execute(query).fetchall()
    self.assertEqual(len(fetch), self.nrows * 35)

    self.con.outfile_max_mb = 1
    try:
        for outfile, ext in ((self.csvfile, '.csv'), (self.fitsfile, '.fits')):
            self.con.query_and_save(query, outfile, print_time=False)
            stem = os.path.splitext(outfile)[0]
            for i in range(4):
                part = stem + '_00000' + str(i + 1) + ext
                self.assertTrue(os.path.exists(part))
                os.remove(part)
    finally:
        # Restore the limit even when an assertion above fails, so the
        # lowered value does not stay on the connection object.
        self.con.outfile_max_mb = 1000
    self.con.drop_table(self.tablename)
def create_test_data():
    """Return a record array with ``RA`` and ``DEC`` columns on a 100 x 100 grid.

    RA spans [0, 360] and DEC spans [-90, 90], 100 samples each; the two
    meshgrid planes are flattened into 10000 (RA, DEC) records.
    """
    ra_axis = np.linspace(0, 360, 100)
    dec_axis = np.linspace(-90, 90, 100)
    ra_grid, dec_grid = np.meshgrid(ra_axis, dec_axis)
    columns = [ra_grid.flat, dec_grid.flat]
    return np.rec.fromarrays(columns, dtype=[('RA', float), ('DEC', float)])
def test_add_comment(self):
    """Load the test table, then add a table comment and a column comment.

    The commands are only driven through ``onecmd``; nothing is asserted
    about their output.
    """
    print('\n*** test_add_comment ***\n')
    frame = pd.DataFrame(create_test_data())
    self.assertEqual(len(frame), self.nrows)
    self.con.drop_table(self.tablename)
    frame.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',')

    table = self.tablename.upper()
    commands = (
        "load_table %s --tablename %s" % (self.csvfile, self.tablename),
        "add_comment table %s 'Test table'" % table,
        "add_comment column %s.RA 'Coordinate'" % table,
        'describe_table %s;' % table,
    )
    for command in commands:
        self.con.onecmd(command)

    self.con.drop_table(self.tablename)
    os.remove(self.csvfile)
def test_select_by_chunks(self):
    """Select a 35-fold table into CSV with a 1 MB file limit.

    The table is loaded once and appended 34 times, ``prefetch`` is set to
    30000 and ``outfile_max_mb`` to 1; the select is then expected to
    produce at least six numbered CSV parts (``<stem>_000001.csv`` ...).
    """
    print('\n*** test_select_by_chunks ***\n')
    # NOTE(review): nothing in this module reads ``load_bar``; kept only to
    # preserve the original module-level side effect -- confirm and remove.
    global load_bar
    load_bar = False
    data = create_test_data()
    df = pd.DataFrame(data)
    self.assertEqual(len(df), self.nrows)
    df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',')
    self.assertTrue(os.path.exists(self.csvfile))
    self.con.drop_table(self.tablename)
    command = "load_table %s --tablename %s" % (self.csvfile, self.tablename)
    self.con.onecmd(command)
    cursor = self.con2.cursor()
    command = "append_table %s --tablename %s" % (self.csvfile, self.tablename)
    for _ in range(34):
        self.con.onecmd(command)
    temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper())
    fetch = temp.fetchall()
    self.assertEqual(len(fetch), self.nrows * 35)
    command = "prefetch set 30000"
    self.con.onecmd(command)

    self.con.outfile_max_mb = 1
    try:
        command = "select RA,DEC from %s ; > %s" % (self.tablename.upper(), self.csvfile)
        self.con.onecmd(command)
        stem = os.path.splitext(self.csvfile)[0]
        for i in range(6):
            part = stem + '_00000' + str(i + 1) + '.csv'
            self.assertTrue(os.path.exists(part))
            os.remove(part)
    finally:
        # ``con`` is shared by every test in the class; restore the limit
        # even when an assertion above fails so later tests are unaffected.
        self.con.outfile_max_mb = 1000
    self.con.drop_table(self.tablename)
    if os.path.exists(self.csvfile):
        os.remove(self.csvfile)
def test_load_append_table_chunk_csv(self):
    """Load, then append, the test CSV through the interpreter with ``--chunksize``.

    The table must hold ``nrows`` rows after ``load_table`` and
    ``nrows * 2`` after ``append_table``. The CSV file and the table are
    removed at the end, as the sibling load/append tests do.
    """
    print('\n*** test_load_append_table_chunk_csv ***\n')
    data = create_test_data()
    df = pd.DataFrame(data)
    self.assertEqual(len(df), self.nrows)
    df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',')
    self.assertTrue(os.path.exists(self.csvfile))
    # chunksize
    self.con.drop_table(self.tablename)
    command = "load_table %s --tablename %s --chunksize %s" % (
        self.csvfile, self.tablename, self.chunk)
    self.con.onecmd(command)
    cursor = self.con2.cursor()
    temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper())
    fetch = temp.fetchall()
    self.assertEqual(len(fetch), self.nrows)
    # appending
    command = "append_table %s --tablename %s --chunksize %s" % (
        self.csvfile, self.tablename, self.chunk)
    self.con.onecmd(command)
    cursor = self.con2.cursor()
    temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper())
    fetch = temp.fetchall()
    self.assertEqual(len(fetch), self.nrows * 2)
    # end: previously missing, which left the CSV file and the table behind
    os.remove(self.csvfile)
    self.con.drop_table(self.tablename)
def test_load_append_table_memory_chunk_csv(self):
    """Load, then append, a ten-fold CSV via ``--memsize`` and ``--chunksize``.

    Runs ``load_table`` and ``append_table`` through the interpreter and
    checks the row count (``nrows * 10``, then ``nrows * 20``) on the
    second connection.
    """
    print('\n*** test_load_append_table_memory_chunk_csv ***\n')
    frame = pd.DataFrame(create_test_data())
    self.assertEqual(len(frame), self.nrows)
    # One write with the header followed by nine header-less appends.
    frame.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',')
    for _ in range(9):
        frame.to_csv(self.csvfile, index=False, float_format='%.8f',
                     sep=',', mode='a', header=False)
    self.assertTrue(os.path.exists(self.csvfile))

    # memsize
    self.con.drop_table(self.tablename)
    steps = (
        ('load_table', self.chunk * 10, self.nrows * 10),
        # appending
        ('append_table', self.chunk * 200, self.nrows * 20),
    )
    for verb, chunksize, expected_rows in steps:
        command = "%s %s --tablename %s --memsize %s --chunksize %s" % (
            verb, self.csvfile, self.tablename, self.memsize, chunksize)
        self.con.onecmd(command)
        rows = self.con2.cursor().execute(
            'select RA,DEC from %s' % self.tablename.upper()).fetchall()
        self.assertEqual(len(rows), expected_rows)

    # end
    os.remove(self.csvfile)
    self.con.drop_table(self.tablename)
def test_load_append_table_name_fits(self):
    """Load, then append, the test FITS file into an explicitly named table.

    The table must hold ``nrows`` rows after ``load_table`` and
    ``nrows * 2`` after ``append_table``.
    """
    print('\n*** test_load_append_table_name_fits ***\n')
    fitsio.write(self.fitsfile, create_test_data(), clobber=True)
    self.assertTrue(os.path.exists(self.fitsfile))

    # name from tablename
    self.con.drop_table(self.tablename)
    steps = (
        ('load_table', self.nrows),
        # appending
        ('append_table', self.nrows * 2),
    )
    for verb, expected_rows in steps:
        command = "%s %s --tablename %s" % (verb, self.fitsfile, self.tablename)
        self.con.onecmd(command)
        rows = self.con2.cursor().execute(
            'select RA,DEC from %s' % self.tablename.upper()).fetchall()
        self.assertEqual(len(rows), expected_rows)

    os.remove(self.fitsfile)
    self.con.drop_table(self.tablename)
def test_load_append_table_memory_chunk_fits(self):
    """Load, then append, a 16-fold FITS file via ``--memsize`` and ``--chunksize``.

    Runs ``load_table`` and ``append_table`` through the interpreter and
    checks the row count (``nrows * 16``, then ``nrows * 2 * 16``) on the
    second connection.
    """
    print('\n*** test_load_append_table_memory_chunk_fits ***\n')
    records = create_test_data()
    # Four doublings give 16 copies of the base data set.
    for _ in range(4):
        records = np.concatenate((records, records))
    fitsio.write(self.fitsfile, records, clobber=True)
    self.assertTrue(os.path.exists(self.fitsfile))

    # memsize
    self.con.drop_table(self.tablename)
    steps = (
        ('load_table', self.chunk * 10, self.nrows * 16),
        # appending
        ('append_table', self.chunk * 200, self.nrows * 2 * 16),
    )
    for verb, chunksize, expected_rows in steps:
        command = "%s %s --tablename %s --memsize %s --chunksize %s" % (
            verb, self.fitsfile, self.tablename, self.memsize, chunksize)
        self.con.onecmd(command)
        rows = self.con2.cursor().execute(
            'select RA,DEC from %s' % self.tablename.upper()).fetchall()
        self.assertEqual(len(rows), expected_rows)

    # end
    os.remove(self.fitsfile)
    self.con.drop_table(self.tablename)
@unittest.skip("Need to re evaluate")
def test_inline(self):
    """Run an inline ``/*p: ... */`` function from the ``wrapped`` module in a select.

    Currently skipped. When enabled it loads the test table, imports
    ``wrapped`` as ``Y`` and checks that the saved CSV has ``nrows`` rows
    and a ``TESTCOL`` column.
    """
    print('\n*** test_inline ***\n')
    frame = pd.DataFrame(create_test_data())
    frame.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',')
    self.assertTrue(os.path.exists(self.csvfile))
    self.con.drop_table(self.tablename)

    commands = (
        "load_table %s --tablename %s" % (self.csvfile, self.tablename),
        "import wrapped as Y",
        "select /*p: Y.my_sum(ra,dec) as testcol */, dec from %s ; > %s" % (
            self.tablename, self.csvfile),
    )
    for command in commands:
        self.con.onecmd(command)

    self.assertTrue(os.path.exists(self.csvfile))
    result = pd.read_csv(self.csvfile, sep=',')
    self.assertEqual(len(result), self.nrows)
    column_names = result.columns.values.tolist()
    self.assertTrue('TESTCOL' in column_names)
    os.remove(self.csvfile)
    self.assertFalse(os.path.exists(self.csvfile))
    self.con.drop_table(self.tablename)
def create_test_data():
    """Return a record array with ``ra`` and ``dec`` columns on a 180 x 90 grid.

    ra spans [0, 360] with 180 samples and dec spans [-90, 90] with 90
    samples; the meshgrid planes are flattened into 16200 records.
    """
    r = np.linspace(0, 360, 180)
    d = np.linspace(-90, 90, 90)
    ra, dec = np.meshgrid(r, d)
    dtype = [('ra', float), ('dec', float)]
    return np.rec.fromarrays([ra.flat, dec.flat], dtype=dtype)


def create_test_fits(filename=None, data=None):
    """Write *data* to a FITS file with ``fitsio.write`` and return its name.

    *filename* defaults to ``BASENAME + '.fits'`` and *data* to
    ``create_test_data()``.
    """
    if filename is None:
        filename = BASENAME + '.fits'
    if data is None:
        data = create_test_data()
    fitsio.write(filename, data)
    return filename


def create_test_csv(filename=None, data=None):
    """Write *data* to a comma-separated file and return its name.

    *filename* defaults to ``BASENAME + '.csv'`` and *data* to
    ``create_test_data()``. Floats are written with 8 decimals, no index.
    """
    if filename is None:
        filename = BASENAME + '.csv'
    if data is None:
        data = create_test_data()
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False, float_format='%.8f', sep=',')
    return filename


def create_test_tab(filename=None, data=None):
    """Write *data* to a tab-separated file and return its name.

    *filename* defaults to ``BASENAME + '.tab'`` and *data* to
    ``create_test_data()``. Floats are written with 8 decimals, no index.
    """
    if filename is None:
        filename = BASENAME + '.tab'
    if data is None:
        data = create_test_data()
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False, float_format='%.8f', sep='\t')
    return filename


if __name__ == "__main__":
    import argparse
    description = __doc__
    parser = argparse.ArgumentParser(description=description)
    args = parser.parse_args()

    data = create_test_data()
    nrows = len(data)

    # Create the data files
    fitsfile = create_test_fits(data=data)
    csvfile = create_test_csv(data=data)
    tabfile = create_test_tab(data=data)

    filenames = [fitsfile, csvfile, tabfile]

    # Try loading through the python interface
    # NOTE: This requires a desservice.ini file
    conn = ea.connect()

    query = 'select * from %s' % BASENAME

    # Complains when the table doesn't exist, we could add:
    # if conn.check_table_exists(BASENAME): conn.drop_table(BASENAME)

    for filename in filenames:
        # First try loading through python interface
        print("*** TESTING PYTHON INTERFACE ***")
        conn.drop_table(BASENAME)
        conn.load_table(filename)
        df = conn.query_to_pandas(query)
        assert len(df) == nrows

        # Then try loading with explicit tablename
        print("*** TESTING PYTHON INTERFACE ***")
        conn.drop_table(BASENAME)
        conn.load_table(filename, BASENAME)
        df = conn.query_to_pandas(query)
        assert len(df) == nrows

        # Then try loading through interactive interface
        print("*** TESTING INTERACTIVE INTERFACE ***")
        conn.drop_table(BASENAME)
        cmd = 'load_table %s' % filename
        conn.onecmd(cmd)
        df = conn.query_to_pandas(query)
        assert len(df) == nrows

        # Then try from the command line
        print("*** TESTING COMMAND LINE INTERFACE ***")
        conn.drop_table(BASENAME)
        cmd = 'easyaccess --load_table %s' % filename
        # print() with one argument behaves the same on Python 2 and 3; the
        # former ``print cmd`` statement is a SyntaxError on Python 3.
        print(cmd)
        sub.check_call(cmd, shell=True)
        df = conn.query_to_pandas(query)
        assert len(df) == nrows

        # Now try downloading and re-uploading
        print("*** TESTING RE-UPLOAD ***")
        filename2 = BASENAME2 + splitext(filename)[-1]
        conn.query_and_save(query, filename2)
        conn.drop_table(BASENAME2)
        conn.load_table(filename2)
        query2 = 'select * from %s' % (BASENAME2)
        df2 = conn.query_to_pandas(query2)
        assert len(df2) == len(data)

    # Now try grabbing from existing table
    nrows = 100000
    query = 'select RA,DEC from Y1A1_COADD_OBJECTS@DESSCI where rownum <= %s;' % nrows
    for ext in ('.fits', '.csv', '.tab'):
        print("*** DOWNLOADING EXISTING TABLE ***")
        # Same table name that the clean-up loop below drops.
        basename = BASENAME3
        filename = basename + ext
        conn.query_and_save(query, filename)

        # Test through python interface
        print("*** TESTING PYTHON INTERFACE ***")
        conn.drop_table(basename)
        conn.load_table(filename)
        df = conn.query_to_pandas('select * from %s' % basename)
        assert len(df) == nrows

        # Then try loading through interactive interface
        print("*** TESTING INTERACTIVE INTERFACE ***")
        conn.drop_table(basename)
        cmd = 'load_table %s' % filename
        conn.onecmd(cmd)
        df = conn.query_to_pandas('select * from %s' % basename)
        assert len(df) == nrows

        # Then try from the command line
        print("*** TESTING COMMAND LINE INTERFACE ***")
        conn.drop_table(basename)
        cmd = 'easyaccess --load_table %s' % filename
        print(cmd)
        sub.check_call(cmd, shell=True)
        df = conn.query_to_pandas('select * from %s' % basename)
        assert len(df) == nrows

    # Clean up
    for table in [BASENAME, BASENAME2, BASENAME3]:
        print("*** DROPPING TABLE %s ***" % table)
        conn.drop_table(table)
    filenames = glob(BASENAME + '*.csv') + glob(BASENAME + '*.fits') + glob(BASENAME + '*.tab')
    for filename in filenames:
        print("*** REMOVING FILE %s ***" % filename)
        os.remove(filename)

    print('\n' + "*** TESTS COMPLETED SUCCESSFULLY ***")
+ a) * b 16 | 17 | 18 | @toeasyaccess 19 | def my_sum(a, b, min_value=None, max_value=None): 20 | """ 21 | Sum two colums, if max_values is defined the values are clipped 22 | to that value 23 | """ 24 | c = abs(a) + abs(b) 25 | if min_value is None: 26 | min_value = np.min(c) 27 | if max_value is None: 28 | max_value = np.max(c) 29 | return np.clip(c, float(min_value), float(max_value)) 30 | --------------------------------------------------------------------------------