├── .coveragerc ├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── codacy-analysis.yml │ ├── codeql-analysis.yml │ ├── publish-release.yml │ └── test-changes.yml ├── .gitignore ├── .readthedocs.yaml ├── .vscode ├── extensions.json ├── launch.json ├── settings.json └── tasks.json ├── CODE_OF_CONDUCT.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── README.txt ├── bin └── petl ├── docker-compose.yml ├── docs ├── Makefile ├── acknowledgments.rst ├── changes.rst ├── conf.py ├── config.rst ├── contributing.rst ├── index.rst ├── install.rst ├── intro.rst ├── io.rst ├── make.bat ├── petl-architecture.png ├── related_work.rst ├── transform.rst └── util.rst ├── examples ├── comparison.py ├── intro.py ├── io │ ├── csv.py │ ├── html.py │ ├── json.py │ ├── numpy.py │ ├── pandas.py │ ├── pickle.py │ ├── pytables.py │ ├── sqlite3.py │ ├── text.py │ ├── whoosh.py │ └── xml.py ├── notes │ ├── .gitignore │ ├── 20140424_example.ipynb │ ├── 20140424_example.py │ ├── 20141022_example.ipynb │ ├── 20141110_example.ipynb │ ├── 20150319 resolve conflicts.ipynb │ ├── 20150331 split null.ipynb │ ├── case_study_1.ipynb │ ├── issue_219.ipynb │ ├── issue_219.py │ ├── issue_256.ipynb │ └── issue_256.py ├── transform │ ├── basics.py │ ├── conversions.py │ ├── dedup.py │ ├── fills.py │ ├── headers.py │ ├── intervals.py │ ├── joins.py │ ├── maps.py │ ├── reductions.py │ ├── regex.py │ ├── reshape.py │ ├── selects.py │ ├── setops.py │ ├── sorts.py │ ├── unpacks.py │ └── validation.py └── util │ ├── base.py │ ├── counting.py │ ├── lookups.py │ ├── materialise.py │ ├── misc.py │ ├── parsers.py │ ├── random.py │ ├── statistics.py │ ├── timing.py │ └── vis.py ├── petl ├── __init__.py ├── comparison.py ├── compat.py ├── config.py ├── errors.py ├── io │ ├── __init__.py │ ├── avro.py │ ├── base.py │ ├── bcolz.py │ ├── csv.py │ ├── csv_py2.py │ ├── csv_py3.py │ ├── db.py │ ├── db_create.py │ ├── db_utils.py 
│ ├── gsheet.py │ ├── html.py │ ├── json.py │ ├── numpy.py │ ├── pandas.py │ ├── pickle.py │ ├── pytables.py │ ├── remotes.py │ ├── sources.py │ ├── text.py │ ├── whoosh.py │ ├── xls.py │ ├── xlsx.py │ ├── xlutils_view.py │ └── xml.py ├── test │ ├── __init__.py │ ├── conftest.py │ ├── failonerror.py │ ├── helpers.py │ ├── io │ │ ├── __init__.py │ │ ├── test_avro.py │ │ ├── test_avro_schemas.py │ │ ├── test_bcolz.py │ │ ├── test_csv.py │ │ ├── test_csv_unicode.py │ │ ├── test_db.py │ │ ├── test_db_create.py │ │ ├── test_db_server.py │ │ ├── test_gsheet.py │ │ ├── test_html.py │ │ ├── test_html_unicode.py │ │ ├── test_json.py │ │ ├── test_json_unicode.py │ │ ├── test_jsonl.py │ │ ├── test_numpy.py │ │ ├── test_pandas.py │ │ ├── test_pickle.py │ │ ├── test_pytables.py │ │ ├── test_remotes.py │ │ ├── test_sources.py │ │ ├── test_sqlite3.py │ │ ├── test_tees.py │ │ ├── test_text.py │ │ ├── test_text_unicode.py │ │ ├── test_whoosh.py │ │ ├── test_xls.py │ │ ├── test_xlsx.py │ │ └── test_xml.py │ ├── resources │ │ ├── test.xls │ │ ├── test.xlsx │ │ └── test.xml │ ├── test_comparison.py │ ├── test_fluent.py │ ├── test_helpers.py │ ├── test_interactive.py │ ├── transform │ │ ├── __init__.py │ │ ├── test_basics.py │ │ ├── test_conversions.py │ │ ├── test_dedup.py │ │ ├── test_fills.py │ │ ├── test_headers.py │ │ ├── test_intervals.py │ │ ├── test_joins.py │ │ ├── test_maps.py │ │ ├── test_reductions.py │ │ ├── test_regex.py │ │ ├── test_reshape.py │ │ ├── test_selects.py │ │ ├── test_setops.py │ │ ├── test_sorts.py │ │ ├── test_unpacks.py │ │ └── test_validation.py │ └── util │ │ ├── __init__.py │ │ ├── test_base.py │ │ ├── test_counting.py │ │ ├── test_lookups.py │ │ ├── test_materialise.py │ │ ├── test_misc.py │ │ ├── test_parsers.py │ │ ├── test_random.py │ │ ├── test_statistics.py │ │ ├── test_timing.py │ │ └── test_vis.py ├── transform │ ├── __init__.py │ ├── basics.py │ ├── conversions.py │ ├── dedup.py │ ├── fills.py │ ├── hashjoins.py │ ├── headers.py │ ├── 
intervals.py │ ├── joins.py │ ├── maps.py │ ├── reductions.py │ ├── regex.py │ ├── reshape.py │ ├── selects.py │ ├── setops.py │ ├── sorts.py │ ├── unpacks.py │ └── validation.py └── util │ ├── __init__.py │ ├── base.py │ ├── counting.py │ ├── lookups.py │ ├── materialise.py │ ├── misc.py │ ├── parsers.py │ ├── random.py │ ├── statistics.py │ ├── timing.py │ └── vis.py ├── pyproject.toml ├── pytest.ini ├── repr_html.ipynb ├── requirements-database.txt ├── requirements-docs.txt ├── requirements-formats.txt ├── requirements-linting.txt ├── requirements-optional.txt ├── requirements-remote.txt ├── requirements-tests.txt ├── setup.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | pragma: ${PY_MAJOR_VERSION} no cover 5 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Please see the [project documentation](http://petl.readthedocs.io/en/stable/contributing.html) for information about contributing to petl. 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for this project 3 | title: "Feature Request: " 4 | labels: ["Feature"] 5 | # projects: ["petl-developers/petl"] 6 | # assignees: 7 | # - juarezr 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: | 12 | ## Feature request 13 | 14 | Thanks for taking the time to fill out this feature request! 15 | - type: textarea 16 | id: request-objective 17 | attributes: 18 | label: Explain why petl needs this feature? 19 | description: | 20 | Please explain: 21 | - What would you want to achieve with this request? 
22 | - Is your feature request related to a problem or shortcoming? 23 | - Why the current behavior is a problem? 24 | placeholder: Please, tell us a clear and concise description of the feature. 25 | validations: 26 | required: true 27 | - type: markdown 28 | attributes: 29 | value: | 30 | ## Desired Solution 31 | - type: textarea 32 | id: desired-solution 33 | attributes: 34 | label: Describe the solution you would like 35 | description: | 36 | Please explain: 37 | - How you expect the feature would work. 38 | - What is the expected output/behavior for this feature. 39 | - What you think that shouldn't be done. 40 | placeholder: Please, tell us what did you expect to happen, what's the intended behavior. 41 | validations: 42 | required: true 43 | - type: textarea 44 | id: solution-alternatives 45 | attributes: 46 | label: Describe alternatives solutions you would have considered 47 | description: | 48 | Please explain: 49 | - If there are any any alternative solutions tha might work. 50 | - If there is any workaround for the problem. 51 | - Why this alternatives aren't satisfactory? 52 | validations: 53 | required: false 54 | - type: markdown 55 | attributes: 56 | value: | 57 | ## Suggestions 58 | - type: textarea 59 | id: source-code-example 60 | attributes: 61 | label: Source Code Examples 62 | description: | 63 | Whenever relevant, please provide a code sample, of what would be the syntax, the way you meant to use. 64 | This will be automatically formatted into code, so no need for backticks. 
65 | render: python 66 | validations: 67 | required: false 68 | - type: markdown 69 | attributes: 70 | value: | 71 | ## Additional context 72 | - type: textarea 73 | id: other-notes 74 | attributes: 75 | label: Additional Notes 76 | description: Anything not covered or N/A 77 | placeholder: n/a 78 | validations: 79 | required: false 80 | - type: checkboxes 81 | id: agree-to-code-of-conduct 82 | attributes: 83 | label: Code of Conduct 84 | description: By submitting this issue, you agree to follow the project [Code of Conduct](https://github.com/petl-developers/petl/blob/master/CODE_OF_CONDUCT.md). 85 | options: 86 | - label: I agree to follow this project's Code of Conduct 87 | required: true 88 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | This PR has the objective of . 3 | 4 | ## Changes 5 | 6 | 1. Added new feature for... 7 | 2. Fixed a bug in... 8 | 3. Changed the behavior of... 9 | 4. Improved the docs about... 10 | 11 | ## Checklist 12 | 13 | Use this checklist to ensure the quality of pull requests that include new code and/or make changes to existing code. 
14 | 15 | * [ ] Source Code guidelines: 16 | * [ ] Includes unit tests 17 | * [ ] New functions have docstrings with examples that can be run with doctest 18 | * [ ] New functions are included in API docs 19 | * [ ] Docstrings include notes for any changes to API or behavior 20 | * [ ] All changes are documented in docs/changes.rst 21 | * [ ] Versioning and history tracking guidelines: 22 | * [ ] Using atomic commits whenever possible 23 | * [ ] Commits are reversible whenever possible 24 | * [ ] There are no incomplete changes in the pull request 25 | * [ ] There is no accidental garbage added to the source code 26 | * [ ] Testing guidelines: 27 | * [ ] Tested locally using `tox` / `pytest` 28 | * [ ] Rebased to `master` branch and tested before sending the PR 29 | * [ ] Automated testing passes (see [CI](https://github.com/petl-developers/petl/actions)) 30 | * [ ] Unit test coverage has not decreased (see [Coveralls](https://coveralls.io/github/petl-developers/petl)) 31 | * [ ] State of these changes is: 32 | * [ ] Just a proof of concept 33 | * [ ] Work in progress / Further changes needed 34 | * [ ] Ready to review 35 | * [ ] Ready to merge 36 | -------------------------------------------------------------------------------- /.github/workflows/codacy-analysis.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # This workflow checks out code, performs a Codacy security scan 7 | # and integrates the results with the 8 | # GitHub Advanced Security code scanning feature. For more information on 9 | # the Codacy security scan action usage and parameters, see 10 | # https://github.com/codacy/codacy-analysis-cli-action. 
11 | # For more information on Codacy Analysis CLI in general, see 12 | # https://github.com/codacy/codacy-analysis-cli. 13 | 14 | name: Codacy Security Scan 15 | 16 | on: 17 | schedule: 18 | - cron: '59 11 27 * *' 19 | push: 20 | branches: [ "master" ] 21 | pull_request: 22 | branches: [ "master" ] 23 | types: [opened, reopened, synchronize, ready_for_review] 24 | # workflow_run: 25 | # workflows: [Test Changes] 26 | # types: 27 | # - completed 28 | workflow_call: 29 | workflow_dispatch: 30 | inputs: 31 | logLevel: 32 | description: 'Log level' 33 | required: true 34 | default: 'warning' 35 | type: choice 36 | options: 37 | - info 38 | - warning 39 | - debug 40 | 41 | permissions: 42 | contents: read 43 | 44 | jobs: 45 | codacy-security-scan: 46 | permissions: 47 | contents: read # for actions/checkout to fetch code 48 | security-events: write # for github/codeql-action/upload-sarif to upload SARIF results 49 | actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status 50 | name: Codacy Security Scan 51 | runs-on: ubuntu-latest 52 | steps: 53 | # Checkout the repository to the GitHub Actions runner 54 | - name: Checkout code 55 | uses: actions/checkout@v3 56 | 57 | # Execute Codacy Analysis CLI and generate a SARIF output with the security issues identified during the analysis 58 | - name: Run Codacy Analysis CLI 59 | uses: codacy/codacy-analysis-cli-action@v4 60 | with: 61 | # Check https://github.com/codacy/codacy-analysis-cli#project-token to get your project token from your Codacy repository 62 | # You can also omit the token and run the tools that support default configurations 63 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} 64 | verbose: true 65 | output: results.sarif 66 | format: sarif 67 | # Adjust severity of non-security issues 68 | gh-code-scanning-compat: true 69 | # Force 0 exit code to allow SARIF file generation 70 | # This will handover control about PR rejection to the GitHub 
side 71 | max-allowed-issues: 2147483647 72 | 73 | # Upload the SARIF file generated in the previous step 74 | - name: Upload SARIF results file 75 | uses: github/codeql-action/upload-sarif@v2 76 | with: 77 | sarif_file: results.sarif 78 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | schedule: 16 | - cron: '59 10 27 * *' 17 | push: 18 | branches: [ "master" ] 19 | pull_request: 20 | branches: [ "master" ] 21 | types: [opened, reopened, synchronize, ready_for_review] 22 | # workflow_run: 23 | # workflows: [Test Changes] 24 | # types: 25 | # - completed 26 | workflow_call: 27 | workflow_dispatch: 28 | inputs: 29 | logLevel: 30 | description: 'Log level' 31 | required: true 32 | default: 'warning' 33 | type: choice 34 | options: 35 | - info 36 | - warning 37 | - debug 38 | 39 | jobs: 40 | analyze: 41 | name: Analyze 42 | # Runner size impacts CodeQL analysis time. To learn more, please see: 43 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 44 | # - https://gh.io/supported-runners-and-hardware-resources 45 | # - https://gh.io/using-larger-runners 46 | # Consider using larger runners for possible analysis time improvements. 
47 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 48 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 49 | permissions: 50 | # required for all workflows 51 | security-events: write 52 | 53 | # only required for workflows in private repositories 54 | actions: read 55 | contents: read 56 | 57 | strategy: 58 | fail-fast: false 59 | matrix: 60 | language: [ 'python' ] 61 | # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] 62 | # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both 63 | # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 64 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 65 | 66 | steps: 67 | - name: Checkout repository 68 | uses: actions/checkout@v4 69 | 70 | # Initializes the CodeQL tools for scanning. 71 | - name: Initialize CodeQL 72 | uses: github/codeql-action/init@v3 73 | with: 74 | languages: ${{ matrix.language }} 75 | # If you wish to specify custom queries, you can do so here or in a config file. 76 | # By default, queries listed here will override any specified in a config file. 77 | # Prefix the list here with "+" to use these queries and those in the config file. 78 | 79 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 80 | # queries: security-extended,security-and-quality 81 | 82 | 83 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 84 | # If this step fails, then you should remove it and run the build manually (see below) 85 | - name: Autobuild 86 | uses: github/codeql-action/autobuild@v3 87 | 88 | # ℹ️ Command-line programs to run using the OS shell. 
89 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 90 | 91 | # If the Autobuild fails above, remove it and uncomment the following three lines. 92 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 93 | 94 | # - run: | 95 | # echo "Run, Build Application using script" 96 | # ./location_of_script_within_repo/buildscript.sh 97 | 98 | - name: Perform CodeQL Analysis 99 | uses: github/codeql-action/analyze@v3 100 | with: 101 | category: "/language:${{matrix.language}}" 102 | -------------------------------------------------------------------------------- /.github/workflows/publish-release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | inputs: 8 | logLevel: 9 | description: 'Log level' 10 | required: true 11 | default: 'warning' 12 | type: choice 13 | options: 14 | - info 15 | - warning 16 | - debug 17 | 18 | jobs: 19 | pypi: 20 | strategy: 21 | matrix: 22 | python: ['3.10'] 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: Checkout source code 26 | uses: actions/checkout@v4 27 | 28 | - name: Set up Python ${{ matrix.python }} 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: ${{ matrix.python }} 32 | 33 | - name: Install pypa/build 34 | run: | 35 | python -m pip install build --user 36 | 37 | - name: Build the petl package 38 | run: | 39 | python -m build --outdir dist/ . 
40 | 41 | - name: Publish the package version ${{ github.event.release.tag_name }} to PyPI 42 | if: startsWith(github.ref, 'refs/tags') 43 | uses: pypa/gh-action-pypi-publish@release/v1 44 | with: 45 | password: ${{ secrets.PYPI_API_TOKEN }} 46 | print_hash: true 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore ## 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test 42 | .tox/ 43 | .nox/ 44 | .cache 45 | .hypothesis/ 46 | .pytest_cache/ 47 | nosetests.xml 48 | 49 | # Coverage reports 50 | .coverage 51 | .coverage.* 52 | *.cover 53 | *.py,cover 54 | lcov.info 55 | cov.xml 56 | coverage.xml 57 | cover/ 58 | coverage/ 59 | htmlcov/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 112 | __pypackages__/ 113 | 114 | # Celery stuff 115 | celerybeat-schedule 116 | celerybeat.pid 117 | 118 | # SageMath parsed files 119 | *.sage.py 120 | 121 | # Environments 122 | .env 123 | .venv 124 | env/ 125 | venv/ 126 | ENV/ 127 | env.bak/ 128 | venv.bak/ 129 | 130 | # Spyder project settings 131 | .spyderproject 132 | .spyproject 133 | 134 | # Rope project settings 135 | .ropeproject 136 | 137 | # mkdocs documentation 138 | /site 139 | 140 | # mypy 141 | .mypy_cache/ 142 | .dmypy.json 143 | dmypy.json 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | 148 | # pytype static type analyzer 149 | .pytype/ 150 | 151 | # Cython debug symbols 152 | cython_debug/ 153 | 154 | # PyCharm 155 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 156 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 157 | # and can be added to the global gitignore or merged into this file. For a more nuclear 158 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
159 | #.idea/ 160 | 161 | ## Custom section for petl ## 162 | 163 | # Python generated files 164 | 165 | *.pyc 166 | 167 | # Jypyter notebooks tem files 168 | 169 | .ipynb_checkpoints/ 170 | **/.ipynb_checkpoints/* 171 | 172 | # Editor backup files 173 | *~ 174 | *.backup 175 | 176 | # Petl build generated files 177 | petl/version.py 178 | **/tmp/ 179 | 180 | # Petl doctest generated files 181 | example*.* 182 | 183 | # Ignore this patterns for develepment convenience 184 | 185 | sketch* 186 | 187 | 188 | # Ignore this folder not idea users 189 | 190 | .idea/ 191 | 192 | ## end of .gitignore file ## 193 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-lts-latest 10 | tools: 11 | # python: "3.12" 12 | python: "latest" 13 | # You can also specify other tool versions: 14 | # nodejs: "20" 15 | # rust: "1.70" 16 | # golang: "1.20" 17 | jobs: 18 | pre_build: 19 | - echo "Generating version number at 'pre_build' step" 20 | - python3 setup.py build 21 | 22 | # Build documentation in the "docs/" directory with Sphinx 23 | sphinx: 24 | configuration: docs/conf.py 25 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 26 | # builder: "dirhtml" 27 | # Fail on all warnings to avoid broken references 28 | # fail_on_warning: true 29 | 30 | # Optionally build your docs in additional formats such as PDF and ePub 31 | formats: 32 | - pdf 33 | - epub 34 | 35 | # Optional but recommended, declare the Python requirements required 36 | # to build your documentation 37 | # See 
https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 38 | python: 39 | install: 40 | # - requirements: docs/requirements.txt 41 | - requirements: requirements-docs.txt 42 | 43 | # End of the config file # 44 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | // 4 | //-- Used for IDE, Workbench, Tools -------------------------------------------- 5 | // 6 | "editorconfig.editorconfig", 7 | "VisualStudioExptTeam.vscodeintellicode", 8 | // 9 | //-- Used for linters, formatters ---------------------------------------------- 10 | // 11 | "ms-python.python", 12 | "ms-python.vscode-pylance", 13 | "ms-python.debugpy", 14 | "charliermarsh.ruff", 15 | "njpwerner.autodocstring", 16 | "njqdev.vscode-python-typehint", 17 | // 18 | // "ms-python.pylint", 19 | // "ms-python.flake8", 20 | // "ms-python.mypy-type-checker", 21 | // "ms-python.isort", 22 | // 23 | //-- Used for: Git, Code Quality ----------------------------------------------- 24 | // 25 | "Wequick.coverage-gutters", 26 | "vivaxy.vscode-conventional-commits" 27 | ] 28 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "python: with Args", 6 | "type": "debugpy", 7 | "request": "launch", 8 | "program": "${file}", 9 | "args": "${input:arguments}", 10 | "cwd": "${input:debug_working_dir}", 11 | "justMyCode": true, 12 | "autoReload": { 13 | "enable": true 14 | } 15 | }, 16 | { 17 | "name": "python: Within Libs", 18 | "type": "debugpy", 19 | "request": "launch", 20 | "program": "${file}", 21 | "args": "${input:last_arguments}", 22 | "cwd": "${input:debug_working_dir}", 23 | "justMyCode": false, 24 | "autoReload": { 25 | "enable": 
true 26 | } 27 | } 28 | ], 29 | "inputs": [ 30 | { 31 | // Usage: "args": "${input:arguments}", 32 | "id": "arguments", 33 | "type": "promptString", 34 | "description": "Which arguments to pass to the command?" 35 | }, 36 | { 37 | // Usage: "cwd": "${input:debug_working_dir}" 38 | "id": "debug_working_dir", 39 | "type": "pickString", 40 | "description": "Debug the python program in which of these folders?", 41 | "options": [ 42 | "${fileDirname}", 43 | "${fileWorkspaceFolder}", 44 | "${fileWorkspaceFolder}/petl", 45 | "${fileWorkspaceFolder}/petl/tests", 46 | "${fileWorkspaceFolder}/examples", 47 | "${relativeFileDirname}", 48 | "${userHome}", 49 | "${cwd}", 50 | "${selectedText}", 51 | "" 52 | ], 53 | "default": "${fileDirname}" 54 | }, 55 | ] 56 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "label": "Package: build", 8 | "command": "python2", 9 | "args": [ 10 | "setup.py", 11 | "build" 12 | ], 13 | "presentation": { 14 | "echo": true, 15 | "panel": "shared", 16 | "focus": true 17 | } 18 | }, 19 | { 20 | "label": "Package: install", 21 | "command": "python3", 22 | "group": { 23 | "kind": "build", 24 | "isDefault": true 25 | }, 26 | "args": [ 27 | "setup.py", 28 | "install" 29 | ], 30 | "presentation": { 31 | "echo": true, 32 | "panel": "shared", 33 | "focus": true 34 | } 35 | } 36 | ], 37 | "problemMatcher": [ 38 | { 39 | "fileLocation": "absolute", 40 | "pattern": [ 41 | { 42 | "regexp": "^\\s+File \"(.*)\", line (\\d+), in (.*)$", 43 | "file": 1, 44 | "line": 2 45 | }, 46 | { 47 | "regexp": "^\\s+(.*)$", 48 | "message": 1 49 | } 50 | ] 51 | } 52 | ] 53 | } -------------------------------------------------------------------------------- /LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Alistair Miles 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | recursive-include docs *.txt 3 | 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | petl - Extract, Transform and Load 2 | =================================================== 3 | 4 | ``petl`` is a general purpose Python package for extracting, transforming and 5 | loading tables of data. 6 | 7 | .. 
image:: docs/petl-architecture.png 8 | :align: center 9 | :alt: petl usage possibilities 10 | 11 | Resources 12 | --------- 13 | 14 | - Documentation: http://petl.readthedocs.org/ 15 | - PyPI: http://pypi.python.org/pypi/petl 16 | - Conda: https://anaconda.org/conda-forge/petl 17 | - Discussion: http://groups.google.com/group/python-etl 18 | 19 | DevOps Status 20 | ------------- 21 | 22 | |downloads| |monthly| 23 | 24 | |ci| |pypi| |conda| 25 | 26 | |coveralls| |readthedocs| |zenodo| 27 | 28 | .. |downloads| image:: https://static.pepy.tech/badge/petl 29 | :target: https://pepy.tech/project/petl 30 | :alt: Downloads 31 | 32 | .. |monthly| image:: https://static.pepy.tech/badge/petl/month 33 | :target: https://pepy.tech/project/petl 34 | :alt: Downloads/Month 35 | 36 | .. |ci| image:: https://github.com/petl-developers/petl/actions/workflows/test-changes.yml/badge.svg 37 | :target: https://github.com/petl-developers/petl/actions/workflows/test-changes.yml 38 | :alt: Continuous Integration build status 39 | 40 | .. |pypi| image:: https://github.com/petl-developers/petl/actions/workflows/publish-release.yml/badge.svg 41 | :target: https://github.com/petl-developers/petl/actions/workflows/publish-release.yml 42 | :alt: PyPI release status 43 | 44 | .. |conda| image:: https://github.com/conda-forge/petl-feedstock/actions/workflows/automerge.yml/badge.svg 45 | :target: https://github.com/conda-forge/petl-feedstock/actions/workflows/automerge.yml 46 | :alt: Conda Forge release status 47 | 48 | .. |readthedocs| image:: https://readthedocs.org/projects/petl/badge/?version=stable 49 | :target: http://petl.readthedocs.io/en/stable/?badge=stable 50 | :alt: readthedocs.org release status 51 | 52 | .. |coveralls| image:: https://coveralls.io/repos/github/petl-developers/petl/badge.svg?branch=master 53 | :target: https://coveralls.io/github/petl-developers/petl?branch=master 54 | :alt: Coveralls release status 55 | 56 | .. 
|zenodo| image:: https://zenodo.org/badge/2233194.svg 57 | :target: https://zenodo.org/badge/latestdoi/2233194 58 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | petl - Extract, Transform and Load 2 | ================================== 3 | 4 | ``petl`` is a general purpose Python package for extracting, transforming and 5 | loading tables of data. 6 | 7 | Resources 8 | --------- 9 | 10 | - Documentation: http://petl.readthedocs.org/ 11 | - Mailing List: http://groups.google.com/group/python-etl 12 | - Source Code: https://github.com/petl-developers/petl 13 | - Download: 14 | - PyPI: http://pypi.python.org/pypi/petl 15 | - Conda Forge:https://anaconda.org/conda-forge/petl 16 | 17 | Getting Help 18 | ------------- 19 | 20 | Please feel free to ask questions via the mailing list 21 | (python-etl@googlegroups.com). 22 | 23 | To report installation problems, bugs or any other issues please email 24 | python-etl@googlegroups.com or `raise an issue on GitHub 25 | `_. 26 | -------------------------------------------------------------------------------- /bin/petl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function, division, absolute_import 4 | import sys 5 | import os 6 | import os.path 7 | import glob 8 | from optparse import OptionParser 9 | 10 | from petl import __version__ 11 | from petl import * 12 | 13 | parser = OptionParser( 14 | usage="%prog [options] expression", 15 | description="Evaluate a Python expression. 
The expression will be " 16 | "evaluated using eval(), with petl functions imported.", 17 | version=__version__) 18 | 19 | options, args = parser.parse_args() 20 | 21 | try: 22 | (expression,) = args 23 | except ValueError: 24 | parser.error("invalid number of arguments (%s)" % len(args)) 25 | r = eval(expression) 26 | 27 | if r is not None: 28 | if isinstance(r, Table): 29 | print(look(r)) 30 | else: 31 | print(str(r)) 32 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -x -c 'docker compose --file docker-compose.yml up --detach' 2 | 3 | #region commands ----------------------------------------------------------------------------------- 4 | 5 | #$ docker compose --file docker-compose.yml up --detach 6 | #$ docker compose --file docker-compose.yml down --remove-orphans -v --rmi local 7 | 8 | #$ docker exec --tty --interactive --privileged petl-xxxxxx /bin/bash 9 | 10 | #$ docker exec -it petl-postgres psql -U petl --dbname=petl 11 | #$ docker exec -it petl-msyql mysql --user=petl --database=petl --password=test 12 | 13 | #endregion ----------------------------------------------------------------------------------------- 14 | 15 | #region docker composer ---------------------------------------------------------------------------- 16 | 17 | --- 18 | services: 19 | postgres: 20 | container_name: petl-postgres 21 | hostname: petl_postgres 22 | image: postgres:latest 23 | environment: 24 | - POSTGRES_USER=petl 25 | - POSTGRES_PASSWORD=test 26 | - POSTGRES_DB=petl 27 | - POSTGRES_HOST_AUTH_METHOD=password 28 | ports: 29 | - "5432:5432/tcp" 30 | restart: "unless-stopped" 31 | stdin_open: true 32 | tty: true 33 | healthcheck: 34 | test: ["CMD", "psql", "--host=localhost", "--username=petl", "--dbname=petl", "-c", "select 1 as ok"] 35 | interval: 20s 36 | timeout: 10s 37 | retries: 5 38 | start_period: 2s 39 | 40 
| mysql: 41 | container_name: petl-mysql 42 | hostname: petl_mysql 43 | image: mysql:latest 44 | ports: 45 | - "3306:3306/tcp" 46 | - "33060:33060/tcp" 47 | environment: 48 | MYSQL_ALLOW_EMPTY_PASSWORD: "yes" 49 | MYSQL_DATABASE: "petl" 50 | MYSQL_USER: "petl" 51 | MYSQL_PASSWORD: "test" 52 | MYSQL_ROOT_PASSWORD: "pass0" 53 | restart: "unless-stopped" 54 | stdin_open: true 55 | tty: true 56 | healthcheck: 57 | test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] 58 | interval: 20s 59 | timeout: 10s 60 | retries: 5 61 | start_period: 2s 62 | 63 | samba: 64 | container_name: petl-samba 65 | hostname: petl_samba 66 | image: dperson/samba 67 | ports: 68 | - "137:137/udp" 69 | - "138:138/udp" 70 | - "139:139/tcp" 71 | - "445:445/tcp" 72 | tmpfs: 73 | - /tmp 74 | restart: unless-stopped 75 | stdin_open: true 76 | tty: true 77 | volumes: 78 | - /mnt:/mnt:z 79 | - /mnt2:/mnt2:z 80 | command: '-s "public;/mnt;yes;no;yes;all" -s "mount2;/mnt2" -u "petl;test" -p' 81 | 82 | sftp: 83 | container_name: petl-sftp 84 | hostname: petl_sftp 85 | image: atmoz/sftp 86 | ports: 87 | - "22:22/tcp" 88 | tmpfs: 89 | - /tmp 90 | restart: unless-stopped 91 | stdin_open: true 92 | tty: true 93 | command: 'petl:test:::public' 94 | 95 | #endregion ----------------------------------------------------------------------------------------- 96 | -------------------------------------------------------------------------------- /docs/acknowledgments.rst: -------------------------------------------------------------------------------- 1 | Acknowledgments 2 | =============== 3 | 4 | This is community-maintained software. 
The following people have contributed to 5 | the development of this package: 6 | 7 | * Alexander Stauber 8 | * Alistair Miles (`alimanfoo `_) 9 | * Andreas Porevopoulos (`sv1jsb `_) 10 | * Andrew Kim (`andrewakim `_) 11 | * Artur Poniński (`arturponinski `_) 12 | * Brad Maggard (`bmaggard `_) 13 | * Caleb Lloyd (`caleblloyd `_) 14 | * César Roldán (`ihuro `_) 15 | * Chris Lasher (`gotgenes `_) 16 | * Dean Way (`DeanWay `_) 17 | * Dustin Engstrom (`engstrom `_) 18 | * Fahad Siddiqui (`fahadsiddiqui `_) 19 | * Florent Xicluna (`florentx `_) 20 | * Henry Rizzi (`henryrizzi `_) 21 | * Jonathan Camile (`deytao `_) 22 | * Jonathan Moss (`a-musing-moose `_) 23 | * Juarez Rudsatz (`juarezr `_) 24 | * Kenneth Borthwick 25 | * Krisztián Fekete (`krisztianfekete `_) 26 | * Matt Katz (`mattkatz `_) 27 | * Matthew Scholefield (`MatthewScholefield `_) 28 | * Michał Karol (`MichalKarol `_) 29 | * Michael Rea (`rea725 `_) 30 | * Olivier Macchioni (`omacchioni `_) 31 | * Olivier Poitrey (`rs `_) 32 | * Pablo Castellano (`PabloCastellano `_) 33 | * Paul Jensen (`psnj `_) 34 | * Paulo Scardine (`scardine `_) 35 | * Peder Jakobsen (`pjakobsen `_) 36 | * Phillip Knaus (`phillipknaus `_) 37 | * Richard Pearson (`podpearson `_) 38 | * Robert DeSimone (`icenine457 `_) 39 | * Robin Moss (`LupusUmbrae `_) 40 | * Roger Woodley (`rogerkwoodley `_) 41 | * Tim Hebbeler (`timheb `_) 42 | * Tucker Beck (`dusktreader `_) 43 | * Viliam Segeďa (`vilos `_) 44 | * Zach Palchick (`palchicz `_) 45 | * `adamsdarlingtower `_ 46 | * `hugovk `_ 47 | * `imazor `_ 48 | * `james-unified `_ 49 | * `Mgutjahr `_ 50 | * `shayh `_ 51 | * `thatneat `_ 52 | * `titusz `_ 53 | * `zigen `_ 54 | 55 | Development of petl has been supported by an open source license for 56 | `PyCharm `_. 
57 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | =============================== 3 | 4 | .. automodule:: petl.config 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. module:: petl 2 | 3 | petl - Extract, Transform and Load 4 | =================================== 5 | 6 | :mod:`petl` is a general purpose Python package for extracting, transforming 7 | and loading tables of data. 8 | 9 | .. image:: petl-architecture.png 10 | :width: 750 11 | :align: center 12 | :alt: petl use cases diagram 13 | 14 | Resources 15 | --------- 16 | 17 | - Documentation: http://petl.readthedocs.org/ 18 | - Mailing List: http://groups.google.com/group/python-etl 19 | - Source Code: https://github.com/petl-developers/petl 20 | - Download: 21 | - PyPI: http://pypi.python.org/pypi/petl 22 | - Conda Forge:https://anaconda.org/conda-forge/petl 23 | 24 | .. note:: 25 | 26 | - Version 2.0 will be a major milestone for :mod:`petl`. 27 | - This version will introduce some changes that could affect current behaviour. 28 | - We will try to keep compatibility to the maximum possible, except 29 | when the current behavior is inconsistent or have shortcomings. 30 | - The biggest change is the end of support of Python `2.7`. 31 | - The minimum supported version will be Python `3.6`. 32 | 33 | Getting Help 34 | ------------- 35 | 36 | Please feel free to ask questions via the mailing list 37 | (python-etl@googlegroups.com). 38 | 39 | To report installation problems, bugs or any other issues please email 40 | python-etl@googlegroups.com or `raise an issue on GitHub 41 | `_. 42 | 43 | For an example of :mod:`petl` in use, see the `case study on comparing tables 44 | `_. 
45 | 46 | Contents 47 | -------- 48 | 49 | For an alphabetic list of all functions in the package, 50 | see the :ref:`genindex`. 51 | 52 | .. toctree:: 53 | :maxdepth: 2 54 | 55 | install 56 | intro 57 | io 58 | transform 59 | util 60 | config 61 | contributing 62 | acknowledgments 63 | related_work 64 | changes 65 | 66 | Indices and tables 67 | ------------------ 68 | 69 | * :ref:`genindex` 70 | * :ref:`modindex` 71 | * :ref:`search` 72 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | .. _intro_installation: 5 | 6 | Getting Started 7 | --------------- 8 | 9 | This package is available from the `Python Package Index 10 | `_. If you have `pip 11 | `_ you should be able to do:: 12 | 13 | $ pip install petl 14 | 15 | You can also download manually, extract and run ``python setup.py 16 | install``. 17 | 18 | To verify the installation, the test suite can be run with `pytest 19 | `_, e.g.:: 20 | 21 | $ pip install pytest 22 | $ pytest -v petl 23 | 24 | :mod:`petl` has been tested with Python versions 2.7 and 3.6-3.13 25 | under Linux, MacOS, and Windows operating systems. 26 | 27 | .. _intro_dependencies: 28 | 29 | Dependencies and extensions 30 | --------------------------- 31 | 32 | This package is written in pure Python and has no installation requirements 33 | other than the Python core modules. 34 | 35 | Some domain-specific and/or experimental extensions to :mod:`petl` are 36 | available from the petlx_ package. 37 | 38 | .. _petlx: http://petlx.readthedocs.org 39 | 40 | Some of the functions in this package require installation of third party 41 | packages. These packages are indicated in the relevant parts of the 42 | documentation for each file format. 
It is also possible to install some of the dependencies when installing `petl` by
module:: petl.util 2 | 3 | Utility functions 4 | ================= 5 | 6 | 7 | Basic utilities 8 | --------------- 9 | 10 | .. autofunction:: petl.util.base.header 11 | .. autofunction:: petl.util.base.fieldnames 12 | .. autofunction:: petl.util.base.data 13 | .. autofunction:: petl.util.base.values 14 | .. autofunction:: petl.util.base.dicts 15 | .. autofunction:: petl.util.base.namedtuples 16 | .. autofunction:: petl.util.base.records 17 | .. autofunction:: petl.util.base.expr 18 | .. autofunction:: petl.util.base.rowgroupby 19 | .. autofunction:: petl.util.base.empty 20 | 21 | 22 | Visualising tables 23 | ------------------ 24 | 25 | .. autofunction:: petl.util.vis.look 26 | .. autofunction:: petl.util.vis.lookall 27 | .. autofunction:: petl.util.vis.see 28 | .. autofunction:: petl.util.vis.display 29 | .. autofunction:: petl.util.vis.displayall 30 | 31 | 32 | Lookup data structures 33 | ---------------------- 34 | 35 | .. autofunction:: petl.util.lookups.lookup 36 | .. autofunction:: petl.util.lookups.lookupone 37 | .. autofunction:: petl.util.lookups.dictlookup 38 | .. autofunction:: petl.util.lookups.dictlookupone 39 | .. autofunction:: petl.util.lookups.recordlookup 40 | .. autofunction:: petl.util.lookups.recordlookupone 41 | 42 | 43 | Parsing string/text values 44 | -------------------------- 45 | 46 | .. autofunction:: petl.util.parsers.dateparser 47 | .. autofunction:: petl.util.parsers.timeparser 48 | .. autofunction:: petl.util.parsers.datetimeparser 49 | .. autofunction:: petl.util.parsers.boolparser 50 | .. autofunction:: petl.util.parsers.numparser 51 | 52 | 53 | Counting 54 | -------- 55 | 56 | .. autofunction:: petl.util.counting.nrows 57 | .. autofunction:: petl.util.counting.valuecount 58 | .. autofunction:: petl.util.counting.valuecounter 59 | .. autofunction:: petl.util.counting.valuecounts 60 | .. autofunction:: petl.util.counting.stringpatterncounter 61 | .. autofunction:: petl.util.counting.stringpatterns 62 | .. 
autofunction:: petl.util.counting.rowlengths 63 | .. autofunction:: petl.util.counting.typecounter 64 | .. autofunction:: petl.util.counting.typecounts 65 | .. autofunction:: petl.util.counting.parsecounter 66 | .. autofunction:: petl.util.counting.parsecounts 67 | 68 | 69 | Timing 70 | ------ 71 | 72 | .. autofunction:: petl.util.timing.progress 73 | .. autofunction:: petl.util.timing.log_progress 74 | .. autofunction:: petl.util.timing.clock 75 | 76 | 77 | Statistics 78 | ---------- 79 | 80 | .. autofunction:: petl.util.statistics.limits 81 | .. autofunction:: petl.util.statistics.stats 82 | 83 | 84 | Materialising tables 85 | -------------------- 86 | 87 | .. autofunction:: petl.util.materialise.columns 88 | .. autofunction:: petl.util.materialise.facetcolumns 89 | .. autofunction:: petl.util.materialise.listoflists 90 | .. autofunction:: petl.util.materialise.listoftuples 91 | .. autofunction:: petl.util.materialise.tupleoflists 92 | .. autofunction:: petl.util.materialise.tupleoftuples 93 | .. autofunction:: petl.util.materialise.cache 94 | 95 | 96 | Randomly generated tables 97 | ------------------------- 98 | 99 | .. autofunction:: petl.util.random.randomtable 100 | .. autofunction:: petl.util.random.dummytable 101 | 102 | 103 | Miscellaneous 104 | ------------- 105 | 106 | .. autofunction:: petl.util.misc.typeset 107 | .. autofunction:: petl.util.misc.diffheaders 108 | .. autofunction:: petl.util.misc.diffvalues 109 | .. autofunction:: petl.util.misc.strjoin 110 | .. autofunction:: petl.util.misc.nthword 111 | .. 
autofunction:: petl.util.misc.coalesce 112 | -------------------------------------------------------------------------------- /examples/comparison.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | import petl as etl 5 | table = [['foo', 'bar'], 6 | ['a', 1], 7 | ['b', None]] 8 | 9 | # raises exception under Python 3 10 | etl.select(table, 'bar', lambda v: v > 0) 11 | # no error under Python 3 12 | etl.selectgt(table, 'bar', 0) 13 | # or ... 14 | etl.select(table, 'bar', lambda v: v > etl.Comparable(0)) 15 | 16 | -------------------------------------------------------------------------------- /examples/intro.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | example_data = """foo,bar,baz 4 | a,1,3.4 5 | b,2,7.4 6 | c,6,2.2 7 | d,9,8.1 8 | """ 9 | with open('example.csv', 'w') as f: 10 | f.write(example_data) 11 | 12 | import petl as etl 13 | table1 = etl.fromcsv('example.csv') 14 | table2 = etl.convert(table1, 'foo', 'upper') 15 | table3 = etl.convert(table2, 'bar', int) 16 | table4 = etl.convert(table3, 'baz', float) 17 | table5 = etl.addfield(table4, 'quux', lambda row: row.bar * row.baz) 18 | table5 19 | 20 | table = ( 21 | etl 22 | .fromcsv('example.csv') 23 | .convert('foo', 'upper') 24 | .convert('bar', int) 25 | .convert('baz', float) 26 | .addfield('quux', lambda row: row.bar * row.baz) 27 | ) 28 | table 29 | 30 | l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]] 31 | table = etl.wrap(l) 32 | table.look() 33 | 34 | l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]] 35 | table = etl.wrap(l) 36 | table 37 | 38 | etl.config.look_index_header = True 39 | 40 | table 41 | -------------------------------------------------------------------------------- /examples/io/csv.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # fromcsv() 5 | ########### 6 | 7 | import petl as etl 8 | import csv 9 | # set up a CSV file to demonstrate with 10 | table1 = [['foo', 'bar'], 11 | ['a', 1], 12 | ['b', 2], 13 | ['c', 2]] 14 | with open('example.csv', 'w') as f: 15 | writer = csv.writer(f) 16 | writer.writerows(table1) 17 | 18 | # now demonstrate the use of fromcsv() 19 | table2 = etl.fromcsv('example.csv') 20 | table2 21 | 22 | 23 | # tocsv() 24 | ######### 25 | 26 | import petl as etl 27 | table1 = [['foo', 'bar'], 28 | ['a', 1], 29 | ['b', 2], 30 | ['c', 2]] 31 | etl.tocsv(table1, 'example.csv') 32 | # look what it did 33 | print(open('example.csv').read()) 34 | -------------------------------------------------------------------------------- /examples/io/html.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # tohtml() 5 | ########## 6 | 7 | 8 | import petl as etl 9 | table1 = [['foo', 'bar'], 10 | ['a', 1], 11 | ['b', 2], 12 | ['c', 2]] 13 | etl.tohtml(table1, 'example.html', caption='example table') 14 | print(open('example.html').read()) 15 | -------------------------------------------------------------------------------- /examples/io/json.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # fromjson() 5 | ############ 6 | 7 | import petl as etl 8 | data = ''' 9 | [{"foo": "a", "bar": 1}, 10 | {"foo": "b", "bar": 2}, 11 | {"foo": "c", "bar": 2}] 12 | ''' 13 | with open('example.json', 'w') as f: 14 | f.write(data) 15 | 16 | table1 = etl.fromjson('example.json') 17 | table1 18 | 19 | 20 | # fromdicts() 21 | ############# 22 | 23 | import petl as etl 24 | dicts = [{"foo": "a", "bar": 1}, 25 | {"foo": "b", 
"bar": 2}, 26 | {"foo": "c", "bar": 2}] 27 | table1 = etl.fromdicts(dicts) 28 | table1 29 | 30 | 31 | # tojson() 32 | ########## 33 | 34 | import petl as etl 35 | table1 = [['foo', 'bar'], 36 | ['a', 1], 37 | ['b', 2], 38 | ['c', 2]] 39 | etl.tojson(table1, 'example.json', sort_keys=True) 40 | # check what it did 41 | print(open('example.json').read()) 42 | 43 | 44 | # tojsonarrays() 45 | ################ 46 | 47 | import petl as etl 48 | table1 = [['foo', 'bar'], 49 | ['a', 1], 50 | ['b', 2], 51 | ['c', 2]] 52 | etl.tojsonarrays(table1, 'example.json') 53 | # check what it did 54 | print(open('example.json').read()) 55 | 56 | -------------------------------------------------------------------------------- /examples/io/numpy.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # toarray() 5 | ########### 6 | 7 | import petl as etl 8 | table = [('foo', 'bar', 'baz'), 9 | ('apples', 1, 2.5), 10 | ('oranges', 3, 4.4), 11 | ('pears', 7, .1)] 12 | a = etl.toarray(table) 13 | a 14 | # the dtype can be specified as a string 15 | a = etl.toarray(table, dtype='a4, i2, f4') 16 | a 17 | # the dtype can also be partially specified 18 | a = etl.toarray(table, dtype={'foo': 'a4'}) 19 | a 20 | 21 | 22 | # fromarray() 23 | ############# 24 | 25 | import petl as etl 26 | import numpy as np 27 | a = np.array([('apples', 1, 2.5), 28 | ('oranges', 3, 4.4), 29 | ('pears', 7, 0.1)], 30 | dtype='U8, i4,f4') 31 | table = etl.fromarray(a) 32 | table 33 | 34 | 35 | # valuestoarray() 36 | ################# 37 | 38 | import petl as etl 39 | table = [('foo', 'bar', 'baz'), 40 | ('apples', 1, 2.5), 41 | ('oranges', 3, 4.4), 42 | ('pears', 7, .1)] 43 | table = etl.wrap(table) 44 | table.values('bar').array() 45 | # specify dtype 46 | table.values('bar').array(dtype='i4') 47 | -------------------------------------------------------------------------------- /examples/io/pandas.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # todataframe() 5 | ############### 6 | 7 | import petl as etl 8 | table = [('foo', 'bar', 'baz'), 9 | ('apples', 1, 2.5), 10 | ('oranges', 3, 4.4), 11 | ('pears', 7, .1)] 12 | df = etl.todataframe(table) 13 | df 14 | 15 | 16 | # fromdataframe() 17 | ################# 18 | 19 | import petl as etl 20 | import pandas as pd 21 | records = [('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)] 22 | df = pd.DataFrame.from_records(records, columns=('foo', 'bar', 'baz')) 23 | table = etl.fromdataframe(df) 24 | table 25 | -------------------------------------------------------------------------------- /examples/io/pickle.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # frompickle() 5 | ############## 6 | 7 | import petl as etl 8 | import pickle 9 | # set up a file to demonstrate with 10 | with open('example.p', 'wb') as f: 11 | pickle.dump(['foo', 'bar'], f) 12 | pickle.dump(['a', 1], f) 13 | pickle.dump(['b', 2], f) 14 | pickle.dump(['c', 2.5], f) 15 | 16 | # demonstrate the use of frompickle() 17 | table1 = etl.frompickle('example.p') 18 | table1 19 | 20 | 21 | # topickle() 22 | ############ 23 | 24 | import petl as etl 25 | table1 = [['foo', 'bar'], 26 | ['a', 1], 27 | ['b', 2], 28 | ['c', 2]] 29 | etl.topickle(table1, 'example.p') 30 | # look what it did 31 | table2 = etl.frompickle('example.p') 32 | table2 33 | 34 | 35 | -------------------------------------------------------------------------------- /examples/io/pytables.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | # fromhdf5() 6 | ############ 7 | 8 | import petl as etl 9 | import tables 
10 | # set up a new hdf5 table to demonstrate with 11 | h5file = tables.openFile('example.h5', mode='w', title='Example file') 12 | h5file.createGroup('/', 'testgroup', 'Test Group') 13 | class FooBar(tables.IsDescription): 14 | foo = tables.Int32Col(pos=0) 15 | bar = tables.StringCol(6, pos=2) 16 | 17 | h5table = h5file.createTable('/testgroup', 'testtable', FooBar, 'Test Table') 18 | # load some data into the table 19 | table1 = (('foo', 'bar'), 20 | (1, b'asdfgh'), 21 | (2, b'qwerty'), 22 | (3, b'zxcvbn')) 23 | 24 | for row in table1[1:]: 25 | for i, f in enumerate(table1[0]): 26 | h5table.row[f] = row[i] 27 | h5table.row.append() 28 | 29 | h5file.flush() 30 | h5file.close() 31 | # 32 | # now demonstrate use of fromhdf5 33 | table1 = etl.fromhdf5('example.h5', '/testgroup', 'testtable') 34 | table1 35 | # alternatively just specify path to table node 36 | table1 = etl.fromhdf5('example.h5', '/testgroup/testtable') 37 | # ...or use an existing tables.File object 38 | h5file = tables.openFile('example.h5') 39 | table1 = etl.fromhdf5(h5file, '/testgroup/testtable') 40 | # ...or use an existing tables.Table object 41 | h5tbl = h5file.getNode('/testgroup/testtable') 42 | table1 = etl.fromhdf5(h5tbl) 43 | # use a condition to filter data 44 | table2 = etl.fromhdf5(h5tbl, condition='foo < 3') 45 | table2 46 | h5file.close() 47 | 48 | 49 | # fromhdf5sorted() 50 | ################## 51 | 52 | import petl as etl 53 | import tables 54 | # set up a new hdf5 table to demonstrate with 55 | h5file = tables.openFile('example.h5', mode='w', title='Test file') 56 | h5file.createGroup('/', 'testgroup', 'Test Group') 57 | class FooBar(tables.IsDescription): 58 | foo = tables.Int32Col(pos=0) 59 | bar = tables.StringCol(6, pos=2) 60 | 61 | h5table = h5file.createTable('/testgroup', 'testtable', FooBar, 'Test Table') 62 | # load some data into the table 63 | table1 = (('foo', 'bar'), 64 | (3, b'asdfgh'), 65 | (2, b'qwerty'), 66 | (1, b'zxcvbn')) 67 | for row in table1[1:]: 68 | for i, 
f in enumerate(table1[0]): 69 | h5table.row[f] = row[i] 70 | h5table.row.append() 71 | 72 | h5table.cols.foo.createCSIndex() # CS index is required 73 | h5file.flush() 74 | h5file.close() 75 | # 76 | # access the data, sorted by the indexed column 77 | table2 = etl.fromhdf5sorted('example.h5', '/testgroup', 'testtable', 78 | sortby='foo') 79 | table2 80 | 81 | 82 | # tohdf5() 83 | ########## 84 | 85 | import petl as etl 86 | table1 = (('foo', 'bar'), 87 | (1, b'asdfgh'), 88 | (2, b'qwerty'), 89 | (3, b'zxcvbn')) 90 | etl.tohdf5(table1, 'example.h5', '/testgroup', 'testtable', 91 | drop=True, create=True, createparents=True) 92 | etl.fromhdf5('example.h5', '/testgroup', 'testtable') 93 | -------------------------------------------------------------------------------- /examples/io/sqlite3.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os 3 | 4 | 5 | # fromsqlite3() 6 | ############### 7 | 8 | os.remove('example.db') 9 | 10 | import petl as etl 11 | import sqlite3 12 | # set up a database to demonstrate with 13 | data = [['a', 1], 14 | ['b', 2], 15 | ['c', 2.0]] 16 | connection = sqlite3.connect('example.db') 17 | c = connection.cursor() 18 | _ = c.execute('drop table if exists foobar') 19 | _ = c.execute('create table foobar (foo, bar)') 20 | for row in data: 21 | _ = c.execute('insert into foobar values (?, ?)', row) 22 | 23 | connection.commit() 24 | c.close() 25 | # now demonstrate the petl.fromsqlite3 function 26 | table = etl.fromsqlite3('example.db', 'select * from foobar') 27 | table 28 | 29 | 30 | # tosqlite3() 31 | ############## 32 | 33 | os.remove('example.db') 34 | 35 | import petl as etl 36 | table1 = [['foo', 'bar'], 37 | ['a', 1], 38 | ['b', 2], 39 | ['c', 2]] 40 | _ = etl.tosqlite3(table1, 'example.db', 'foobar', create=True) 41 | # look what it did 42 | table2 = etl.fromsqlite3('example.db', 'select * from foobar') 43 | table2 44 | 
-------------------------------------------------------------------------------- /examples/io/text.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # fromtext() 5 | ############ 6 | 7 | import petl as etl 8 | # setup example file 9 | text = 'a,1\nb,2\nc,2\n' 10 | with open('example.txt', 'w') as f: 11 | f.write(text) 12 | 13 | table1 = etl.fromtext('example.txt') 14 | table1 15 | # post-process, e.g., with capture() 16 | table2 = table1.capture('lines', '(.*),(.*)$', ['foo', 'bar']) 17 | table2 18 | 19 | 20 | # totext() 21 | ########## 22 | 23 | import petl as etl 24 | table1 = [['foo', 'bar'], 25 | ['a', 1], 26 | ['b', 2], 27 | ['c', 2]] 28 | prologue = '''{| class="wikitable" 29 | |- 30 | ! foo 31 | ! bar 32 | ''' 33 | template = '''|- 34 | | {foo} 35 | | {bar} 36 | ''' 37 | epilogue = '|}' 38 | etl.totext(table1, 'example.txt', template, prologue, epilogue) 39 | # see what we did 40 | print(open('example.txt').read()) 41 | -------------------------------------------------------------------------------- /examples/io/whoosh.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | # fromtextindex() 6 | ################# 7 | 8 | import petl as etl 9 | import os 10 | # set up an index and load some documents via the Whoosh API 11 | from whoosh.index import create_in 12 | from whoosh.fields import * 13 | schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) 14 | dirname = 'example.whoosh' 15 | if not os.path.exists(dirname): 16 | os.mkdir(dirname) 17 | 18 | index = create_in(dirname, schema) 19 | writer = index.writer() 20 | writer.add_document(title=u"First document", path=u"/a", 21 | content=u"This is the first document we've added!") 22 | writer.add_document(title=u"Second document", 
path=u"/b", 23 | content=u"The second one is even more interesting!") 24 | writer.commit() 25 | # extract documents as a table 26 | table = etl.fromtextindex(dirname) 27 | table 28 | 29 | 30 | # totextindex() 31 | ############### 32 | 33 | import petl as etl 34 | import datetime 35 | import os 36 | # here is the table we want to load into an index 37 | table = (('f0', 'f1', 'f2', 'f3', 'f4'), 38 | ('AAA', 12, 4.3, True, datetime.datetime.now()), 39 | ('BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)), 40 | ('CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25))) 41 | # define a schema for the index 42 | from whoosh.fields import * 43 | schema = Schema(f0=TEXT(stored=True), 44 | f1=NUMERIC(int, stored=True), 45 | f2=NUMERIC(float, stored=True), 46 | f3=BOOLEAN(stored=True), 47 | f4=DATETIME(stored=True)) 48 | # load index 49 | dirname = 'example.whoosh' 50 | if not os.path.exists(dirname): 51 | os.mkdir(dirname) 52 | 53 | etl.totextindex(table, dirname, schema=schema) 54 | 55 | 56 | # searchtextindex() 57 | ################### 58 | 59 | import petl as etl 60 | import os 61 | # set up an index and load some documents via the Whoosh API 62 | from whoosh.index import create_in 63 | from whoosh.fields import * 64 | schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) 65 | dirname = 'example.whoosh' 66 | if not os.path.exists(dirname): 67 | os.mkdir(dirname) 68 | 69 | index = create_in('example.whoosh', schema) 70 | writer = index.writer() 71 | writer.add_document(title=u"Oranges", path=u"/a", 72 | content=u"This is the first document we've added!") 73 | writer.add_document(title=u"Apples", path=u"/b", 74 | content=u"The second document is even more " 75 | u"interesting!") 76 | writer.commit() 77 | # demonstrate the use of searchtextindex() 78 | table1 = etl.searchtextindex('example.whoosh', 'oranges') 79 | table1 80 | table2 = etl.searchtextindex('example.whoosh', 'doc*') 81 | table2 82 | 
-------------------------------------------------------------------------------- /examples/io/xml.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | import petl as etl 5 | # setup a file to demonstrate with 6 | d = ''' 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
foobar
a1
b2
c2
''' 20 | with open('example1.xml', 'w') as f: 21 | f.write(d) 22 | 23 | table1 = etl.fromxml('example1.xml', 'tr', 'td') 24 | table1 25 | # if the data values are stored in an attribute, provide the attribute name 26 | # as an extra positional argument 27 | d = ''' 28 | 29 | 31 | 32 | 34 | 35 | 37 | 38 | 40 |
30 |
33 |
36 |
39 |
''' 41 | with open('example2.xml', 'w') as f: 42 | f.write(d) 43 | 44 | table2 = etl.fromxml('example2.xml', 'tr', 'td', 'v') 45 | table2 46 | # data values can also be extracted by providing a mapping of field 47 | # names to element paths 48 | d = ''' 49 | 50 | a 51 | 52 | 53 | b 54 | 55 | 56 | c 57 | 58 |
''' 59 | with open('example3.xml', 'w') as f: 60 | f.write(d) 61 | 62 | table3 = etl.fromxml('example3.xml', 'row', 63 | {'foo': 'foo', 'bar': ('baz/bar', 'v')}) 64 | table3 65 | -------------------------------------------------------------------------------- /examples/notes/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.zip* 3 | -------------------------------------------------------------------------------- /examples/notes/20140424_example.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3.0 3 | 4 | # 5 | 6 | data = """type,price,quantity 7 | Apples 8 | Cortland,0.30,24 9 | Red Delicious,0.40,24 10 | Oranges 11 | Navel,0.50,12 12 | """ 13 | 14 | # 15 | 16 | import petl.interactive as etl 17 | from petl.io import StringSource 18 | 19 | # 20 | 21 | tbl1 = (etl 22 | .fromcsv(StringSource(data)) 23 | ) 24 | tbl1 25 | 26 | # 27 | 28 | # Option 1 - using existing petl functions 29 | 30 | # 31 | 32 | def make_room_for_category(row): 33 | if len(row) == 1: 34 | return (row[0], 'X', 'X', 'X') 35 | else: 36 | return (None,) + tuple(row) 37 | 38 | tbl2 = tbl1.rowmap(make_room_for_category, fields=['category', 'type', 'price', 'quantity']) 39 | tbl2 40 | 41 | # 42 | 43 | tbl3 = tbl2.filldown() 44 | tbl3 45 | 46 | # 47 | 48 | tbl4 = tbl3.ne('type', 'X') 49 | tbl4 50 | 51 | # 52 | 53 | # Option 2 - custom transformer 54 | 55 | # 56 | 57 | class CustomTransformer(object): 58 | 59 | def __init__(self, source): 60 | self.source = source 61 | 62 | def __iter__(self): 63 | it = iter(self.source) 64 | 65 | # construct new header 66 | source_fields = it.next() 67 | out_fields = ('category',) + tuple(source_fields) 68 | yield out_fields 69 | 70 | # transform data 71 | current_category = None 72 | for row in it: 73 | if len(row) == 1: 74 | current_category = row[0] 75 | else: 76 | yield (current_category,) + tuple(row) 77 | 78 | # 79 | 80 | tbl5 = 
CustomTransformer(tbl1) 81 | 82 | # 83 | 84 | # just so it formats nicely as HTML in the notebook... 85 | etl.wrap(tbl5) 86 | 87 | -------------------------------------------------------------------------------- /examples/notes/20150331 split null.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "sys.version_info(major=3, minor=4, micro=2, releaselevel='final', serial=0)" 14 | ] 15 | }, 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "output_type": "execute_result" 19 | } 20 | ], 21 | "source": [ 22 | "import sys\n", 23 | "sys.version_info" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/plain": [ 36 | "'1.0.6'" 37 | ] 38 | }, 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "output_type": "execute_result" 42 | } 43 | ], 44 | "source": [ 45 | "import petl as etl\n", 46 | "etl.__version__" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "tbl1 = [['foo', 'bar'],\n", 58 | " ['a b c', 1],\n", 59 | " ['d e f', 2],\n", 60 | " [None, 3]]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/html": [ 73 | "\n", 74 | "\n", 75 | "\n", 76 | "\n", 77 | "\n", 78 | "\n", 79 | "\n", 80 | "\n", 81 | "\n", 82 | "\n", 83 | "\n", 84 | "\n", 85 | "\n", 86 | "\n", 87 | "\n", 88 | "\n", 89 | "\n", 90 | "\n", 91 | "\n", 92 | "\n", 93 | "\n", 94 | "\n", 95 | "\n", 96 | "\n", 97 | "\n", 98 | "\n", 99 | "\n", 100 | "\n", 101 | "\n", 102 | "
barxyz
1abc
2def
3NoneNoneNone
\n" 103 | ], 104 | "text/plain": [ 105 | "+-----+------+------+------+\n", 106 | "| bar | x | y | z |\n", 107 | "+=====+======+======+======+\n", 108 | "| 1 | 'a' | 'b' | 'c' |\n", 109 | "+-----+------+------+------+\n", 110 | "| 2 | 'd' | 'e' | 'f' |\n", 111 | "+-----+------+------+------+\n", 112 | "| 3 | None | None | None |\n", 113 | "+-----+------+------+------+" 114 | ] 115 | }, 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "tbl2 = etl.wrap(tbl1).replace('foo', None, ' ').split('foo', ' ', ['x', 'y', 'z']).replaceall('', None)\n", 123 | "tbl2" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.4.2" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 0 157 | } 158 | -------------------------------------------------------------------------------- /examples/notes/issue_219.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3.0 3 | 4 | # 5 | 6 | # Using server-side cursors with PostgreSQL and MySQL 7 | 8 | # 9 | 10 | # see http://pynash.org/2013/03/06/timing-and-profiling.html for setup of profiling magics 11 | 12 | # 13 | 14 | import MySQLdb 15 | import psycopg2 16 | 17 | import petl 18 | from petl.fluent import etl 19 | 20 | # 21 | print(petl.VERSION) 22 | tbl_dummy_data = etl().dummytable(100000) 23 | tbl_dummy_data.look() 24 | 25 | # 26 | 27 | 
print(tbl_dummy_data.nrows()) 28 | 29 | # 30 | 31 | # PostgreSQL 32 | 33 | # 34 | 35 | psql_connection = psycopg2.connect(host='localhost', dbname='petl', user='petl', password='petl') 36 | 37 | # 38 | 39 | cursor = psql_connection.cursor() 40 | cursor.execute('DROP TABLE IF EXISTS issue_219;') 41 | cursor.execute('CREATE TABLE issue_219 (foo INTEGER, bar TEXT, baz FLOAT);') 42 | 43 | # 44 | 45 | tbl_dummy_data.progress(10000).todb(psql_connection, 'issue_219') 46 | 47 | # 48 | 49 | # memory usage using default cursor 50 | print(etl.fromdb(psql_connection, 'select * from issue_219 order by foo').look(2)) 51 | 52 | # 53 | 54 | # memory usage using server-side cursor 55 | print(etl.fromdb(lambda: psql_connection.cursor(name='server-side'), 'select * from issue_219 order by foo').look(2)) 56 | 57 | # 58 | 59 | # MySQL 60 | 61 | # 62 | 63 | mysql_connection = MySQLdb.connect(host='127.0.0.1', db='petl', user='petl', passwd='petl') 64 | 65 | # 66 | 67 | cursor = mysql_connection.cursor() 68 | cursor.execute('SET SQL_MODE=ANSI_QUOTES') 69 | cursor.execute('DROP TABLE IF EXISTS issue_219;') 70 | cursor.execute('CREATE TABLE issue_219 (foo INTEGER, bar TEXT, baz FLOAT);') 71 | 72 | # 73 | 74 | tbl_dummy_data.progress(10000).todb(mysql_connection, 'issue_219') 75 | 76 | # 77 | 78 | # memory usage with default cursor 79 | print(etl.fromdb(mysql_connection, 'select * from issue_219 order by foo').look(2)) 80 | 81 | # 82 | 83 | # memory usage with server-side cursor 84 | print(etl.fromdb(lambda: mysql_connection.cursor(MySQLdb.cursors.SSCursor), 'select * from issue_219 order by foo').look(2)) 85 | 86 | -------------------------------------------------------------------------------- /examples/notes/issue_256.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3.0 3 | 4 | # 5 | 6 | # Notes supporting [issue #256](https://github.com/alimanfoo/petl/issues/256). 
7 | 8 | # 9 | 10 | import petl.interactive as etl 11 | 12 | # 13 | 14 | t1 = etl.wrap([['foo', 'bar'], [1, 'a'], [2, 'b']]) 15 | t1 16 | 17 | # 18 | 19 | t2 = etl.wrap([['foo', 'bar'], [1, 'a'], [2, 'c']]) 20 | t2 21 | 22 | # 23 | 24 | t3 = etl.merge(t1, t2, key='foo') 25 | t3 26 | 27 | # 28 | 29 | # The problem with the above is that you cannot tell from inspecting *t3* alone which conflicting value comes from which source. 30 | # 31 | # A workaround as suggested by [@pawl](https://github.com/pawl) is to use the [*conflicts()*](http://petl.readthedocs.org/en/latest/#petl.conflicts) function, e.g.: 32 | 33 | # 34 | 35 | t4 = (etl 36 | .cat( 37 | t1.addfield('source', 1), 38 | t2.addfield('source', 2) 39 | ) 40 | .conflicts(key='foo', exclude='source') 41 | ) 42 | t4 43 | 44 | -------------------------------------------------------------------------------- /examples/transform/conversions.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # convert() 5 | ########### 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | ['A', '2.4', 12], 10 | ['B', '5.7', 34], 11 | ['C', '1.2', 56]] 12 | # using a built-in function: 13 | table2 = etl.convert(table1, 'bar', float) 14 | table2 15 | # using a lambda function:: 16 | table3 = etl.convert(table1, 'baz', lambda v: v*2) 17 | table3 18 | # a method of the data value can also be invoked by passing 19 | # the method name 20 | table4 = etl.convert(table1, 'foo', 'lower') 21 | table4 22 | # arguments to the method invocation can also be given 23 | table5 = etl.convert(table1, 'foo', 'replace', 'A', 'AA') 24 | table5 25 | # values can also be translated via a dictionary 26 | table7 = etl.convert(table1, 'foo', {'A': 'Z', 'B': 'Y'}) 27 | table7 28 | # the same conversion can be applied to multiple fields 29 | table8 = etl.convert(table1, ('foo', 'bar', 'baz'), str) 30 | table8 31 | # multiple conversions 
can be specified at the same time 32 | table9 = etl.convert(table1, {'foo': 'lower', 33 | 'bar': float, 34 | 'baz': lambda v: v * 2}) 35 | table9 36 | # ...or alternatively via a list 37 | table10 = etl.convert(table1, ['lower', float, lambda v: v*2]) 38 | table10 39 | # conversion can be conditional 40 | table11 = etl.convert(table1, 'baz', lambda v: v * 2, 41 | where=lambda r: r.foo == 'B') 42 | table11 43 | # conversion can access other values from the same row 44 | table12 = etl.convert(table1, 'baz', 45 | lambda v, row: v * float(row.bar), 46 | pass_row=True) 47 | table12 48 | 49 | 50 | # convertnumbers() 51 | ################## 52 | 53 | import petl as etl 54 | table1 = [['foo', 'bar', 'baz', 'quux'], 55 | ['1', '3.0', '9+3j', 'aaa'], 56 | ['2', '1.3', '7+2j', None]] 57 | table2 = etl.convertnumbers(table1) 58 | table2 59 | 60 | 61 | -------------------------------------------------------------------------------- /examples/transform/dedup.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # duplicates() 5 | ############## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | ['A', 1, 2.0], 10 | ['B', 2, 3.4], 11 | ['D', 6, 9.3], 12 | ['B', 3, 7.8], 13 | ['B', 2, 12.3], 14 | ['E', None, 1.3], 15 | ['D', 4, 14.5]] 16 | table2 = etl.duplicates(table1, 'foo') 17 | table2 18 | # compound keys are supported 19 | table3 = etl.duplicates(table1, key=['foo', 'bar']) 20 | table3 21 | 22 | 23 | # unique() 24 | ########## 25 | 26 | import petl as etl 27 | table1 = [['foo', 'bar', 'baz'], 28 | ['A', 1, 2], 29 | ['B', '2', '3.4'], 30 | ['D', 'xyz', 9.0], 31 | ['B', u'3', u'7.8'], 32 | ['B', '2', 42], 33 | ['E', None, None], 34 | ['D', 4, 12.3], 35 | ['F', 7, 2.3]] 36 | table2 = etl.unique(table1, 'foo') 37 | table2 38 | 39 | 40 | # conflicts() 41 | ############# 42 | 43 | import petl as etl 44 | table1 = [['foo', 'bar', 'baz'], 45 | ['A', 1, 
2.7], 46 | ['B', 2, None], 47 | ['D', 3, 9.4], 48 | ['B', None, 7.8], 49 | ['E', None], 50 | ['D', 3, 12.3], 51 | ['A', 2, None]] 52 | table2 = etl.conflicts(table1, 'foo') 53 | table2 54 | 55 | 56 | # isunique() 57 | ############ 58 | 59 | import petl as etl 60 | table1 = [['foo', 'bar'], 61 | ['a', 1], 62 | ['b'], 63 | ['b', 2], 64 | ['c', 3, True]] 65 | etl.isunique(table1, 'foo') 66 | etl.isunique(table1, 'bar') 67 | 68 | 69 | -------------------------------------------------------------------------------- /examples/transform/fills.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # filldown() 5 | ############ 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | [1, 'a', None], 10 | [1, None, .23], 11 | [1, 'b', None], 12 | [2, None, None], 13 | [2, None, .56], 14 | [2, 'c', None], 15 | [None, 'c', .72]] 16 | table2 = etl.filldown(table1) 17 | table2.lookall() 18 | table3 = etl.filldown(table1, 'bar') 19 | table3.lookall() 20 | table4 = etl.filldown(table1, 'bar', 'baz') 21 | table4.lookall() 22 | 23 | 24 | # fillright() 25 | ############# 26 | 27 | import petl as etl 28 | table1 = [['foo', 'bar', 'baz'], 29 | [1, 'a', None], 30 | [1, None, .23], 31 | [1, 'b', None], 32 | [2, None, None], 33 | [2, None, .56], 34 | [2, 'c', None], 35 | [None, 'c', .72]] 36 | table2 = etl.fillright(table1) 37 | table2.lookall() 38 | 39 | 40 | # fillleft() 41 | ############ 42 | 43 | import petl as etl 44 | table1 = [['foo', 'bar', 'baz'], 45 | [1, 'a', None], 46 | [1, None, .23], 47 | [1, 'b', None], 48 | [2, None, None], 49 | [2, None, .56], 50 | [2, 'c', None], 51 | [None, 'c', .72]] 52 | table2 = etl.fillleft(table1) 53 | table2.lookall() 54 | -------------------------------------------------------------------------------- /examples/transform/headers.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import division, print_function, absolute_import 2 | 3 | 4 | # rename() 5 | ########## 6 | 7 | import petl as etl 8 | table1 = [['sex', 'age'], 9 | ['m', 12], 10 | ['f', 34], 11 | ['-', 56]] 12 | # rename a single field 13 | table2 = etl.rename(table1, 'sex', 'gender') 14 | table2 15 | # rename multiple fields by passing a dictionary as the second argument 16 | table3 = etl.rename(table1, {'sex': 'gender', 'age': 'age_years'}) 17 | table3 18 | 19 | 20 | # setheader() 21 | ############# 22 | 23 | import petl as etl 24 | table1 = [['foo', 'bar'], 25 | ['a', 1], 26 | ['b', 2]] 27 | table2 = etl.setheader(table1, ['foofoo', 'barbar']) 28 | table2 29 | 30 | 31 | # extendheader() 32 | ################ 33 | 34 | import petl as etl 35 | table1 = [['foo'], 36 | ['a', 1, True], 37 | ['b', 2, False]] 38 | table2 = etl.extendheader(table1, ['bar', 'baz']) 39 | table2 40 | 41 | 42 | # pushheader() 43 | ############## 44 | 45 | import petl as etl 46 | table1 = [['a', 1], 47 | ['b', 2]] 48 | table2 = etl.pushheader(table1, ['foo', 'bar']) 49 | table2 50 | 51 | 52 | # skip() 53 | ######### 54 | 55 | import petl as etl 56 | table1 = [['#aaa', 'bbb', 'ccc'], 57 | ['#mmm'], 58 | ['foo', 'bar'], 59 | ['a', 1], 60 | ['b', 2]] 61 | table2 = etl.skip(table1, 2) 62 | table2 63 | 64 | 65 | -------------------------------------------------------------------------------- /examples/transform/intervals.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # intervallookup() 5 | ################## 6 | 7 | import petl as etl 8 | table = [['start', 'stop', 'value'], 9 | [1, 4, 'foo'], 10 | [3, 7, 'bar'], 11 | [4, 9, 'baz']] 12 | lkp = etl.intervallookup(table, 'start', 'stop') 13 | lkp.search(0, 1) 14 | lkp.search(1, 2) 15 | lkp.search(2, 4) 16 | lkp.search(2, 5) 17 | lkp.search(9, 14) 18 | lkp.search(19, 140) 19 | lkp.search(0) 20 | lkp.search(1) 21 | lkp.search(2) 22 | 
lkp.search(4) 23 | lkp.search(5) 24 | 25 | import petl as etl 26 | table = [['start', 'stop', 'value'], 27 | [1, 4, 'foo'], 28 | [3, 7, 'bar'], 29 | [4, 9, 'baz']] 30 | lkp = etl.intervallookup(table, 'start', 'stop', include_stop=True, 31 | value='value') 32 | lkp.search(0, 1) 33 | lkp.search(1, 2) 34 | lkp.search(2, 4) 35 | lkp.search(2, 5) 36 | lkp.search(9, 14) 37 | lkp.search(19, 140) 38 | lkp.search(0) 39 | lkp.search(1) 40 | lkp.search(2) 41 | lkp.search(4) 42 | lkp.search(5) 43 | 44 | 45 | # intervallookupone() 46 | ##################### 47 | 48 | import petl as etl 49 | table = [['start', 'stop', 'value'], 50 | [1, 4, 'foo'], 51 | [3, 7, 'bar'], 52 | [4, 9, 'baz']] 53 | lkp = etl.intervallookupone(table, 'start', 'stop', strict=False) 54 | lkp.search(0, 1) 55 | lkp.search(1, 2) 56 | lkp.search(2, 4) 57 | lkp.search(2, 5) 58 | lkp.search(9, 14) 59 | lkp.search(19, 140) 60 | lkp.search(0) 61 | lkp.search(1) 62 | lkp.search(2) 63 | lkp.search(4) 64 | lkp.search(5) 65 | 66 | 67 | # facetintervallookup() 68 | ####################### 69 | 70 | import petl as etl 71 | table = (('type', 'start', 'stop', 'value'), 72 | ('apple', 1, 4, 'foo'), 73 | ('apple', 3, 7, 'bar'), 74 | ('orange', 4, 9, 'baz')) 75 | lkp = etl.facetintervallookup(table, key='type', start='start', stop='stop') 76 | lkp['apple'].search(1, 2) 77 | lkp['apple'].search(2, 4) 78 | lkp['apple'].search(2, 5) 79 | lkp['orange'].search(2, 5) 80 | lkp['orange'].search(9, 14) 81 | lkp['orange'].search(19, 140) 82 | lkp['apple'].search(1) 83 | lkp['apple'].search(2) 84 | lkp['apple'].search(4) 85 | lkp['apple'].search(5) 86 | lkp['orange'].search(5) 87 | 88 | 89 | # intervaljoin() 90 | ################ 91 | 92 | import petl as etl 93 | left = [['begin', 'end', 'quux'], 94 | [1, 2, 'a'], 95 | [2, 4, 'b'], 96 | [2, 5, 'c'], 97 | [9, 14, 'd'], 98 | [1, 1, 'e'], 99 | [10, 10, 'f']] 100 | right = [['start', 'stop', 'value'], 101 | [1, 4, 'foo'], 102 | [3, 7, 'bar'], 103 | [4, 9, 'baz']] 104 | table1 = 
etl.intervaljoin(left, right, 105 | lstart='begin', lstop='end', 106 | rstart='start', rstop='stop') 107 | table1.lookall() 108 | # include stop coordinate in intervals 109 | table2 = etl.intervaljoin(left, right, 110 | lstart='begin', lstop='end', 111 | rstart='start', rstop='stop', 112 | include_stop=True) 113 | table2.lookall() 114 | 115 | # with facet key 116 | import petl as etl 117 | left = (('fruit', 'begin', 'end'), 118 | ('apple', 1, 2), 119 | ('apple', 2, 4), 120 | ('apple', 2, 5), 121 | ('orange', 2, 5), 122 | ('orange', 9, 14), 123 | ('orange', 19, 140), 124 | ('apple', 1, 1)) 125 | right = (('type', 'start', 'stop', 'value'), 126 | ('apple', 1, 4, 'foo'), 127 | ('apple', 3, 7, 'bar'), 128 | ('orange', 4, 9, 'baz')) 129 | table3 = etl.intervaljoin(left, right, 130 | lstart='begin', lstop='end', lkey='fruit', 131 | rstart='start', rstop='stop', rkey='type') 132 | table3.lookall() 133 | 134 | # intervalleftjoin() 135 | #################### 136 | 137 | import petl as etl 138 | left = [['begin', 'end', 'quux'], 139 | [1, 2, 'a'], 140 | [2, 4, 'b'], 141 | [2, 5, 'c'], 142 | [9, 14, 'd'], 143 | [1, 1, 'e'], 144 | [10, 10, 'f']] 145 | right = [['start', 'stop', 'value'], 146 | [1, 4, 'foo'], 147 | [3, 7, 'bar'], 148 | [4, 9, 'baz']] 149 | table1 = etl.intervalleftjoin(left, right, 150 | lstart='begin', lstop='end', 151 | rstart='start', rstop='stop') 152 | table1.lookall() 153 | -------------------------------------------------------------------------------- /examples/transform/joins.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # join() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['id', 'colour'], 9 | [1, 'blue'], 10 | [2, 'red'], 11 | [3, 'purple']] 12 | table2 = [['id', 'shape'], 13 | [1, 'circle'], 14 | [3, 'square'], 15 | [4, 'ellipse']] 16 | table3 = etl.join(table1, table2, key='id') 17 | table3 18 | # if no key is given, a 
natural join is tried 19 | table4 = etl.join(table1, table2) 20 | table4 21 | # note behaviour if the key is not unique in either or both tables 22 | table5 = [['id', 'colour'], 23 | [1, 'blue'], 24 | [1, 'red'], 25 | [2, 'purple']] 26 | table6 = [['id', 'shape'], 27 | [1, 'circle'], 28 | [1, 'square'], 29 | [2, 'ellipse']] 30 | table7 = etl.join(table5, table6, key='id') 31 | table7 32 | # compound keys are supported 33 | table8 = [['id', 'time', 'height'], 34 | [1, 1, 12.3], 35 | [1, 2, 34.5], 36 | [2, 1, 56.7]] 37 | table9 = [['id', 'time', 'weight'], 38 | [1, 2, 4.5], 39 | [2, 1, 6.7], 40 | [2, 2, 8.9]] 41 | table10 = etl.join(table8, table9, key=['id', 'time']) 42 | table10 43 | 44 | 45 | # leftjoin() 46 | ############ 47 | 48 | import petl as etl 49 | table1 = [['id', 'colour'], 50 | [1, 'blue'], 51 | [2, 'red'], 52 | [3, 'purple']] 53 | table2 = [['id', 'shape'], 54 | [1, 'circle'], 55 | [3, 'square'], 56 | [4, 'ellipse']] 57 | table3 = etl.leftjoin(table1, table2, key='id') 58 | table3 59 | 60 | 61 | # rightjoin() 62 | ############# 63 | 64 | import petl as etl 65 | table1 = [['id', 'colour'], 66 | [1, 'blue'], 67 | [2, 'red'], 68 | [3, 'purple']] 69 | table2 = [['id', 'shape'], 70 | [1, 'circle'], 71 | [3, 'square'], 72 | [4, 'ellipse']] 73 | table3 = etl.rightjoin(table1, table2, key='id') 74 | table3 75 | 76 | 77 | # outerjoin() 78 | ############# 79 | 80 | import petl as etl 81 | table1 = [['id', 'colour'], 82 | [1, 'blue'], 83 | [2, 'red'], 84 | [3, 'purple']] 85 | table2 = [['id', 'shape'], 86 | [1, 'circle'], 87 | [3, 'square'], 88 | [4, 'ellipse']] 89 | table3 = etl.outerjoin(table1, table2, key='id') 90 | table3 91 | 92 | 93 | # crossjoin() 94 | ############# 95 | 96 | import petl as etl 97 | table1 = [['id', 'colour'], 98 | [1, 'blue'], 99 | [2, 'red']] 100 | table2 = [['id', 'shape'], 101 | [1, 'circle'], 102 | [3, 'square']] 103 | table3 = etl.crossjoin(table1, table2) 104 | table3 105 | 106 | 107 | # antijoin() 108 | ############ 109 | 110 | 
import petl as etl 111 | table1 = [['id', 'colour'], 112 | [0, 'black'], 113 | [1, 'blue'], 114 | [2, 'red'], 115 | [4, 'yellow'], 116 | [5, 'white']] 117 | table2 = [['id', 'shape'], 118 | [1, 'circle'], 119 | [3, 'square']] 120 | table3 = etl.antijoin(table1, table2, key='id') 121 | table3 122 | 123 | 124 | # lookupjoin() 125 | ############## 126 | 127 | import petl as etl 128 | table1 = [['id', 'color', 'cost'], 129 | [1, 'blue', 12], 130 | [2, 'red', 8], 131 | [3, 'purple', 4]] 132 | table2 = [['id', 'shape', 'size'], 133 | [1, 'circle', 'big'], 134 | [1, 'circle', 'small'], 135 | [2, 'square', 'tiny'], 136 | [2, 'square', 'big'], 137 | [3, 'ellipse', 'small'], 138 | [3, 'ellipse', 'tiny']] 139 | table3 = etl.lookupjoin(table1, table2, key='id') 140 | table3 141 | 142 | 143 | # unjoin() 144 | ########## 145 | 146 | import petl as etl 147 | # join key is present in the table 148 | table1 = (('foo', 'bar', 'baz'), 149 | ('A', 1, 'apple'), 150 | ('B', 1, 'apple'), 151 | ('C', 2, 'orange')) 152 | table2, table3 = etl.unjoin(table1, 'baz', key='bar') 153 | table2 154 | table3 155 | # an integer join key can also be reconstructed 156 | table4 = (('foo', 'bar'), 157 | ('A', 'apple'), 158 | ('B', 'apple'), 159 | ('C', 'orange')) 160 | table5, table6 = etl.unjoin(table4, 'bar') 161 | table5 162 | table6 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /examples/transform/maps.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # fieldmap() 5 | ############ 6 | 7 | import petl as etl 8 | from collections import OrderedDict 9 | table1 = [['id', 'sex', 'age', 'height', 'weight'], 10 | [1, 'male', 16, 1.45, 62.0], 11 | [2, 'female', 19, 1.34, 55.4], 12 | [3, 'female', 17, 1.78, 74.4], 13 | [4, 'male', 21, 1.33, 45.2], 14 | [5, '-', 25, 1.65, 51.9]] 15 | mappings = OrderedDict() 16 | # rename a 
field 17 | mappings['subject_id'] = 'id' 18 | # translate a field 19 | mappings['gender'] = 'sex', {'male': 'M', 'female': 'F'} 20 | # apply a calculation to a field 21 | mappings['age_months'] = 'age', lambda v: v * 12 22 | # apply a calculation to a combination of fields 23 | mappings['bmi'] = lambda rec: rec['weight'] / rec['height']**2 24 | # transform and inspect the output 25 | table2 = etl.fieldmap(table1, mappings) 26 | table2 27 | 28 | 29 | # rowmap() 30 | ########## 31 | 32 | 33 | import petl as etl 34 | table1 = [['id', 'sex', 'age', 'height', 'weight'], 35 | [1, 'male', 16, 1.45, 62.0], 36 | [2, 'female', 19, 1.34, 55.4], 37 | [3, 'female', 17, 1.78, 74.4], 38 | [4, 'male', 21, 1.33, 45.2], 39 | [5, '-', 25, 1.65, 51.9]] 40 | def rowmapper(row): 41 | transmf = {'male': 'M', 'female': 'F'} 42 | return [row[0], 43 | transmf[row['sex']] if row['sex'] in transmf else None, 44 | row.age * 12, 45 | row.height / row.weight ** 2] 46 | 47 | table2 = etl.rowmap(table1, rowmapper, 48 | fields=['subject_id', 'gender', 'age_months', 'bmi']) 49 | table2 50 | 51 | 52 | # rowmapmany() 53 | ############## 54 | 55 | import petl as etl 56 | table1 = [['id', 'sex', 'age', 'height', 'weight'], 57 | [1, 'male', 16, 1.45, 62.0], 58 | [2, 'female', 19, 1.34, 55.4], 59 | [3, '-', 17, 1.78, 74.4], 60 | [4, 'male', 21, 1.33]] 61 | def rowgenerator(row): 62 | transmf = {'male': 'M', 'female': 'F'} 63 | yield [row[0], 'gender', 64 | transmf[row['sex']] if row['sex'] in transmf else None] 65 | yield [row[0], 'age_months', row.age * 12] 66 | yield [row[0], 'bmi', row.height / row.weight ** 2] 67 | 68 | table2 = etl.rowmapmany(table1, rowgenerator, 69 | fields=['subject_id', 'variable', 'value']) 70 | table2.lookall() 71 | 72 | 73 | -------------------------------------------------------------------------------- /examples/transform/reductions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, 
division 2 | 3 | 4 | # rowreduce() 5 | ############# 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | ['a', 3], 10 | ['a', 7], 11 | ['b', 2], 12 | ['b', 1], 13 | ['b', 9], 14 | ['c', 4]] 15 | def sumbar(key, rows): 16 | return [key, sum(row[1] for row in rows)] 17 | 18 | table2 = etl.rowreduce(table1, key='foo', reducer=sumbar, 19 | fields=['foo', 'barsum']) 20 | table2 21 | 22 | 23 | # aggregate() 24 | ############# 25 | 26 | import petl as etl 27 | 28 | table1 = [['foo', 'bar', 'baz'], 29 | ['a', 3, True], 30 | ['a', 7, False], 31 | ['b', 2, True], 32 | ['b', 2, False], 33 | ['b', 9, False], 34 | ['c', 4, True]] 35 | # aggregate whole rows 36 | table2 = etl.aggregate(table1, 'foo', len) 37 | table2 38 | # aggregate single field 39 | table3 = etl.aggregate(table1, 'foo', sum, 'bar') 40 | table3 41 | # alternative signature using keyword args 42 | table4 = etl.aggregate(table1, key=('foo', 'bar'), 43 | aggregation=list, value=('bar', 'baz')) 44 | table4 45 | # aggregate multiple fields 46 | from collections import OrderedDict 47 | import petl as etl 48 | 49 | aggregation = OrderedDict() 50 | aggregation['count'] = len 51 | aggregation['minbar'] = 'bar', min 52 | aggregation['maxbar'] = 'bar', max 53 | aggregation['sumbar'] = 'bar', sum 54 | # default aggregation function is list 55 | aggregation['listbar'] = 'bar' 56 | aggregation['listbarbaz'] = ('bar', 'baz'), list 57 | aggregation['bars'] = 'bar', etl.strjoin(', ') 58 | table5 = etl.aggregate(table1, 'foo', aggregation) 59 | table5 60 | 61 | 62 | # mergeduplicates() 63 | ################### 64 | 65 | import petl as etl 66 | table1 = [['foo', 'bar', 'baz'], 67 | ['A', 1, 2.7], 68 | ['B', 2, None], 69 | ['D', 3, 9.4], 70 | ['B', None, 7.8], 71 | ['E', None, 42.], 72 | ['D', 3, 12.3], 73 | ['A', 2, None]] 74 | table2 = etl.mergeduplicates(table1, 'foo') 75 | table2 76 | 77 | 78 | # merge() 79 | ######### 80 | 81 | import petl as etl 82 | table1 = [['foo', 'bar', 'baz'], 83 | [1, 'A', True], 84 | [2, 
'B', None], 85 | [4, 'C', True]] 86 | table2 = [['bar', 'baz', 'quux'], 87 | ['A', True, 42.0], 88 | ['B', False, 79.3], 89 | ['C', False, 12.4]] 90 | table3 = etl.merge(table1, table2, key='bar') 91 | table3 92 | 93 | 94 | # fold() 95 | ######## 96 | 97 | import petl as etl 98 | table1 = [['id', 'count'], 99 | [1, 3], 100 | [1, 5], 101 | [2, 4], 102 | [2, 8]] 103 | import operator 104 | table2 = etl.fold(table1, 'id', operator.add, 'count', 105 | presorted=True) 106 | table2 107 | -------------------------------------------------------------------------------- /examples/transform/regex.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # capture() 5 | ############ 6 | 7 | import petl as etl 8 | table1 = [['id', 'variable', 'value'], 9 | ['1', 'A1', '12'], 10 | ['2', 'A2', '15'], 11 | ['3', 'B1', '18'], 12 | ['4', 'C12', '19']] 13 | table2 = etl.capture(table1, 'variable', '(\\w)(\\d+)', 14 | ['treat', 'time']) 15 | table2 16 | # using the include_original argument 17 | table3 = etl.capture(table1, 'variable', '(\\w)(\\d+)', 18 | ['treat', 'time'], 19 | include_original=True) 20 | table3 21 | 22 | 23 | # split() 24 | ######### 25 | 26 | import petl as etl 27 | table1 = [['id', 'variable', 'value'], 28 | ['1', 'parad1', '12'], 29 | ['2', 'parad2', '15'], 30 | ['3', 'tempd1', '18'], 31 | ['4', 'tempd2', '19']] 32 | table2 = etl.split(table1, 'variable', 'd', ['variable', 'day']) 33 | table2 34 | 35 | 36 | # search() 37 | ########## 38 | 39 | import petl as etl 40 | table1 = [['foo', 'bar', 'baz'], 41 | ['orange', 12, 'oranges are nice fruit'], 42 | ['mango', 42, 'I like them'], 43 | ['banana', 74, 'lovely too'], 44 | ['cucumber', 41, 'better than mango']] 45 | # search any field 46 | table2 = etl.search(table1, '.g.') 47 | table2 48 | # search a specific field 49 | table3 = etl.search(table1, 'foo', '.g.') 50 | table3 51 | 52 | 53 | # searchcomplement() 
54 | #################### 55 | 56 | import petl as etl 57 | table1 = [['foo', 'bar', 'baz'], 58 | ['orange', 12, 'oranges are nice fruit'], 59 | ['mango', 42, 'I like them'], 60 | ['banana', 74, 'lovely too'], 61 | ['cucumber', 41, 'better than mango']] 62 | # search any field 63 | table2 = etl.searchcomplement(table1, '.g.') 64 | table2 65 | # search a specific field 66 | table3 = etl.searchcomplement(table1, 'foo', '.g.') 67 | table3 68 | 69 | -------------------------------------------------------------------------------- /examples/transform/reshape.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # melt() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['id', 'gender', 'age'], 9 | [1, 'F', 12], 10 | [2, 'M', 17], 11 | [3, 'M', 16]] 12 | table2 = etl.melt(table1, 'id') 13 | table2.lookall() 14 | # compound keys are supported 15 | table3 = [['id', 'time', 'height', 'weight'], 16 | [1, 11, 66.4, 12.2], 17 | [2, 16, 53.2, 17.3], 18 | [3, 12, 34.5, 9.4]] 19 | table4 = etl.melt(table3, key=['id', 'time']) 20 | table4.lookall() 21 | # a subset of variable fields can be selected 22 | table5 = etl.melt(table3, key=['id', 'time'], 23 | variables=['height']) 24 | table5.lookall() 25 | 26 | 27 | # recast() 28 | ########## 29 | 30 | import petl as etl 31 | table1 = [['id', 'variable', 'value'], 32 | [3, 'age', 16], 33 | [1, 'gender', 'F'], 34 | [2, 'gender', 'M'], 35 | [2, 'age', 17], 36 | [1, 'age', 12], 37 | [3, 'gender', 'M']] 38 | table2 = etl.recast(table1) 39 | table2 40 | # specifying variable and value fields 41 | table3 = [['id', 'vars', 'vals'], 42 | [3, 'age', 16], 43 | [1, 'gender', 'F'], 44 | [2, 'gender', 'M'], 45 | [2, 'age', 17], 46 | [1, 'age', 12], 47 | [3, 'gender', 'M']] 48 | table4 = etl.recast(table3, variablefield='vars', valuefield='vals') 49 | table4 50 | # if there are multiple values for each key/variable pair, and no 51 | # 
reducers function is provided, then all values will be listed 52 | table6 = [['id', 'time', 'variable', 'value'], 53 | [1, 11, 'weight', 66.4], 54 | [1, 14, 'weight', 55.2], 55 | [2, 12, 'weight', 53.2], 56 | [2, 16, 'weight', 43.3], 57 | [3, 12, 'weight', 34.5], 58 | [3, 17, 'weight', 49.4]] 59 | table7 = etl.recast(table6, key='id') 60 | table7 61 | # multiple values can be reduced via an aggregation function 62 | def mean(values): 63 | return float(sum(values)) / len(values) 64 | 65 | table8 = etl.recast(table6, key='id', reducers={'weight': mean}) 66 | table8 67 | # missing values are padded with whatever is provided via the 68 | # missing keyword argument (None by default) 69 | table9 = [['id', 'variable', 'value'], 70 | [1, 'gender', 'F'], 71 | [2, 'age', 17], 72 | [1, 'age', 12], 73 | [3, 'gender', 'M']] 74 | table10 = etl.recast(table9, key='id') 75 | table10 76 | 77 | 78 | # transpose() 79 | ############# 80 | 81 | import petl as etl 82 | table1 = [['id', 'colour'], 83 | [1, 'blue'], 84 | [2, 'red'], 85 | [3, 'purple'], 86 | [5, 'yellow'], 87 | [7, 'orange']] 88 | table2 = etl.transpose(table1) 89 | table2 90 | 91 | 92 | # pivot() 93 | ######### 94 | 95 | import petl as etl 96 | table1 = [['region', 'gender', 'style', 'units'], 97 | ['east', 'boy', 'tee', 12], 98 | ['east', 'boy', 'golf', 14], 99 | ['east', 'boy', 'fancy', 7], 100 | ['east', 'girl', 'tee', 3], 101 | ['east', 'girl', 'golf', 8], 102 | ['east', 'girl', 'fancy', 18], 103 | ['west', 'boy', 'tee', 12], 104 | ['west', 'boy', 'golf', 15], 105 | ['west', 'boy', 'fancy', 8], 106 | ['west', 'girl', 'tee', 6], 107 | ['west', 'girl', 'golf', 16], 108 | ['west', 'girl', 'fancy', 1]] 109 | table2 = etl.pivot(table1, 'region', 'gender', 'units', sum) 110 | table2 111 | table3 = etl.pivot(table1, 'region', 'style', 'units', sum) 112 | table3 113 | table4 = etl.pivot(table1, 'gender', 'style', 'units', sum) 114 | table4 115 | 116 | 117 | # flatten() 118 | ########### 119 | 120 | import petl as etl 121 | 
table1 = [['foo', 'bar', 'baz'], 122 | ['A', 1, True], 123 | ['C', 7, False], 124 | ['B', 2, False], 125 | ['C', 9, True]] 126 | list(etl.flatten(table1)) 127 | 128 | 129 | # unflatten() 130 | ############# 131 | 132 | import petl as etl 133 | a = ['A', 1, True, 'C', 7, False, 'B', 2, False, 'C', 9] 134 | table1 = etl.unflatten(a, 3) 135 | table1 136 | # a table and field name can also be provided as arguments 137 | table2 = [['lines'], 138 | ['A'], 139 | [1], 140 | [True], 141 | ['C'], 142 | [7], 143 | [False], 144 | ['B'], 145 | [2], 146 | [False], 147 | ['C'], 148 | [9]] 149 | table3 = etl.unflatten(table2, 'lines', 3) 150 | table3 151 | -------------------------------------------------------------------------------- /examples/transform/selects.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # select() 5 | ########## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | ['a', 4, 9.3], 10 | ['a', 2, 88.2], 11 | ['b', 1, 23.3], 12 | ['c', 8, 42.0], 13 | ['d', 7, 100.9], 14 | ['c', 2]] 15 | # the second positional argument can be a function accepting 16 | # a row 17 | table2 = etl.select(table1, 18 | lambda rec: rec.foo == 'a' and rec.baz > 88.1) 19 | table2 20 | # the second positional argument can also be an expression 21 | # string, which will be converted to a function using petl.expr() 22 | table3 = etl.select(table1, "{foo} == 'a' and {baz} > 88.1") 23 | table3 24 | # the condition can also be applied to a single field 25 | table4 = etl.select(table1, 'foo', lambda v: v == 'a') 26 | table4 27 | 28 | 29 | # selectre() 30 | ############ 31 | 32 | import petl as etl 33 | table1 = [['foo', 'bar', 'baz'], 34 | ['aa', 4, 9.3], 35 | ['aaa', 2, 88.2], 36 | ['b', 1, 23.3], 37 | ['ccc', 8, 42.0], 38 | ['bb', 7, 100.9], 39 | ['c', 2]] 40 | table2 = etl.selectre(table1, 'foo', '[ab]{2}') 41 | table2 42 | 43 | 44 | # selectusingcontext() 45 | 
def query(prv, cur, nxt):
    """Return True when this row's 'bar' value lies within 2 of either
    neighbouring row's 'bar' value (previous or next), False otherwise.

    `prv` and `nxt` are None at the table boundaries and are skipped.
    """
    near_previous = prv is not None and (cur.bar - prv.bar) < 2
    near_next = nxt is not None and (nxt.bar - cur.bar) < 2
    return near_previous or near_next
'z'], 54 | ['B', 2, False], 55 | ['A', 9, False], 56 | ['B', 3, True], 57 | ['C', 9, True]] 58 | added, subtracted = etl.diff(a, b) 59 | # rows in b not in a 60 | added 61 | # rows in a not in b 62 | subtracted 63 | 64 | 65 | # recorddiff() 66 | ############## 67 | 68 | import petl as etl 69 | a = [['foo', 'bar', 'baz'], 70 | ['A', 1, True], 71 | ['C', 7, False], 72 | ['B', 2, False], 73 | ['C', 9, True]] 74 | b = [['bar', 'foo', 'baz'], 75 | [2, 'B', False], 76 | [9, 'A', False], 77 | [3, 'B', True], 78 | [9, 'C', True]] 79 | added, subtracted = etl.recorddiff(a, b) 80 | added 81 | subtracted 82 | 83 | 84 | # intersection() 85 | ################ 86 | 87 | import petl as etl 88 | table1 = [['foo', 'bar', 'baz'], 89 | ['A', 1, True], 90 | ['C', 7, False], 91 | ['B', 2, False], 92 | ['C', 9, True]] 93 | table2 = [['x', 'y', 'z'], 94 | ['B', 2, False], 95 | ['A', 9, False], 96 | ['B', 3, True], 97 | ['C', 9, True]] 98 | table3 = etl.intersection(table1, table2) 99 | table3 100 | 101 | 102 | -------------------------------------------------------------------------------- /examples/transform/sorts.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # sort() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | ['C', 2], 10 | ['A', 9], 11 | ['A', 6], 12 | ['F', 1], 13 | ['D', 10]] 14 | table2 = etl.sort(table1, 'foo') 15 | table2 16 | # sorting by compound key is supported 17 | table3 = etl.sort(table1, key=['foo', 'bar']) 18 | table3 19 | # if no key is specified, the default is a lexical sort 20 | table4 = etl.sort(table1) 21 | table4 22 | 23 | 24 | # mergesort() 25 | ############# 26 | 27 | import petl as etl 28 | table1 = [['foo', 'bar'], 29 | ['A', 9], 30 | ['C', 2], 31 | ['D', 10], 32 | ['A', 6], 33 | ['F', 1]] 34 | table2 = [['foo', 'bar'], 35 | ['B', 3], 36 | ['D', 10], 37 | ['A', 10], 38 | ['F', 4]] 39 | table3 = 
etl.mergesort(table1, table2, key='foo') 40 | table3.lookall() 41 | 42 | 43 | # issorted() 44 | ############ 45 | 46 | import petl as etl 47 | table1 = [['foo', 'bar', 'baz'], 48 | ['a', 1, True], 49 | ['b', 3, True], 50 | ['b', 2]] 51 | etl.issorted(table1, key='foo') 52 | etl.issorted(table1, key='bar') 53 | etl.issorted(table1, key='foo', strict=True) 54 | etl.issorted(table1, key='foo', reverse=True) 55 | 56 | -------------------------------------------------------------------------------- /examples/transform/unpacks.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # unpack() 5 | ########## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | [1, ['a', 'b']], 10 | [2, ['c', 'd']], 11 | [3, ['e', 'f']]] 12 | table2 = etl.unpack(table1, 'bar', ['baz', 'quux']) 13 | table2 14 | 15 | 16 | # unpackdict() 17 | ############## 18 | 19 | import petl as etl 20 | table1 = [['foo', 'bar'], 21 | [1, {'baz': 'a', 'quux': 'b'}], 22 | [2, {'baz': 'c', 'quux': 'd'}], 23 | [3, {'baz': 'e', 'quux': 'f'}]] 24 | table2 = etl.unpackdict(table1, 'bar') 25 | table2 26 | -------------------------------------------------------------------------------- /examples/transform/validation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | # validate() 6 | ############ 7 | 8 | import petl as etl 9 | # define some validation constraints 10 | header = ('foo', 'bar', 'baz') 11 | constraints = [ 12 | dict(name='foo_int', field='foo', test=int), 13 | dict(name='bar_date', field='bar', test=etl.dateparser('%Y-%m-%d')), 14 | dict(name='baz_enum', field='baz', assertion=lambda v: v in ['Y', 'N']), 15 | dict(name='not_none', assertion=lambda row: None not in row) 16 | ] 17 | # now validate a table 18 | table = (('foo', 'bar', 'bazzz'), 19 | 
(1, '2000-01-01', 'Y'), 20 | ('x', '2010-10-10', 'N'), 21 | (2, '2000/01/01', 'Y'), 22 | (3, '2015-12-12', 'x'), 23 | (4, None, 'N'), 24 | ('y', '1999-99-99', 'z'), 25 | (6, '2000-01-01'), 26 | (7, '2001-02-02', 'N', True)) 27 | problems = etl.validate(table, constraints=constraints, header=header) 28 | problems.lookall() 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/util/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import, \ 2 | unicode_literals 3 | 4 | 5 | # values() 6 | ########## 7 | 8 | import petl as etl 9 | table1 = [['foo', 'bar'], 10 | ['a', True], 11 | ['b'], 12 | ['b', True], 13 | ['c', False]] 14 | foo = etl.values(table1, 'foo') 15 | foo 16 | list(foo) 17 | bar = etl.values(table1, 'bar') 18 | bar 19 | list(bar) 20 | # values from multiple fields 21 | table2 = [['foo', 'bar', 'baz'], 22 | [1, 'a', True], 23 | [2, 'bb', True], 24 | [3, 'd', False]] 25 | foobaz = etl.values(table2, 'foo', 'baz') 26 | foobaz 27 | list(foobaz) 28 | 29 | 30 | # header() 31 | ########## 32 | 33 | 34 | import petl as etl 35 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 36 | etl.header(table) 37 | 38 | 39 | # fieldnames() 40 | ############## 41 | 42 | import petl as etl 43 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 44 | etl.fieldnames(table) 45 | etl.header(table) 46 | 47 | 48 | # data() 49 | ######## 50 | 51 | import petl as etl 52 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 53 | d = etl.data(table) 54 | list(d) 55 | 56 | 57 | # dicts() 58 | ######### 59 | 60 | import petl as etl 61 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 62 | d = etl.dicts(table) 63 | d 64 | list(d) 65 | 66 | 67 | # namedtuples() 68 | ############### 69 | 70 | import petl as etl 71 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 72 | d = etl.namedtuples(table) 73 | d 74 | list(d) 75 | 76 | 77 | # records() 78 | 
############### 79 | 80 | import petl as etl 81 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 82 | d = etl.records(table) 83 | d 84 | list(d) 85 | 86 | 87 | # rowgroupby() 88 | ############## 89 | 90 | import petl as etl 91 | table1 = [['foo', 'bar', 'baz'], 92 | ['a', 1, True], 93 | ['b', 3, True], 94 | ['b', 2]] 95 | # group entire rows 96 | for key, group in etl.rowgroupby(table1, 'foo'): 97 | print(key, list(group)) 98 | 99 | # group specific values 100 | for key, group in etl.rowgroupby(table1, 'foo', 'bar'): 101 | print(key, list(group)) 102 | 103 | 104 | # empty() 105 | ######### 106 | 107 | import petl as etl 108 | table = ( 109 | etl 110 | .empty() 111 | .addcolumn('foo', ['A', 'B']) 112 | .addcolumn('bar', [1, 2]) 113 | ) 114 | table 115 | -------------------------------------------------------------------------------- /examples/util/counting.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # nrows() 5 | ######### 6 | 7 | import petl as etl 8 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 9 | etl.nrows(table) 10 | 11 | 12 | # valuecount() 13 | ############## 14 | 15 | import petl as etl 16 | table = [['foo', 'bar'], 17 | ['a', 1], 18 | ['b', 2], 19 | ['b', 7]] 20 | etl.valuecount(table, 'foo', 'b') 21 | 22 | 23 | # valuecounter() 24 | ################ 25 | 26 | import petl as etl 27 | table = [['foo', 'bar'], 28 | ['a', True], 29 | ['b'], 30 | ['b', True], 31 | ['c', False]] 32 | etl.valuecounter(table, 'foo').most_common() 33 | 34 | 35 | # valuecounts() 36 | ############### 37 | 38 | import petl as etl 39 | table = [['foo', 'bar', 'baz'], 40 | ['a', True, 0.12], 41 | ['a', True, 0.17], 42 | ['b', False, 0.34], 43 | ['b', False, 0.44], 44 | ['b']] 45 | etl.valuecounts(table, 'foo') 46 | etl.valuecounts(table, 'foo', 'bar') 47 | 48 | 49 | # parsecounter() 50 | ################ 51 | 52 | import petl as etl 53 | table = [['foo', 'bar', 
'baz'], 54 | ['A', 'aaa', 2], 55 | ['B', u'2', '3.4'], 56 | [u'B', u'3', u'7.8', True], 57 | ['D', '3.7', 9.0], 58 | ['E', 42]] 59 | counter, errors = etl.parsecounter(table, 'bar') 60 | counter.most_common() 61 | errors.most_common() 62 | 63 | 64 | # parsecounts() 65 | ############### 66 | 67 | import petl as etl 68 | table = [['foo', 'bar', 'baz'], 69 | ['A', 'aaa', 2], 70 | ['B', u'2', '3.4'], 71 | [u'B', u'3', u'7.8', True], 72 | ['D', '3.7', 9.0], 73 | ['E', 42]] 74 | etl.parsecounts(table, 'bar') 75 | 76 | 77 | # typecounter() 78 | ############### 79 | 80 | import petl as etl 81 | table = [['foo', 'bar', 'baz'], 82 | ['A', 1, 2], 83 | ['B', u'2', '3.4'], 84 | [u'B', u'3', u'7.8', True], 85 | ['D', u'xyz', 9.0], 86 | ['E', 42]] 87 | etl.typecounter(table, 'foo').most_common() 88 | etl.typecounter(table, 'bar').most_common() 89 | etl.typecounter(table, 'baz').most_common() 90 | 91 | 92 | # typecounts() 93 | ############## 94 | 95 | import petl as etl 96 | table = [['foo', 'bar', 'baz'], 97 | [b'A', 1, 2], 98 | [b'B', '2', b'3.4'], 99 | ['B', '3', '7.8', True], 100 | ['D', u'xyz', 9.0], 101 | ['E', 42]] 102 | etl.typecounts(table, 'foo') 103 | etl.typecounts(table, 'bar') 104 | etl.typecounts(table, 'baz') 105 | 106 | 107 | # stringpatterns() 108 | ################## 109 | 110 | import petl as etl 111 | table = [['foo', 'bar'], 112 | ['Mr. Foo', '123-1254'], 113 | ['Mrs. Bar', '234-1123'], 114 | ['Mr. Spo', '123-1254'], 115 | [u'Mr. Baz', u'321 1434'], 116 | [u'Mrs. Baz', u'321 1434'], 117 | ['Mr. 
Quux', '123-1254-XX']] 118 | etl.stringpatterns(table, 'foo') 119 | etl.stringpatterns(table, 'bar') 120 | 121 | 122 | # rowlengths() 123 | ############### 124 | 125 | import petl as etl 126 | table = [['foo', 'bar', 'baz'], 127 | ['A', 1, 2], 128 | ['B', '2', '3.4'], 129 | [u'B', u'3', u'7.8', True], 130 | ['D', 'xyz', 9.0], 131 | ['E', None], 132 | ['F', 9]] 133 | etl.rowlengths(table) 134 | -------------------------------------------------------------------------------- /examples/util/lookups.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # lookup() 4 | ########## 5 | import petl as etl 6 | 7 | table1 = [['foo', 'bar'], 8 | ['a', 1], 9 | ['b', 2], 10 | ['b', 3]] 11 | lkp = etl.lookup(table1, 'foo', 'bar') 12 | lkp['a'] 13 | lkp['b'] 14 | # if no valuespec argument is given, defaults to the whole 15 | # row (as a tuple) 16 | lkp = etl.lookup(table1, 'foo') 17 | lkp['a'] 18 | lkp['b'] 19 | # compound keys are supported 20 | table2 = [['foo', 'bar', 'baz'], 21 | ['a', 1, True], 22 | ['b', 2, False], 23 | ['b', 3, True], 24 | ['b', 3, False]] 25 | lkp = etl.lookup(table2, ('foo', 'bar'), 'baz') 26 | lkp[('a', 1)] 27 | lkp[('b', 2)] 28 | lkp[('b', 3)] 29 | # data can be loaded into an existing dictionary-like 30 | # object, including persistent dictionaries created via the 31 | # shelve module 32 | import shelve 33 | 34 | lkp = shelve.open('example1.dat', flag='n') 35 | lkp = etl.lookup(table1, 'foo', 'bar', lkp) 36 | lkp.close() 37 | lkp = shelve.open('example1.dat', flag='r') 38 | lkp['a'] 39 | lkp['b'] 40 | 41 | 42 | # lookupone() 43 | ############# 44 | 45 | import petl as etl 46 | 47 | table1 = [['foo', 'bar'], 48 | ['a', 1], 49 | ['b', 2], 50 | ['b', 3]] 51 | # if the specified key is not unique and strict=False (default), 52 | # the first value wins 53 | lkp = etl.lookupone(table1, 'foo', 'bar') 54 | lkp['a'] 55 | lkp['b'] 56 | # if the specified 
key is not unique and strict=True, will raise 57 | # DuplicateKeyError 58 | try: 59 | lkp = etl.lookupone(table1, 'foo', strict=True) 60 | except etl.errors.DuplicateKeyError as e: 61 | print(e) 62 | 63 | # compound keys are supported 64 | table2 = [['foo', 'bar', 'baz'], 65 | ['a', 1, True], 66 | ['b', 2, False], 67 | ['b', 3, True], 68 | ['b', 3, False]] 69 | lkp = etl.lookupone(table2, ('foo', 'bar'), 'baz') 70 | lkp[('a', 1)] 71 | lkp[('b', 2)] 72 | lkp[('b', 3)] 73 | # data can be loaded into an existing dictionary-like 74 | # object, including persistent dictionaries created via the 75 | # shelve module 76 | import shelve 77 | 78 | lkp = shelve.open('example2.dat', flag='n') 79 | lkp = etl.lookupone(table1, 'foo', 'bar', lkp) 80 | lkp.close() 81 | lkp = shelve.open('example2.dat', flag='r') 82 | lkp['a'] 83 | lkp['b'] 84 | 85 | 86 | # dictlookup() 87 | ############## 88 | 89 | import petl as etl 90 | 91 | table1 = [['foo', 'bar'], 92 | ['a', 1], 93 | ['b', 2], 94 | ['b', 3]] 95 | lkp = etl.dictlookup(table1, 'foo') 96 | lkp['a'] 97 | lkp['b'] 98 | # compound keys are supported 99 | table2 = [['foo', 'bar', 'baz'], 100 | ['a', 1, True], 101 | ['b', 2, False], 102 | ['b', 3, True], 103 | ['b', 3, False]] 104 | lkp = etl.dictlookup(table2, ('foo', 'bar')) 105 | lkp[('a', 1)] 106 | lkp[('b', 2)] 107 | lkp[('b', 3)] 108 | # data can be loaded into an existing dictionary-like 109 | # object, including persistent dictionaries created via the 110 | # shelve module 111 | import shelve 112 | 113 | lkp = shelve.open('example3.dat', flag='n') 114 | lkp = etl.dictlookup(table1, 'foo', lkp) 115 | lkp.close() 116 | lkp = shelve.open('example3.dat', flag='r') 117 | lkp['a'] 118 | lkp['b'] 119 | 120 | 121 | # dictlookupone() 122 | ################# 123 | 124 | import petl as etl 125 | 126 | table1 = [['foo', 'bar'], 127 | ['a', 1], 128 | ['b', 2], 129 | ['b', 3]] 130 | # if the specified key is not unique and strict=False (default), 131 | # the first value wins 132 | lkp = 
etl.dictlookupone(table1, 'foo') 133 | lkp['a'] 134 | lkp['b'] 135 | # if the specified key is not unique and strict=True, will raise 136 | # DuplicateKeyError 137 | try: 138 | lkp = etl.dictlookupone(table1, 'foo', strict=True) 139 | except etl.errors.DuplicateKeyError as e: 140 | print(e) 141 | 142 | # compound keys are supported 143 | table2 = [['foo', 'bar', 'baz'], 144 | ['a', 1, True], 145 | ['b', 2, False], 146 | ['b', 3, True], 147 | ['b', 3, False]] 148 | lkp = etl.dictlookupone(table2, ('foo', 'bar')) 149 | lkp[('a', 1)] 150 | lkp[('b', 2)] 151 | lkp[('b', 3)] 152 | # data can be loaded into an existing dictionary-like 153 | # object, including persistent dictionaries created via the 154 | # shelve module 155 | import shelve 156 | 157 | lkp = shelve.open('example4.dat', flag='n') 158 | lkp = etl.dictlookupone(table1, 'foo', lkp) 159 | lkp.close() 160 | lkp = shelve.open('example4.dat', flag='r') 161 | lkp['a'] 162 | lkp['b'] 163 | 164 | 165 | -------------------------------------------------------------------------------- /examples/util/materialise.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # columns() 5 | ########### 6 | 7 | import petl as etl 8 | table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]] 9 | cols = etl.columns(table) 10 | cols['foo'] 11 | cols['bar'] 12 | 13 | 14 | # facetcolumns() 15 | ################ 16 | 17 | import petl as etl 18 | table = [['foo', 'bar', 'baz'], 19 | ['a', 1, True], 20 | ['b', 2, True], 21 | ['b', 3]] 22 | fc = etl.facetcolumns(table, 'foo') 23 | fc['a'] 24 | fc['b'] 25 | -------------------------------------------------------------------------------- /examples/util/misc.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # typeset() 5 | ########### 6 | 7 | import petl as etl 8 | table 
= [['foo', 'bar', 'baz'], 9 | ['A', 1, '2'], 10 | ['B', u'2', '3.4'], 11 | [u'B', u'3', '7.8', True], 12 | ['D', u'xyz', 9.0], 13 | ['E', 42]] 14 | sorted(etl.typeset(table, 'foo')) 15 | sorted(etl.typeset(table, 'bar')) 16 | sorted(etl.typeset(table, 'baz')) 17 | 18 | 19 | # diffheaders() 20 | ############### 21 | 22 | import petl as etl 23 | table1 = [['foo', 'bar', 'baz'], 24 | ['a', 1, .3]] 25 | table2 = [['baz', 'bar', 'quux'], 26 | ['a', 1, .3]] 27 | add, sub = etl.diffheaders(table1, table2) 28 | add 29 | sub 30 | 31 | 32 | # diffvalues() 33 | ############## 34 | 35 | import petl as etl 36 | table1 = [['foo', 'bar'], 37 | ['a', 1], 38 | ['b', 3]] 39 | table2 = [['bar', 'foo'], 40 | [1, 'a'], 41 | [3, 'c']] 42 | add, sub = etl.diffvalues(table1, table2, 'foo') 43 | add 44 | sub 45 | 46 | 47 | # nthword() 48 | ########### 49 | 50 | import petl as etl 51 | s = 'foo bar' 52 | f = etl.nthword(0) 53 | f(s) 54 | g = etl.nthword(1) 55 | g(s) 56 | 57 | -------------------------------------------------------------------------------- /examples/util/parsers.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # datetimeparser() 5 | ################## 6 | 7 | from petl import datetimeparser 8 | isodatetime = datetimeparser('%Y-%m-%dT%H:%M:%S') 9 | isodatetime('2002-12-25T00:00:00') 10 | try: 11 | isodatetime('2002-12-25T00:00:99') 12 | except ValueError as e: 13 | print(e) 14 | 15 | 16 | # dateparser() 17 | ############## 18 | 19 | from petl import dateparser 20 | isodate = dateparser('%Y-%m-%d') 21 | isodate('2002-12-25') 22 | try: 23 | isodate('2002-02-30') 24 | except ValueError as e: 25 | print(e) 26 | 27 | 28 | # timeparser() 29 | ############## 30 | 31 | from petl import timeparser 32 | isotime = timeparser('%H:%M:%S') 33 | isotime('00:00:00') 34 | isotime('13:00:00') 35 | try: 36 | isotime('12:00:99') 37 | except ValueError as e: 38 | print(e) 39 | 40 | 
try: 41 | isotime('25:00:00') 42 | except ValueError as e: 43 | print(e) 44 | 45 | 46 | # boolparser() 47 | ############## 48 | 49 | from petl import boolparser 50 | mybool = boolparser(true_strings=['yes', 'y'], false_strings=['no', 'n']) 51 | mybool('y') 52 | mybool('yes') 53 | mybool('Y') 54 | mybool('No') 55 | try: 56 | mybool('foo') 57 | except ValueError as e: 58 | print(e) 59 | 60 | try: 61 | mybool('True') 62 | except ValueError as e: 63 | print(e) 64 | -------------------------------------------------------------------------------- /examples/util/random.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # randomtable() 5 | ############### 6 | 7 | import petl as etl 8 | table = etl.randomtable(3, 100, seed=42) 9 | table 10 | 11 | 12 | # dummytable() 13 | ############## 14 | 15 | import petl as etl 16 | table1 = etl.dummytable(100, seed=42) 17 | table1 18 | # customise fields 19 | import random 20 | from functools import partial 21 | fields = [('foo', random.random), 22 | ('bar', partial(random.randint, 0, 500)), 23 | ('baz', partial(random.choice, 24 | ['chocolate', 'strawberry', 'vanilla']))] 25 | table2 = etl.dummytable(100, fields=fields, seed=42) 26 | table2 27 | -------------------------------------------------------------------------------- /examples/util/statistics.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # limits() 5 | ########## 6 | 7 | import petl as etl 8 | table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]] 9 | minv, maxv = etl.limits(table, 'bar') 10 | minv 11 | maxv 12 | 13 | 14 | # stats() 15 | ######### 16 | 17 | import petl as etl 18 | table = [['foo', 'bar', 'baz'], 19 | ['A', 1, 2], 20 | ['B', '2', '3.4'], 21 | [u'B', u'3', u'7.8', True], 22 | ['D', 'xyz', 9.0], 23 | ['E', None]] 24 | 
etl.stats(table, 'bar') 25 | 26 | 27 | -------------------------------------------------------------------------------- /examples/util/timing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # progress() 5 | ############ 6 | 7 | import petl as etl 8 | table = etl.dummytable(100000) 9 | table.progress(10000).tocsv('example.csv') 10 | 11 | 12 | # clock() 13 | ######### 14 | 15 | import petl as etl 16 | t1 = etl.dummytable(100000) 17 | c1 = etl.clock(t1) 18 | t2 = etl.convert(c1, 'foo', lambda v: v**2) 19 | c2 = etl.clock(t2) 20 | p = etl.progress(c2, 10000) 21 | etl.tocsv(p, 'example.csv') 22 | # time consumed retrieving rows from t1 23 | c1.time 24 | # time consumed retrieving rows from t2 25 | c2.time 26 | # actual time consumed by the convert step 27 | c2.time - c1.time 28 | 29 | 30 | -------------------------------------------------------------------------------- /examples/util/vis.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # look() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | ['a', 1], 10 | ['b', 2]] 11 | etl.look(table1) 12 | # alternative formatting styles 13 | etl.look(table1, style='simple') 14 | etl.look(table1, style='minimal') 15 | # any irregularities in the length of header and/or data 16 | # rows will appear as blank cells 17 | table2 = [['foo', 'bar'], 18 | ['a'], 19 | ['b', 2, True]] 20 | etl.look(table2) 21 | 22 | 23 | # see() 24 | ####### 25 | 26 | import petl as etl 27 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 28 | etl.see(table) 29 | -------------------------------------------------------------------------------- /petl/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, 
class Comparable(object):
    """Wrapper class to allow for flexible comparison of objects of different
    types, preserving the relaxed sorting behaviour of Python 2 with
    additional flexibility to allow for comparison of arbitrary objects with
    the `None` value (for example, the date and time objects from the standard
    library cannot be directly compared with `None` in Python 2).

    """

    # NOTE(review): defining __eq__ without __hash__ makes instances
    # unhashable on Python 3 — presumably intentional for a sort-key
    # wrapper, but confirm before relying on hashability.
    __slots__ = ['obj', 'inner']

    def __init__(self, obj):
        """Wrap `obj`; list/tuple elements are wrapped recursively so that
        compound sort keys compare element-wise under the same rules."""
        # store wrapped object unchanged
        self.inner = obj
        # handle lists and tuples
        if isinstance(obj, (list, tuple)):
            obj = tuple(Comparable(o) for o in obj)
        self.obj = obj

    def __lt__(self, other):
        """Total ordering across mixed types: None sorts first, then
        numbers, then everything else; within 'everything else', bytes
        sort before text, and otherwise native comparison is attempted
        with a fallback to comparing type names."""

        # convenience
        obj = self.obj
        if isinstance(other, Comparable):
            other = other.obj

        # None < everything else
        if other is None:
            return False
        if obj is None:
            return True

        # numbers < everything else (except None)
        if isinstance(obj, numeric_types) \
                and not isinstance(other, numeric_types):
            return True
        if not isinstance(obj, numeric_types) \
                and isinstance(other, numeric_types):
            return False

        # binary < unicode
        if isinstance(obj, text_type) and isinstance(other, binary_type):
            return False
        if isinstance(obj, binary_type) and isinstance(other, text_type):
            return True

        try:
            # attempt native comparison
            return obj < other

        except TypeError:
            # fall back to comparing type names
            return _typestr(obj) < _typestr(other)

    def __eq__(self, other):
        # equality compares the (possibly recursively wrapped) value;
        # unwrap the other side when it is also a Comparable
        if isinstance(other, Comparable):
            return self.obj == other.obj
        return self.obj == other

    def __le__(self, other):
        # derived from __lt__/__eq__ rather than delegating to the wrapped
        # object, so mixed-type comparisons stay consistent
        return self < other or self == other

    def __gt__(self, other):
        return not (self < other or self == other)

    def __ge__(self, other):
        return not (self < other)

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        # Python 2 text conversion hook (text_type is unicode there)
        return text_type(self.obj)

    def __repr__(self):
        return 'Comparable(' + repr(self.obj) + ')'

    def __iter__(self, *args, **kwargs):
        # delegate container protocol to the wrapped object
        return iter(self.obj, *args, **kwargs)

    def __len__(self):
        return len(self.obj)

    def __getitem__(self, item):
        return self.obj.__getitem__(item)
_typestr(x): 98 | # attempt to preserve Python 2 name orderings 99 | if isinstance(x, binary_type): 100 | return 'str' 101 | if isinstance(x, text_type): 102 | return 'unicode' 103 | return type(x).__name__ 104 | 105 | 106 | def comparable_itemgetter(*args): 107 | getter = operator.itemgetter(*args) 108 | getter_with_default = _itemgetter_with_default(*args) 109 | 110 | def _getter_with_fallback(obj): 111 | try: 112 | return getter(obj) 113 | except (IndexError, KeyError): 114 | return getter_with_default(obj) 115 | g = lambda x: Comparable(_getter_with_fallback(x)) 116 | return g 117 | 118 | 119 | def _itemgetter_with_default(*args): 120 | """ itemgetter compatible with `operator.itemgetter` behavior, filling missing 121 | values with default instead of raising IndexError or KeyError """ 122 | def _get_default(obj, item, default): 123 | try: 124 | return obj[item] 125 | except (IndexError, KeyError): 126 | return default 127 | if len(args) == 1: 128 | return partial(_get_default, item=args[0], default=None) 129 | return lambda obj: tuple(_get_default(obj, item=item, default=None) for item in args) 130 | -------------------------------------------------------------------------------- /petl/compat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | import sys 5 | 6 | 7 | 8 | ########################## 9 | # Python 3 compatibility # 10 | ########################## 11 | 12 | PY2 = sys.version_info.major == 2 13 | PY3 = sys.version_info.major == 3 14 | 15 | if PY2: 16 | from itertools import ifilter, ifilterfalse, imap, izip, izip_longest 17 | from string import maketrans 18 | from decimal import Decimal 19 | string_types = basestring, 20 | integer_types = int, long 21 | numeric_types = bool, int, long, float, Decimal 22 | text_type = unicode 23 | binary_type = str 24 | from urllib2 import urlopen 25 | try: 26 | from cStringIO import StringIO 27 | except 
ImportError: 28 | from StringIO import StringIO 29 | BytesIO = StringIO 30 | try: 31 | import cPickle as pickle 32 | except ImportError: 33 | import pickle 34 | maxint = sys.maxint 35 | long = long 36 | xrange = xrange 37 | reduce = reduce 38 | 39 | else: 40 | ifilter = filter 41 | imap = map 42 | izip = zip 43 | xrange = range 44 | from decimal import Decimal 45 | from itertools import filterfalse as ifilterfalse 46 | from itertools import zip_longest as izip_longest 47 | from functools import reduce 48 | maketrans = str.maketrans 49 | string_types = str, 50 | integer_types = int, 51 | numeric_types = bool, int, float, Decimal 52 | class_types = type, 53 | text_type = str 54 | binary_type = bytes 55 | long = int 56 | from urllib.request import urlopen 57 | from io import StringIO, BytesIO 58 | import pickle 59 | maxint = sys.maxsize 60 | 61 | try: 62 | advance_iterator = next 63 | except NameError: 64 | def advance_iterator(it): 65 | return it.next() 66 | next = advance_iterator 67 | 68 | try: 69 | callable = callable 70 | except NameError: 71 | def callable(obj): 72 | return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) 73 | -------------------------------------------------------------------------------- /petl/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | from petl.compat import text_type 5 | 6 | 7 | look_style = 'grid' # alternatives: 'simple', 'minimal' 8 | look_limit = 5 9 | look_index_header = False 10 | look_vrepr = repr 11 | look_width = None 12 | see_limit = 5 13 | see_index_header = False 14 | see_vrepr = repr 15 | display_limit = 5 16 | display_index_header = False 17 | display_vrepr = text_type 18 | sort_buffersize = 100000 19 | failonerror=False # False, True, 'inline' 20 | """ 21 | Controls what happens when unhandled exceptions are raised in a 22 | transformation: 23 | 24 | - If `False`, exceptions are 
class DuplicateKeyError(Exception):
    """Raised when a key value occurs more than once but unique keys were
    required, e.g., `lookupone`/`dictlookupone` with ``strict=True``."""

    def __init__(self, key):
        self.key = key

    def __str__(self):
        # wrap the key in a 1-tuple: compound (tuple) keys are supported
        # by the lookup functions, and a bare tuple on the right of % would
        # be unpacked as multiple format arguments and raise TypeError
        return 'duplicate key: %r' % (self.key,)


class FieldSelectionError(Exception):
    """Raised when a field selection does not match a field name or a
    valid field index."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        # 1-tuple guard so a tuple selection is rendered via %r rather
        # than unpacked by the % operator
        return 'selection is not a field or valid field index: %r' \
               % (self.value,)


class ArgumentError(Exception):
    """Raised when a function is invoked with an invalid argument."""

    def __init__(self, message):
        self.message = message

    def __str__(self):
        # same 1-tuple guard for consistency with the other errors
        return 'argument error: %s' % (self.message,)
def getcodec(encoding):
    """Look up and return the codec for *encoding*.

    When *encoding* is None, the locale's preferred encoding is used
    instead, matching the behaviour of Python's text I/O defaults.
    """
    name = encoding if encoding is not None else locale.getpreferredencoding()
    return codecs.lookup(name)
def itercolumns(cols, header, missing):
    """Yield a header row, then data rows formed by zipping *cols*
    together, padding shorter columns with *missing*.

    When *header* is None, field names 'f0', 'f1', ... are generated,
    one per column.
    """
    hdr = ['f%s' % i for i in range(len(cols))] if header is None else header
    yield tuple(hdr)
    for row in izip_longest(*cols, fillvalue=missing):
        yield row
reader = csv.reader(csvfile, **self.csvargs) 37 | for row in reader: 38 | yield tuple(row) 39 | finally: 40 | csvfile.detach() 41 | 42 | 43 | def tocsv_impl(table, source, **kwargs): 44 | _writecsv(table, source=source, mode='wb', **kwargs) 45 | 46 | 47 | def appendcsv_impl(table, source, **kwargs): 48 | _writecsv(table, source=source, mode='ab', **kwargs) 49 | 50 | 51 | def _writecsv(table, source, mode, write_header, encoding, errors, **csvargs): 52 | rows = table if write_header else data(table) 53 | with source.open(mode) as buf: 54 | # wrap buffer for text IO 55 | csvfile = io.TextIOWrapper(buf, encoding=encoding, errors=errors, 56 | newline='') 57 | try: 58 | writer = csv.writer(csvfile, **csvargs) 59 | for row in rows: 60 | writer.writerow(row) 61 | csvfile.flush() 62 | finally: 63 | csvfile.detach() 64 | 65 | 66 | def teecsv_impl(table, source, **kwargs): 67 | return TeeCSVView(table, source=source, **kwargs) 68 | 69 | 70 | class TeeCSVView(Table): 71 | 72 | def __init__(self, table, source=None, encoding=None, 73 | errors='strict', write_header=True, **csvargs): 74 | self.table = table 75 | self.source = source 76 | self.write_header = write_header 77 | self.encoding = encoding 78 | self.errors = errors 79 | self.csvargs = csvargs 80 | 81 | def __iter__(self): 82 | with self.source.open('wb') as buf: 83 | # wrap buffer for text IO 84 | csvfile = io.TextIOWrapper(buf, encoding=self.encoding, 85 | errors=self.errors, newline='') 86 | try: 87 | writer = csv.writer(csvfile, **self.csvargs) 88 | it = iter(self.table) 89 | try: 90 | hdr = next(it) 91 | except StopIteration: 92 | return 93 | if self.write_header: 94 | writer.writerow(hdr) 95 | yield tuple(hdr) 96 | for row in it: 97 | writer.writerow(row) 98 | yield tuple(row) 99 | csvfile.flush() 100 | finally: 101 | csvfile.detach() 102 | -------------------------------------------------------------------------------- /petl/io/db_utils.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import logging 6 | 7 | 8 | from petl.compat import callable 9 | 10 | 11 | logger = logging.getLogger(__name__) 12 | debug = logger.debug 13 | 14 | 15 | def _is_dbapi_connection(dbo): 16 | return _hasmethod(dbo, 'cursor') 17 | 18 | 19 | def _is_clikchouse_dbapi_connection(dbo): 20 | return 'clickhouse_driver' in str(type(dbo)) 21 | 22 | 23 | def _is_dbapi_cursor(dbo): 24 | return _hasmethods(dbo, 'execute', 'executemany', 'fetchone', 'fetchmany', 25 | 'fetchall') 26 | 27 | 28 | def _is_sqlalchemy_engine(dbo): 29 | return (_hasmethods(dbo, 'execute', 'connect', 'raw_connection') 30 | and _hasprop(dbo, 'driver')) 31 | 32 | 33 | def _is_sqlalchemy_session(dbo): 34 | return _hasmethods(dbo, 'execute', 'connection', 'get_bind') 35 | 36 | 37 | def _is_sqlalchemy_connection(dbo): 38 | # N.B., this are not completely selective conditions, this test needs 39 | # to be applied after ruling out DB-API cursor 40 | return _hasmethod(dbo, 'execute') and _hasprop(dbo, 'connection') 41 | 42 | 43 | def _hasmethod(o, n): 44 | return hasattr(o, n) and callable(getattr(o, n)) 45 | 46 | 47 | def _hasmethods(o, *l): 48 | return all(_hasmethod(o, n) for n in l) 49 | 50 | 51 | def _hasprop(o, n): 52 | return hasattr(o, n) and not callable(getattr(o, n)) 53 | 54 | 55 | # default DB quote char per SQL-92 56 | quotechar = '"' 57 | 58 | 59 | def _quote(s): 60 | # crude way to sanitise table and field names 61 | # conform with the SQL-92 standard. 
def _placeholders(connection, names):
    """Build a comma-separated SQL placeholder string for *names*, matching
    the DB-API paramstyle declared by *connection*'s driver module.

    Question-mark (qmark) style is the fallback whenever the style cannot
    be discovered or is unrecognised.
    """
    qmark = ', '.join(['?'] * len(names))
    if connection is None:
        # nothing to introspect, assume question-mark style
        debug('connection is None, default to using qmark paramstyle')
        return qmark
    # NOTE(review): __import__ on a dotted module name returns the top-level
    # package — assumes the driver declares paramstyle there; confirm.
    mod = __import__(connection.__class__.__module__)
    if not hasattr(mod, 'paramstyle'):
        debug('module %r from connection %r has no attribute paramstyle, '
              'defaulting to qmark', mod, connection)
        return qmark
    style = mod.paramstyle
    if style == 'qmark':
        debug('found paramstyle qmark')
        return qmark
    if style in ('format', 'pyformat'):
        debug('found paramstyle pyformat')
        return ', '.join(['%s'] * len(names))
    if style == 'numeric':
        debug('found paramstyle numeric')
        return ', '.join([':' + str(i + 1) for i in range(len(names))])
    if style == 'named':
        debug('found paramstyle named')
        return ', '.join([':%s' % name for name in names])
    debug('found unexpected paramstyle %r, defaulting to qmark', style)
    return qmark
def fromdataframe(df, include_index=False):
    """
    Extract a table from a `pandas <http://pandas.pydata.org/>`_ DataFrame.

    The first row yielded is the header (the DataFrame's column names);
    each subsequent row is one DataFrame row as a tuple. If
    `include_index` is True, the DataFrame index is prepended as an extra
    'index' field. E.g.::

        >>> import petl as etl
        >>> import pandas as pd
        >>> records = [('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)]
        >>> df = pd.DataFrame.from_records(records, columns=('foo', 'bar', 'baz'))
        >>> table = etl.fromdataframe(df)
        >>> table
        +-----------+-----+-----+
        | foo       | bar | baz |
        +===========+=====+=====+
        | 'apples'  |   1 | 2.5 |
        +-----------+-----+-----+
        | 'oranges' |   3 | 4.4 |
        +-----------+-----+-----+
        | 'pears'   |   7 | 0.1 |
        +-----------+-----+-----+

    """

    # lazy view: iteration is deferred until the returned table is consumed
    return DataFrameView(df, include_index=include_index)
def fromxls(filename, sheet=None, use_view=True, **kwargs):
    """
    Extract a table from a sheet in an Excel .xls file.

    Sheet is identified by its name or index number.

    N.B., the sheet name is case sensitive.

    If `use_view` is True (the default) the sheet is read via
    xlutils.view, which converts Excel dates automatically; otherwise
    xlrd is used directly. Any extra keyword arguments are passed through
    to the underlying reader.

    """

    # lazy view: the workbook is not opened until the table is iterated
    return XLSView(filename, sheet=sheet, use_view=use_view, **kwargs)
def toxls(tbl, filename, sheet, encoding=None, style_compression=0,
          styles=None):
    """
    Write a table to a new Excel .xls file.

    `sheet` is the name of the worksheet to create. `encoding` defaults
    to the locale's preferred encoding. `styles`, if given, maps field
    names to xlwt style objects applied to the data cells of each column;
    fields missing from the mapping (or mapped to None) get the default
    style. `filename` may be anything accepted by
    :func:`petl.io.sources.write_source_from_arg`.

    """

    import xlwt
    if encoding is None:
        encoding = locale.getpreferredencoding()
    wb = xlwt.Workbook(encoding=encoding, style_compression=style_compression)
    ws = wb.add_sheet(sheet)

    if styles is None:
        # simple version, don't worry about styles
        for r, row in enumerate(tbl):
            for c, v in enumerate(row):
                ws.write(r, c, label=v)
    else:
        # work on a copy so the caller's styles mapping is not mutated
        styles = dict(styles)
        it = iter(tbl)
        # initialise flds so an empty table doesn't raise NameError below
        flds = []
        try:
            hdr = next(it)
            flds = list(map(str, hdr))
            for c, f in enumerate(flds):
                ws.write(0, c, label=f)
                if f not in styles or styles[f] is None:
                    styles[f] = xlwt.Style.default_style
        except StopIteration:
            pass  # empty table: no header, no data rows
        # convert to list for easy zipping; empty when there was no header
        styles = [styles[f] for f in flds]
        for r, row in enumerate(it):
            for c, (v, style) in enumerate(izip_longest(row, styles,
                                                        fillvalue=None)):
                ws.write(r+1, c, label=v, style=style)

    target = write_source_from_arg(filename)
    with target.open('wb') as target2:
        wb.save(target2)


Table.toxls = toxls
5 | org = logging.Logger.debug 6 | 7 | def debug(self, msg, *args, **kwargs): 8 | org(self, str(msg), *args, **kwargs) 9 | 10 | logging.Logger.debug = debug 11 | -------------------------------------------------------------------------------- /petl/test/failonerror.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from petl.test.helpers import ieq, eq_ 4 | import petl.config as config 5 | 6 | 7 | 8 | def assert_failonerror(input_fn, expected_output): 9 | """In the input rows, the first row should process through the 10 | transformation cleanly. The second row should generate an 11 | exception. There are no requirements for any other rows.""" 12 | #========================================================= 13 | # Test function parameters with default config settings 14 | #========================================================= 15 | # test the default config setting: failonerror == False 16 | eq_(config.failonerror, False) 17 | 18 | # By default, a bad conversion does not raise an exception, and 19 | # values for the failed conversion are returned as None 20 | table2 = input_fn() 21 | ieq(expected_output, table2) 22 | ieq(expected_output, table2) 23 | 24 | # When called with failonerror is False or None, a bad conversion 25 | # does not raise an exception, and values for the failed conversion 26 | # are returned as None 27 | table3 = input_fn(failonerror=False) 28 | ieq(expected_output, table3) 29 | ieq(expected_output, table3) 30 | table3 = input_fn(failonerror=None) 31 | ieq(expected_output, table3) 32 | ieq(expected_output, table3) 33 | 34 | # When called with failonerror=True, a bad conversion raises an 35 | # exception 36 | with pytest.raises(Exception): 37 | table4 = input_fn(failonerror=True) 38 | table4.nrows() 39 | 40 | # When called with failonerror='inline', a bad conversion 41 | # does not raise an exception, and an Exception for the failed 42 | # conversion is returned in the result. 
43 | expect5 = expected_output[0], expected_output[1] 44 | table5 = input_fn(failonerror='inline') 45 | ieq(expect5, table5.head(1)) 46 | ieq(expect5, table5.head(1)) 47 | excp = table5[2][0] 48 | assert isinstance(excp, Exception) 49 | 50 | #========================================================= 51 | # Test config settings 52 | #========================================================= 53 | # Save config setting 54 | saved_config_failonerror = config.failonerror 55 | 56 | # When config.failonerror == True, a bad conversion raises an 57 | # exception 58 | config.failonerror = True 59 | with pytest.raises(Exception): 60 | table6 = input_fn() 61 | table6.nrows() 62 | 63 | # When config.failonerror == 'inline', a bad conversion 64 | # does not raise an exception, and an Exception for the failed 65 | # conversion is returned in the result. 66 | expect7 = expected_output[0], expected_output[1] 67 | config.failonerror = 'inline' 68 | table7 = input_fn() 69 | ieq(expect7, table7.head(1)) 70 | ieq(expect7, table7.head(1)) 71 | excp = table7[2][0] 72 | assert isinstance(excp, Exception) 73 | 74 | # When config.failonerror is an invalid value, but still truthy, it 75 | # behaves the same as if == True 76 | config.failonerror = 'invalid' 77 | with pytest.raises(Exception): 78 | table8 = input_fn() 79 | table8.nrows() 80 | 81 | # When config.failonerror is None, it behaves the same as if 82 | # config.failonerror is False 83 | config.failonerror = None 84 | table9 = input_fn() 85 | ieq(expected_output, table9) 86 | ieq(expected_output, table9) 87 | 88 | # A False keyword parameter overrides config.failonerror == True 89 | config.failonerror = True 90 | table10 = input_fn(failonerror=False) 91 | ieq(expected_output, table10) 92 | ieq(expected_output, table10) 93 | 94 | # A None keyword parameter uses config.failonerror == True 95 | config.failonerror = True 96 | with pytest.raises(Exception): 97 | table11 = input_fn(failonerror=None) 98 | table11.nrows() 99 | 100 | # restore 
def eq_(expect, actual, msg=None):
    """Assert that two values are exactly equal (==), failing with *msg*
    or, when *msg* is falsy, a generated '<expect> != <actual>' message."""
    detail = msg or ('%r != %s' % (expect, actual))
    assert expect == actual, detail
def get_env_vars_named(prefix, remove_prefix=True):
    """Collect environment variables whose names start with *prefix*
    (matched case-insensitively).

    Returns a dict keyed by variable name — with the prefix stripped when
    *remove_prefix* is true — or None when nothing matches.
    """
    found = {}
    plen = len(prefix)
    target = prefix.upper()
    for name, value in os.environ.items():
        if not name.upper().startswith(target):
            continue
        key = name[plen:] if remove_prefix else name
        found[key] = value
    return found or None
list(zip(*cols)) 30 | 31 | # from ctable object 32 | actual = frombcolz(ctbl) 33 | ieq(expect, actual) 34 | ieq(expect, actual) 35 | 36 | # from rootdir 37 | actual = frombcolz(rootdir) 38 | ieq(expect, actual) 39 | ieq(expect, actual) 40 | 41 | def test_tobcolz(): 42 | t = [('foo', 'bar', 'baz'), 43 | ('apples', 1, 2.5), 44 | ('oranges', 3, 4.4), 45 | ('pears', 7, .1)] 46 | 47 | ctbl = tobcolz(t) 48 | assert isinstance(ctbl, bcolz.ctable) 49 | eq_(t[0], tuple(ctbl.names)) 50 | ieq(t[1:], (tuple(r) for r in ctbl.iter())) 51 | 52 | ctbl = tobcolz(t, chunklen=2) 53 | assert isinstance(ctbl, bcolz.ctable) 54 | eq_(t[0], tuple(ctbl.names)) 55 | ieq(t[1:], (tuple(r) for r in ctbl.iter())) 56 | eq_(2, ctbl.cols[ctbl.names[0]].chunklen) 57 | 58 | def test_appendbcolz(): 59 | t = [('foo', 'bar', 'baz'), 60 | ('apples', 1, 2.5), 61 | ('oranges', 3, 4.4), 62 | ('pears', 7, .1)] 63 | 64 | # append to in-memory ctable 65 | ctbl = tobcolz(t) 66 | appendbcolz(t, ctbl) 67 | eq_(t[0], tuple(ctbl.names)) 68 | ieq(t[1:] + t[1:], (tuple(r) for r in ctbl.iter())) 69 | 70 | # append to on-disk ctable 71 | rootdir = tempfile.mkdtemp() 72 | tobcolz(t, rootdir=rootdir) 73 | appendbcolz(t, rootdir) 74 | ctbl = bcolz.open(rootdir, mode='r') 75 | eq_(t[0], tuple(ctbl.names)) 76 | ieq(t[1:] + t[1:], (tuple(r) for r in ctbl.iter())) 77 | -------------------------------------------------------------------------------- /petl/test/io/test_csv_unicode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import io 6 | from tempfile import NamedTemporaryFile 7 | 8 | 9 | from petl.test.helpers import ieq, eq_ 10 | from petl.io.csv import fromcsv, tocsv, appendcsv 11 | 12 | 13 | def test_fromcsv(): 14 | 15 | data = ( 16 | u"name,id\n" 17 | u"Արամ Խաչատրյան,1\n" 18 | u"Johann Strauß,2\n" 19 | u"Вагиф Сәмәдоғлу,3\n" 20 | u"章子怡,4\n" 21 | ) 22 | fn = 
NamedTemporaryFile().name 23 | uf = io.open(fn, encoding='utf-8', mode='wt') 24 | uf.write(data) 25 | uf.close() 26 | 27 | actual = fromcsv(fn, encoding='utf-8') 28 | expect = ((u'name', u'id'), 29 | (u'Արամ Խաչատրյան', u'1'), 30 | (u'Johann Strauß', u'2'), 31 | (u'Вагиф Сәмәдоғлу', u'3'), 32 | (u'章子怡', u'4')) 33 | ieq(expect, actual) 34 | ieq(expect, actual) # verify can iterate twice 35 | 36 | 37 | def test_fromcsv_lineterminators(): 38 | data = (u'name,id', 39 | u'Արամ Խաչատրյան,1', 40 | u'Johann Strauß,2', 41 | u'Вагиф Сәмәдоғлу,3', 42 | u'章子怡,4') 43 | expect = ((u'name', u'id'), 44 | (u'Արամ Խաչատրյան', u'1'), 45 | (u'Johann Strauß', u'2'), 46 | (u'Вагиф Сәмәдоғлу', u'3'), 47 | (u'章子怡', u'4')) 48 | 49 | for lt in u'\r', u'\n', u'\r\n': 50 | fn = NamedTemporaryFile().name 51 | uf = io.open(fn, encoding='utf-8', mode='wt', newline='') 52 | uf.write(lt.join(data)) 53 | uf.close() 54 | actual = fromcsv(fn, encoding='utf-8') 55 | ieq(expect, actual) 56 | 57 | 58 | def test_tocsv(): 59 | 60 | tbl = ((u'name', u'id'), 61 | (u'Արամ Խաչատրյան', 1), 62 | (u'Johann Strauß', 2), 63 | (u'Вагиф Сәмәдоғлу', 3), 64 | (u'章子怡', 4)) 65 | fn = NamedTemporaryFile().name 66 | tocsv(tbl, fn, encoding='utf-8', lineterminator='\n') 67 | 68 | expect = ( 69 | u"name,id\n" 70 | u"Արամ Խաչատրյան,1\n" 71 | u"Johann Strauß,2\n" 72 | u"Вагиф Сәмәдоғлу,3\n" 73 | u"章子怡,4\n" 74 | ) 75 | uf = io.open(fn, encoding='utf-8', mode='rt', newline='') 76 | actual = uf.read() 77 | eq_(expect, actual) 78 | 79 | # Test with write_header=False 80 | tbl = ((u'name', u'id'), 81 | (u'Արամ Խաչատրյան', 1), 82 | (u'Johann Strauß', 2), 83 | (u'Вагиф Сәмәдоғлу', 3), 84 | (u'章子怡', 4)) 85 | tocsv(tbl, fn, encoding='utf-8', lineterminator='\n', write_header=False) 86 | 87 | expect = ( 88 | u"Արամ Խաչատրյան,1\n" 89 | u"Johann Strauß,2\n" 90 | u"Вагиф Сәмәдоғлу,3\n" 91 | u"章子怡,4\n" 92 | ) 93 | uf = io.open(fn, encoding='utf-8', mode='rt', newline='') 94 | actual = uf.read() 95 | eq_(expect, actual) 96 | 97 | 98 | def 
test_appendcsv(): 99 | 100 | data = ( 101 | u"name,id\n" 102 | u"Արամ Խաչատրյան,1\n" 103 | u"Johann Strauß,2\n" 104 | u"Вагиф Сәмәдоғлу,3\n" 105 | u"章子怡,4\n" 106 | ) 107 | fn = NamedTemporaryFile().name 108 | uf = io.open(fn, encoding='utf-8', mode='wt') 109 | uf.write(data) 110 | uf.close() 111 | 112 | tbl = ((u'name', u'id'), 113 | (u'ኃይሌ ገብረሥላሴ', 5), 114 | (u'ედუარდ შევარდნაძე', 6)) 115 | appendcsv(tbl, fn, encoding='utf-8', lineterminator='\n') 116 | 117 | expect = ( 118 | u"name,id\n" 119 | u"Արամ Խաչատրյան,1\n" 120 | u"Johann Strauß,2\n" 121 | u"Вагиф Сәмәдоғлу,3\n" 122 | u"章子怡,4\n" 123 | u"ኃይሌ ገብረሥላሴ,5\n" 124 | u"ედუარდ შევარდნაძე,6\n" 125 | ) 126 | uf = io.open(fn, encoding='utf-8', mode='rt') 127 | actual = uf.read() 128 | eq_(expect, actual) 129 | 130 | 131 | def test_tocsv_none(): 132 | 133 | tbl = ((u'col1', u'colNone'), 134 | (u'a', 1), 135 | (u'b', None), 136 | (u'c', None), 137 | (u'd', 4)) 138 | fn = NamedTemporaryFile().name 139 | tocsv(tbl, fn, encoding='utf-8', lineterminator='\n') 140 | 141 | expect = ( 142 | u'col1,colNone\n' 143 | u'a,1\n' 144 | u'b,\n' 145 | u'c,\n' 146 | u'd,4\n' 147 | ) 148 | 149 | uf = io.open(fn, encoding='utf-8', mode='rt', newline='') 150 | actual = uf.read() 151 | eq_(expect, actual) 152 | -------------------------------------------------------------------------------- /petl/test/io/test_html.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | from tempfile import NamedTemporaryFile 6 | import io 7 | from petl.test.helpers import eq_ 8 | 9 | 10 | from petl.io.html import tohtml 11 | 12 | 13 | def test_tohtml(): 14 | 15 | # exercise function 16 | table = (('foo', 'bar'), 17 | ('a', 1), 18 | ('b', (1, 2)), 19 | ('c', False)) 20 | 21 | f = NamedTemporaryFile(delete=False) 22 | tohtml(table, f.name, encoding='ascii', lineterminator='\n') 23 | 24 | # check what it did 25 | with 
io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 26 | actual = o.read() 27 | expect = ( 28 | u"\n" 29 | u"\n" 30 | u"\n" 31 | u"\n" 32 | u"\n" 33 | u"\n" 34 | u"\n" 35 | u"\n" 36 | u"\n" 37 | u"\n" 38 | u"\n" 39 | u"\n" 40 | u"\n" 41 | u"\n" 42 | u"\n" 43 | u"\n" 44 | u"\n" 45 | u"\n" 46 | u"\n" 47 | u"\n" 48 | u"\n" 49 | u"
foobar
a1
b(1, 2)
cFalse
\n" 50 | ) 51 | eq_(expect, actual) 52 | 53 | 54 | def test_tohtml_caption(): 55 | 56 | # exercise function 57 | table = (('foo', 'bar'), 58 | ('a', 1), 59 | ('b', (1, 2))) 60 | f = NamedTemporaryFile(delete=False) 61 | tohtml(table, f.name, encoding='ascii', caption='my table', 62 | lineterminator='\n') 63 | 64 | # check what it did 65 | with io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 66 | actual = o.read() 67 | expect = ( 68 | u"\n" 69 | u"\n" 70 | u"\n" 71 | u"\n" 72 | u"\n" 73 | u"\n" 74 | u"\n" 75 | u"\n" 76 | u"\n" 77 | u"\n" 78 | u"\n" 79 | u"\n" 80 | u"\n" 81 | u"\n" 82 | u"\n" 83 | u"\n" 84 | u"\n" 85 | u"\n" 86 | u"
my table
foobar
a1
b(1, 2)
\n" 87 | ) 88 | eq_(expect, actual) 89 | 90 | 91 | def test_tohtml_with_style(): 92 | 93 | # exercise function 94 | table = (('foo', 'bar'), 95 | ('a', 1)) 96 | 97 | f = NamedTemporaryFile(delete=False) 98 | tohtml(table, f.name, encoding='ascii', lineterminator='\n', 99 | tr_style='text-align: right', td_styles='text-align: center') 100 | 101 | # check what it did 102 | with io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 103 | actual = o.read() 104 | expect = ( 105 | u"\n" 106 | u"\n" 107 | u"\n" 108 | u"\n" 109 | u"\n" 110 | u"\n" 111 | u"\n" 112 | u"\n" 113 | u"\n" 114 | u"\n" 115 | u"\n" 116 | u"\n" 117 | u"\n" 118 | u"
foobar
a1
\n" 119 | ) 120 | eq_(expect, actual) 121 | 122 | 123 | def test_tohtml_headerless(): 124 | table = [] 125 | 126 | f = NamedTemporaryFile(delete=False) 127 | tohtml(table, f.name, encoding='ascii', lineterminator='\n') 128 | 129 | # check what it did 130 | with io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 131 | actual = o.read() 132 | expect = ( 133 | u"\n" 134 | u"\n" 135 | u"\n" 136 | u"
\n" 137 | ) 138 | eq_(expect, actual) 139 | -------------------------------------------------------------------------------- /petl/test/io/test_html_unicode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import io 6 | from tempfile import NamedTemporaryFile 7 | from petl.test.helpers import eq_ 8 | 9 | 10 | from petl.io.html import tohtml 11 | 12 | 13 | def test_tohtml(): 14 | 15 | # exercise function 16 | tbl = ((u'name', u'id'), 17 | (u'Արամ Խաչատրյան', 1), 18 | (u'Johann Strauß', 2), 19 | (u'Вагиф Сәмәдоғлу', 3), 20 | (u'章子怡', 4)) 21 | fn = NamedTemporaryFile().name 22 | tohtml(tbl, fn, encoding='utf-8', lineterminator='\n') 23 | 24 | # check what it did 25 | f = io.open(fn, mode='rt', encoding='utf-8', newline='') 26 | actual = f.read() 27 | expect = ( 28 | u"\n" 29 | u"\n" 30 | u"\n" 31 | u"\n" 32 | u"\n" 33 | u"\n" 34 | u"\n" 35 | u"\n" 36 | u"\n" 37 | u"\n" 38 | u"\n" 39 | u"\n" 40 | u"\n" 41 | u"\n" 42 | u"\n" 43 | u"\n" 44 | u"\n" 45 | u"\n" 46 | u"\n" 47 | u"\n" 48 | u"\n" 49 | u"\n" 50 | u"\n" 51 | u"\n" 52 | u"\n" 53 | u"
nameid
Արամ Խաչատրյան1
Johann Strauß2
Вагиф Сәмәдоғлу3
章子怡4
\n" 54 | ) 55 | eq_(expect, actual) 56 | -------------------------------------------------------------------------------- /petl/test/io/test_json_unicode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import json 6 | from tempfile import NamedTemporaryFile 7 | 8 | 9 | from petl.test.helpers import ieq 10 | from petl.io.json import tojson, fromjson 11 | 12 | 13 | def test_json_unicode(): 14 | 15 | tbl = ((u'id', u'name'), 16 | (1, u'Արամ Խաչատրյան'), 17 | (2, u'Johann Strauß'), 18 | (3, u'Вагиф Сәмәдоғлу'), 19 | (4, u'章子怡'), 20 | ) 21 | fn = NamedTemporaryFile().name 22 | tojson(tbl, fn) 23 | 24 | result = json.load(open(fn)) 25 | assert len(result) == 4 26 | for a, b in zip(tbl[1:], result): 27 | assert a[0] == b['id'] 28 | assert a[1] == b['name'] 29 | 30 | actual = fromjson(fn, header=['id', 'name']) 31 | ieq(tbl, actual) 32 | -------------------------------------------------------------------------------- /petl/test/io/test_jsonl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | from tempfile import NamedTemporaryFile 5 | import json 6 | 7 | from petl import fromjson, tojson 8 | from petl.test.helpers import ieq 9 | 10 | 11 | def test_fromjson_1(): 12 | f = NamedTemporaryFile(delete=False, mode='w') 13 | data = '{"name": "Gilbert", "wins": [["straight", "7S"], ["one pair", "10H"]]}\n' \ 14 | '{"name": "Alexa", "wins": [["two pair", "4S"], ["two pair", "9S"]]}\n' \ 15 | '{"name": "May", "wins": []}\n' \ 16 | '{"name": "Deloise", "wins": [["three of a kind", "5S"]]}' 17 | 18 | f.write(data) 19 | f.close() 20 | 21 | actual = fromjson(f.name, header=['name', 'wins'], lines=True) 22 | 23 | expect = (('name', 'wins'), 24 | ('Gilbert', [["straight", "7S"], ["one pair", "10H"]]), 25 | 
def test_tojson_1():
    """Write a small table as JSON lines and check every emitted record."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    tmp = NamedTemporaryFile(delete=False, mode='r')
    tojson(table, tmp.name, lines=True)

    # one JSON document per line
    result = [json.loads(line) for line in tmp]
    assert len(result) == 3

    expected = [('a', 1), ('b', 2), ('c', 2)]
    for record, (foo, bar) in zip(result, expected):
        assert record['foo'] == foo
        assert record['bar'] == bar
result[2]['wins'] == [] 92 | assert result[3]['name'] == 'Deloise' 93 | assert result[3]['wins'] == [['three of a kind', '5S']] 94 | -------------------------------------------------------------------------------- /petl/test/io/test_pandas.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | 5 | import pytest 6 | 7 | import petl as etl 8 | from petl.test.helpers import ieq 9 | from petl.io.pandas import todataframe, fromdataframe 10 | 11 | 12 | try: 13 | # noinspection PyUnresolvedReferences 14 | import pandas as pd 15 | except ImportError as e: 16 | pytest.skip('SKIP pandas tests: %s' % e, allow_module_level=True) 17 | else: 18 | 19 | def test_todataframe(): 20 | tbl = [('foo', 'bar', 'baz'), 21 | ('apples', 1, 2.5), 22 | ('oranges', 3, 4.4), 23 | ('pears', 7, .1)] 24 | 25 | expect = pd.DataFrame.from_records(tbl[1:], columns=tbl[0]) 26 | actual = todataframe(tbl) 27 | assert expect.equals(actual) 28 | 29 | def test_headerless(): 30 | tbl = [] 31 | expect = pd.DataFrame() 32 | actual = todataframe(tbl) 33 | assert expect.equals(actual) 34 | 35 | def test_fromdataframe(): 36 | tbl = [('foo', 'bar', 'baz'), 37 | ('apples', 1, 2.5), 38 | ('oranges', 3, 4.4), 39 | ('pears', 7, .1)] 40 | df = pd.DataFrame.from_records(tbl[1:], columns=tbl[0]) 41 | ieq(tbl, fromdataframe(df)) 42 | ieq(tbl, fromdataframe(df)) 43 | 44 | def test_integration(): 45 | tbl = [('foo', 'bar', 'baz'), 46 | ('apples', 1, 2.5), 47 | ('oranges', 3, 4.4), 48 | ('pears', 7, .1)] 49 | df = etl.wrap(tbl).todataframe() 50 | tbl2 = etl.fromdataframe(df) 51 | ieq(tbl, tbl2) 52 | ieq(tbl, tbl2) 53 | -------------------------------------------------------------------------------- /petl/test/io/test_pickle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, 
def picklereader(fl):
    """Yield successive pickled objects from file-like *fl* until EOF."""
    while True:
        try:
            item = pickle.load(fl)
        except EOFError:
            # end of the pickle stream - stop iterating
            return
        yield item
def test_fromtext():
    """fromtext should expose each raw line of a UTF-8 file as a row."""
    data = (
        u"name,id\n"
        u"Արամ Խաչատրյան,1\n"
        u"Johann Strauß,2\n"
        u"Вагиф Сәмәдоғлу,3\n"
        u"章子怡,4\n"
    )
    fn = NamedTemporaryFile().name
    with io.open(fn, encoding='utf-8', mode='wt') as f:
        f.write(data)

    actual = fromtext(fn, encoding='utf-8')
    expect = ((u'lines',),
              (u'name,id',),
              (u'Արամ Խաչատրյան,1',),
              (u'Johann Strauß,2',),
              (u'Вагиф Сәмәдоғлу,3',),
              (u'章子怡,4',),
              )
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
id\n" 71 | u"|-\n" 72 | u"| Արամ Խաչատրյան\n" 73 | u"| 1\n" 74 | u"|-\n" 75 | u"| Johann Strauß\n" 76 | u"| 2\n" 77 | u"|-\n" 78 | u"| Вагиф Сәмәдоғлу\n" 79 | u"| 3\n" 80 | u"|-\n" 81 | u"| 章子怡\n" 82 | u"| 4\n" 83 | u"|}\n" 84 | ) 85 | eq_(expect, actual) 86 | -------------------------------------------------------------------------------- /petl/test/resources/test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petl-developers/petl/43925d008bd1d98f90204b3db74d88b3fee27a69/petl/test/resources/test.xls -------------------------------------------------------------------------------- /petl/test/resources/test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petl-developers/petl/43925d008bd1d98f90204b3db74d88b3fee27a69/petl/test/resources/test.xlsx -------------------------------------------------------------------------------- /petl/test/resources/test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Default 6 | python 2.7 7 | 8 | /petl/src 9 | 10 | 11 | -------------------------------------------------------------------------------- /petl/test/test_fluent.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from tempfile import NamedTemporaryFile 5 | import csv 6 | from petl.compat import PY2 7 | 8 | 9 | import petl as etl 10 | from petl.test.helpers import ieq, eq_ 11 | 12 | 13 | def test_basics(): 14 | 15 | t1 = (('foo', 'bar'), 16 | ('A', 1), 17 | ('B', 2)) 18 | w1 = etl.wrap(t1) 19 | 20 | eq_(('foo', 'bar'), w1.header()) 21 | eq_(etl.header(w1), w1.header()) 22 | ieq((('A', 1), ('B', 2)), w1.data()) 23 | ieq(etl.data(w1), w1.data()) 24 | 25 | w2 = w1.cut('bar', 'foo') 26 | expect2 = (('bar', 'foo'), 27 | (1, 'A'), 28 | (2, 'B')) 29 | ieq(expect2, w2) 30 | 
def test_container():
    """Wrapped tables support row indexing, column access and len()."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    # row access by integer index returns the header row
    eq_(('foo', 'bar'), etl.wrap(table)[0])
    # column access by field name yields the data values
    ieq((1, 2, 2), etl.wrap(table)['bar'])
    # len() counts header plus data rows
    eq_(4, len(etl.wrap(table)))
('B', 5), 120 | ('C', 7))) 121 | 122 | added, removed = tablea.diff(tableb) 123 | eq_(('foo', 'bar'), added.header()) 124 | eq_(('foo', 'bar'), removed.header()) 125 | ieq(etl.data(added), added.data()) 126 | ieq(etl.data(removed), removed.data()) 127 | -------------------------------------------------------------------------------- /petl/test/test_helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import pytest 5 | 6 | from petl.test.helpers import eq_, ieq, get_env_vars_named 7 | 8 | GET_ENV_PREFIX = "PETL_TEST_HELPER_ENVVAR_" 9 | 10 | 11 | def _testcase_get_env_vars_named(num_vals, prefix=""): 12 | res = {} 13 | for i in range(1, num_vals, 1): 14 | reskey = prefix + str(i) 15 | res[reskey] = str(i) 16 | return res 17 | 18 | 19 | @pytest.fixture() 20 | def setup_helpers_get_env_vars_named(monkeypatch): 21 | varlist = _testcase_get_env_vars_named(3, prefix=GET_ENV_PREFIX) 22 | for k, v in varlist.items(): 23 | monkeypatch.setenv(k, v) 24 | 25 | 26 | def test_helper_get_env_vars_named_prefixed(setup_helpers_get_env_vars_named): 27 | expected = _testcase_get_env_vars_named(3, GET_ENV_PREFIX) 28 | found = get_env_vars_named(GET_ENV_PREFIX, remove_prefix=False) 29 | ieq(found, expected) 30 | 31 | 32 | def test_helper_get_env_vars_named_unprefixed(setup_helpers_get_env_vars_named): 33 | expected = _testcase_get_env_vars_named(3) 34 | found = get_env_vars_named(GET_ENV_PREFIX, remove_prefix=True) 35 | ieq(found, expected) 36 | 37 | 38 | def test_helper_get_env_vars_named_not_found(setup_helpers_get_env_vars_named): 39 | expected = None 40 | found = get_env_vars_named("PETL_TEST_HELPER_ENVVAR_NOT_FOUND_") 41 | eq_(found, expected) 42 | -------------------------------------------------------------------------------- /petl/test/test_interactive.py: -------------------------------------------------------------------------------- 
def test_repr():
    """repr() of a wrapped table matches the output of etl.look."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expected = str(etl.look(table))
    eq_(expected, repr(etl.wrap(table)))
foobar
a1
b2
c2
55 | """ 56 | actual = etl.wrap(table)._repr_html_() 57 | for l1, l2 in zip(expect.split('\n'), actual.split('\n')): 58 | eq_(l1, l2) 59 | 60 | 61 | def test_repr_html_limit(): 62 | table = (('foo', 'bar'), 63 | ('a', 1), 64 | ('b', 2), 65 | ('c', 2)) 66 | 67 | # lower limit 68 | etl.config.display_limit = 2 69 | 70 | expect = """ 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
foobar
a1
b2
88 |

...

89 | """ 90 | actual = etl.wrap(table)._repr_html_() 91 | print(actual) 92 | for l1, l2 in zip(expect.split('\n'), actual.split('\n')): 93 | eq_(l1, l2) 94 | -------------------------------------------------------------------------------- /petl/test/transform/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | -------------------------------------------------------------------------------- /petl/test/transform/test_fills.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.test.helpers import ieq 5 | from petl.transform.fills import filldown, fillleft, fillright 6 | 7 | 8 | def test_filldown(): 9 | 10 | table = (('foo', 'bar', 'baz'), 11 | (1, 'a', None), 12 | (1, None, .23), 13 | (1, 'b', None), 14 | (2, None, None), 15 | (2, None, .56), 16 | (2, 'c', None), 17 | (None, 'c', .72)) 18 | 19 | actual = filldown(table) 20 | expect = (('foo', 'bar', 'baz'), 21 | (1, 'a', None), 22 | (1, 'a', .23), 23 | (1, 'b', .23), 24 | (2, 'b', .23), 25 | (2, 'b', .56), 26 | (2, 'c', .56), 27 | (2, 'c', .72)) 28 | ieq(expect, actual) 29 | ieq(expect, actual) 30 | 31 | actual = filldown(table, 'bar') 32 | expect = (('foo', 'bar', 'baz'), 33 | (1, 'a', None), 34 | (1, 'a', .23), 35 | (1, 'b', None), 36 | (2, 'b', None), 37 | (2, 'b', .56), 38 | (2, 'c', None), 39 | (None, 'c', .72)) 40 | ieq(expect, actual) 41 | ieq(expect, actual) 42 | 43 | actual = filldown(table, 'foo', 'bar') 44 | expect = (('foo', 'bar', 'baz'), 45 | (1, 'a', None), 46 | (1, 'a', .23), 47 | (1, 'b', None), 48 | (2, 'b', None), 49 | (2, 'b', .56), 50 | (2, 'c', None), 51 | (2, 'c', .72)) 52 | ieq(expect, actual) 53 | ieq(expect, actual) 54 | 55 | 56 | def test_filldown_headerless(): 57 | table = [] 58 | actual = filldown(table, 'foo') 59 | expect = [] 60 | ieq(expect, actual) 61 | 
def test_fillright():
    """fillright propagates non-missing values rightwards within each row."""
    src = (('foo', 'bar', 'baz'),
           (1, 'a', None),
           (1, None, .23),
           (1, 'b', None),
           (2, None, None),
           (2, None, .56),
           (2, 'c', None),
           (None, 'c', .72))

    filled = fillright(src)
    expected = (('foo', 'bar', 'baz'),
                (1, 'a', 'a'),
                (1, 1, .23),
                (1, 'b', 'b'),
                (2, 2, 2),
                (2, 2, .56),
                (2, 'c', 'c'),
                (None, 'c', .72))
    ieq(expected, filled)
    ieq(expected, filled)  # verify can iterate twice
def test_optional_constraint_with_missing_field():
    """An optional constraint on an absent field produces no problems."""
    checks = [dict(name='C1', field='foo', test=int, optional=True)]

    table = (('bar', 'baz'),
             ('1999-99-99', 'z'))

    actual = validate(table, checks)
    debug(actual)

    # only the header row of the problems table is expected
    ieq((('name', 'row', 'field', 'value', 'error'),), actual)
def test_validation_headerless():
    """Validating an empty table reports only the missing header, no errors."""
    required_header = ('foo', 'bar', 'baz')
    empty_table = []

    expected = (('name', 'row', 'field', 'value', 'error'),
                ('__header__', 0, None, None, 'AssertionError'))

    actual = validate(empty_table, header=required_header)
    ieq(expected, actual)
    ieq(expected, actual)  # verify can iterate twice
def test_columns():
    """columns materialises each field as a list keyed by field name."""
    table = [['foo', 'bar'],
             ['a', 1],
             ['b', 2],
             ['b', 3]]
    cols = columns(table)
    expected = {'foo': ['a', 'b', 'b'],
                'bar': [1, 2, 3]}
    for field, vals in expected.items():
        eq_(vals, cols[field])
def test_numparser():
    """Lax number parser converts where possible and passes through otherwise."""
    parse = numparser()
    cases = [
        ('1', 1),
        ('1.0', 1.0),
        (str(maxint + 1), maxint + 1),   # promoted beyond machine int range
        ('3+4j', 3 + 4j),                # complex numbers are parsed too
        ('aaa', 'aaa'),                  # unparseable input passes through
        (None, None),                    # non-strings pass through
    ]
    for raw, expected in cases:
        assert parse(raw) == expected
def test_randomseed():
    """randomseed returns a non-empty string that changes over time."""
    first = randomseed()
    time.sleep(1)  # seed is time-derived, so wait before sampling again
    second = randomseed()

    assert isinstance(first, str)
    assert first != ""
    assert first != second
47 | """ 48 | columns = ( 49 | ('count', partial(pyrandom.randint, 0, 100)), 50 | ('pet', partial(pyrandom.choice, ['dog', 'cat', 'cow', ])), 51 | ('color', partial(pyrandom.choice, ['yellow', 'orange', 'brown'])), 52 | ('value', pyrandom.random), 53 | ) 54 | rows = 35 55 | 56 | table = dummytable(numrows=rows, fields=columns) 57 | assert table[0] == ('count', 'pet', 'color', 'value') 58 | assert len(table) == rows + 1 59 | 60 | 61 | def test_dummytable_no_seed(): 62 | """ 63 | Ensure that dummytable provides a table with the right number of rows 64 | and columns when not provided with a seed. 65 | """ 66 | rows = 35 67 | 68 | table = dummytable(numrows=rows) 69 | assert len(table[0]) == 3 70 | assert len(table) == rows + 1 71 | 72 | 73 | def test_dummytable_int_seed(): 74 | """ 75 | Ensure that dummytable provides a table with the right number of rows 76 | and columns when provided with an integer as a seed. 77 | """ 78 | rows = 35 79 | seed = 42 80 | table = dummytable(numrows=rows, seed=seed) 81 | assert len(table[0]) == 3 82 | assert len(table) == rows + 1 83 | 84 | 85 | def test_dummytable_class(): 86 | """ 87 | Ensure that DummyTable provides a table with the right number of rows 88 | and columns. 
89 | """ 90 | rows = 70 91 | table = DummyTable(numrows=rows) 92 | 93 | assert len(table) == rows + 1 94 | -------------------------------------------------------------------------------- /petl/test/util/test_statistics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.test.helpers import eq_ 5 | from petl.util.statistics import stats 6 | 7 | 8 | def test_stats(): 9 | 10 | table = (('foo', 'bar', 'baz'), 11 | ('A', 1, 2), 12 | ('B', '2', '3.4'), 13 | ('B', '3', '7.8', True), 14 | ('D', 'xyz', 9.0), 15 | ('E', None)) 16 | 17 | result = stats(table, 'bar') 18 | eq_(1.0, result.min) 19 | eq_(3.0, result.max) 20 | eq_(6.0, result.sum) 21 | eq_(3, result.count) 22 | eq_(2, result.errors) 23 | eq_(2.0, result.mean) 24 | eq_(2/3, result.pvariance) 25 | eq_((2/3)**.5, result.pstdev) 26 | -------------------------------------------------------------------------------- /petl/test/util/test_timing.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.util.counting import nrows 5 | from petl.util.timing import progress, log_progress 6 | 7 | 8 | def test_progress(): 9 | # make sure progress doesn't raise exception 10 | table = (('foo', 'bar', 'baz'), 11 | ('a', 1, True), 12 | ('b', 2, True), 13 | ('b', 3)) 14 | nrows(progress(table)) 15 | 16 | def test_log_progress(): 17 | # make sure log_progress doesn't raise exception 18 | table = (('foo', 'bar', 'baz'), 19 | ('a', 1, True), 20 | ('b', 2, True), 21 | ('b', 3)) 22 | nrows(log_progress(table)) 23 | -------------------------------------------------------------------------------- /petl/transform/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | from 
petl.transform.basics import cut, cutout, movefield, cat, annex, \ 4 | addfield, addfieldusingcontext, addrownumbers, addcolumn, rowslice, head, \ 5 | tail, skipcomments, stack, addfields 6 | 7 | from petl.transform.headers import rename, setheader, extendheader, \ 8 | pushheader, skip, prefixheader, suffixheader, sortheader 9 | 10 | from petl.transform.conversions import convert, convertall, replace, \ 11 | replaceall, update, convertnumbers, format, formatall, interpolate, \ 12 | interpolateall 13 | 14 | from petl.transform.sorts import sort, mergesort, issorted 15 | 16 | from petl.transform.selects import select, selectop, selectcontains, \ 17 | selecteq, selectfalse, selectge, selectgt, selectin, selectis, \ 18 | selectisinstance, selectisnot, selectle, selectlt, selectne, selectnone, \ 19 | selectnotin, selectnotnone, selectrangeclosed, selectrangeopen, \ 20 | selectrangeopenleft, selectrangeopenright, selecttrue, \ 21 | selectusingcontext, rowlenselect, facet, biselect 22 | 23 | from petl.transform.joins import join, leftjoin, rightjoin, outerjoin, \ 24 | crossjoin, antijoin, lookupjoin, unjoin 25 | 26 | from petl.transform.hashjoins import hashjoin, hashleftjoin, hashrightjoin, \ 27 | hashantijoin, hashlookupjoin 28 | 29 | from petl.transform.reductions import rowreduce, mergeduplicates,\ 30 | aggregate, groupcountdistinctvalues, groupselectfirst, groupselectmax, \ 31 | groupselectmin, merge, fold, Conflict, groupselectlast 32 | 33 | from petl.transform.fills import filldown, fillright, fillleft 34 | 35 | from petl.transform.regex import capture, split, search, searchcomplement, \ 36 | sub, splitdown 37 | 38 | from petl.transform.reshape import melt, recast, transpose, pivot, flatten, \ 39 | unflatten 40 | 41 | from petl.transform.maps import fieldmap, rowmap, rowmapmany, rowgroupmap 42 | 43 | from petl.transform.unpacks import unpack, unpackdict 44 | 45 | from petl.transform.dedup import duplicates, unique, distinct, conflicts, \ 46 | isunique 47 | 48 | 
from petl.transform.setops import complement, intersection, \ 49 | recordcomplement, diff, recorddiff, hashintersection, hashcomplement 50 | 51 | from petl.transform.intervals import intervaljoin, intervalleftjoin, \ 52 | intervaljoinvalues, intervalantijoin, intervallookup, intervallookupone, \ 53 | intervalrecordlookup, intervalrecordlookupone, intervalsubtract, \ 54 | facetintervallookup, facetintervallookupone, facetintervalrecordlookup, \ 55 | facetintervalrecordlookupone, collapsedintervals 56 | 57 | from petl.transform.validation import validate 58 | -------------------------------------------------------------------------------- /petl/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.util.base import Table, Record, values, header, data, \ 5 | fieldnames, records, dicts, namedtuples, expr, rowgroupby, empty, wrap 6 | 7 | from petl.util.lookups import lookup, lookupone, dictlookup, dictlookupone, \ 8 | recordlookup, recordlookupone 9 | 10 | from petl.util.parsers import dateparser, timeparser, datetimeparser, \ 11 | numparser, boolparser 12 | 13 | from petl.util.vis import look, lookall, lookstr, lookallstr, see 14 | 15 | from petl.util.random import randomtable, dummytable 16 | 17 | from petl.util.counting import parsecounter, parsecounts, typecounter, \ 18 | typecounts, valuecount, valuecounter, valuecounts, stringpatterncounter, \ 19 | stringpatterns, rowlengths, nrows 20 | 21 | from petl.util.materialise import listoflists, listoftuples, tupleoflists, \ 22 | tupleoftuples, columns, facetcolumns 23 | 24 | from petl.util.timing import progress, log_progress, clock 25 | 26 | from petl.util.statistics import limits, stats 27 | 28 | from petl.util.misc import typeset, diffheaders, diffvalues, nthword, strjoin, \ 29 | coalesce 30 | -------------------------------------------------------------------------------- 
/petl/util/misc.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.util.base import values, header, Table 5 | 6 | 7 | def typeset(table, field): 8 | """ 9 | Return a set containing all Python types found for values in the given 10 | field. E.g.:: 11 | 12 | >>> import petl as etl 13 | >>> table = [['foo', 'bar', 'baz'], 14 | ... ['A', 1, '2'], 15 | ... ['B', u'2', '3.4'], 16 | ... [u'B', u'3', '7.8', True], 17 | ... ['D', u'xyz', 9.0], 18 | ... ['E', 42]] 19 | >>> sorted(etl.typeset(table, 'foo')) 20 | ['str'] 21 | >>> sorted(etl.typeset(table, 'bar')) 22 | ['int', 'str'] 23 | >>> sorted(etl.typeset(table, 'baz')) 24 | ['NoneType', 'float', 'str'] 25 | 26 | The `field` argument can be a field name or index (starting from zero). 27 | 28 | """ 29 | 30 | s = set() 31 | for v in values(table, field): 32 | try: 33 | s.add(type(v).__name__) 34 | except IndexError: 35 | pass # ignore short rows 36 | return s 37 | 38 | 39 | Table.typeset = typeset 40 | 41 | 42 | def diffheaders(t1, t2): 43 | """ 44 | Return the difference between the headers of the two tables as a pair of 45 | sets. E.g.:: 46 | 47 | >>> import petl as etl 48 | >>> table1 = [['foo', 'bar', 'baz'], 49 | ... ['a', 1, .3]] 50 | >>> table2 = [['baz', 'bar', 'quux'], 51 | ... ['a', 1, .3]] 52 | >>> add, sub = etl.diffheaders(table1, table2) 53 | >>> add 54 | {'quux'} 55 | >>> sub 56 | {'foo'} 57 | 58 | """ 59 | 60 | t1h = set(header(t1)) 61 | t2h = set(header(t2)) 62 | return t2h - t1h, t1h - t2h 63 | 64 | 65 | Table.diffheaders = diffheaders 66 | 67 | 68 | def diffvalues(t1, t2, f): 69 | """ 70 | Return the difference between the values under the given field in the two 71 | tables, e.g.:: 72 | 73 | >>> import petl as etl 74 | >>> table1 = [['foo', 'bar'], 75 | ... ['a', 1], 76 | ... ['b', 3]] 77 | >>> table2 = [['bar', 'foo'], 78 | ... [1, 'a'], 79 | ... 
[3, 'c']] 80 | >>> add, sub = etl.diffvalues(table1, table2, 'foo') 81 | >>> add 82 | {'c'} 83 | >>> sub 84 | {'b'} 85 | 86 | """ 87 | 88 | t1v = set(values(t1, f)) 89 | t2v = set(values(t2, f)) 90 | return t2v - t1v, t1v - t2v 91 | 92 | 93 | Table.diffvalues = diffvalues 94 | 95 | 96 | def strjoin(s): 97 | """ 98 | Return a function to join sequences using `s` as the separator. Intended 99 | for use with :func:`petl.transform.conversions.convert`. 100 | 101 | """ 102 | 103 | return lambda l: s.join(map(str, l)) 104 | 105 | 106 | def nthword(n, sep=None): 107 | """ 108 | Construct a function to return the nth word in a string. E.g.:: 109 | 110 | >>> import petl as etl 111 | >>> s = 'foo bar' 112 | >>> f = etl.nthword(0) 113 | >>> f(s) 114 | 'foo' 115 | >>> g = etl.nthword(1) 116 | >>> g(s) 117 | 'bar' 118 | 119 | Intended for use with :func:`petl.transform.conversions.convert`. 120 | 121 | """ 122 | 123 | return lambda s: s.split(sep)[n] 124 | 125 | 126 | def coalesce(*fields, **kwargs): 127 | """ 128 | Return a function which accepts a row and returns the first non-missing 129 | value from the specified fields. Intended for use with 130 | :func:`petl.transform.basics.addfield`. 131 | 132 | """ 133 | missing = kwargs.get('missing', None) 134 | default = kwargs.get('default', None) 135 | 136 | def _coalesce(row): 137 | for f in fields: 138 | v = row[f] 139 | if v is not missing: 140 | return v 141 | return default 142 | 143 | return _coalesce 144 | -------------------------------------------------------------------------------- /petl/util/statistics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from collections import namedtuple 5 | 6 | 7 | from petl.util.base import values, Table 8 | 9 | 10 | def limits(table, field): 11 | """ 12 | Find minimum and maximum values under the given field. 
E.g.:: 13 | 14 | >>> import petl as etl 15 | >>> table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]] 16 | >>> minv, maxv = etl.limits(table, 'bar') 17 | >>> minv 18 | 1 19 | >>> maxv 20 | 3 21 | 22 | The `field` argument can be a field name or index (starting from zero). 23 | 24 | """ 25 | 26 | vals = iter(values(table, field)) 27 | try: 28 | minv = maxv = next(vals) 29 | except StopIteration: 30 | return None, None 31 | else: 32 | for v in vals: 33 | if v < minv: 34 | minv = v 35 | if v > maxv: 36 | maxv = v 37 | return minv, maxv 38 | 39 | 40 | Table.limits = limits 41 | 42 | 43 | _stats = namedtuple('stats', ('count', 'errors', 'sum', 'min', 'max', 'mean', 44 | 'pvariance', 'pstdev')) 45 | 46 | 47 | def stats(table, field): 48 | """ 49 | Calculate basic descriptive statistics on a given field. E.g.:: 50 | 51 | >>> import petl as etl 52 | >>> table = [['foo', 'bar', 'baz'], 53 | ... ['A', 1, 2], 54 | ... ['B', '2', '3.4'], 55 | ... [u'B', u'3', u'7.8', True], 56 | ... ['D', 'xyz', 9.0], 57 | ... ['E', None]] 58 | >>> etl.stats(table, 'bar') 59 | stats(count=3, errors=2, sum=6.0, min=1.0, max=3.0, mean=2.0, pvariance=0.6666666666666666, pstdev=0.816496580927726) 60 | 61 | The `field` argument can be a field name or index (starting from zero). 
62 | 63 | """ 64 | 65 | _min = None 66 | _max = None 67 | _sum = 0 68 | _mean = 0 69 | _var = 0 70 | _count = 0 71 | _errors = 0 72 | for v in values(table, field): 73 | try: 74 | v = float(v) 75 | except (ValueError, TypeError): 76 | _errors += 1 77 | else: 78 | _count += 1 79 | if _min is None or v < _min: 80 | _min = v 81 | if _max is None or v > _max: 82 | _max = v 83 | _sum += v 84 | _mean, _var = onlinestats(v, _count, mean=_mean, variance=_var) 85 | _std = _var**.5 86 | return _stats(_count, _errors, _sum, _min, _max, _mean, _var, _std) 87 | 88 | 89 | Table.stats = stats 90 | 91 | 92 | def onlinestats(xi, n, mean=0, variance=0): 93 | # function to calculate online mean and variance 94 | meanprv = mean 95 | varianceprv = variance 96 | mean = (((n - 1)*meanprv) + xi)/n 97 | variance = (((n - 1)*varianceprv) + ((xi - meanprv)*(xi - mean)))/n 98 | return mean, variance 99 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm", "wheel"] 3 | 4 | [tool.bandit] 5 | exclude_dirs = ["bin", "docs"] 6 | 7 | [tool.bandit.assert_used] 8 | skips = ["*/*_test.py", "*/test_*.py"] 9 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_level=DEBUG 3 | doctest_optionflags = NORMALIZE_WHITESPACE ALLOW_UNICODE 4 | addopts = --ignore-glob=*_py2.py --ignore-glob=petl/io/db.py 5 | -------------------------------------------------------------------------------- /requirements-database.txt: -------------------------------------------------------------------------------- 1 | # packages required for testing petl with databases 2 | 3 | cryptography 4 | pymysql 5 | SQLAlchemy>=1.3.6,<2.0 6 | psycopg2-binary 7 | # PyMySQL==0.9.3 8 | 
--------------------------------------------------------------------------------
/requirements-docs.txt:
--------------------------------------------------------------------------------
# Used in tox.ini @ py3x-docs

sphinx
sphinx-issues
rinohtype

# Used for generating docs version from Git

setuptools
setuptools-scm
--------------------------------------------------------------------------------
/requirements-formats.txt:
--------------------------------------------------------------------------------
Cython
numpy
numexpr
intervaltree>=3.0.2
lxml>=4.6.5
openpyxl>=2.6.2
pandas
Whoosh>=2.7.4
xlrd>=2.0.1
xlwt>=1.3.0
fastavro>=0.24.2 ; python_version >= '3'
fastavro==0.24.2 ; python_version < '3'
gspread>=3.4.0 ; python_version >= '3'

# version 3.9.2 fails with python3.12 on macos-latest: PyTables/PyTables#1093
tables ; sys_platform != 'darwin'

--------------------------------------------------------------------------------
/requirements-linting.txt:
--------------------------------------------------------------------------------
## Used as main formatter/linter:

ruff >= 0.3

# Used in Github:

pylint >= 3.0.0
flake8 >= 7.0.0
black >= 24.0.0
bandit[toml,sarif] >= 1.7.0

## Suggestions:

# pre-commit

#? Obs: Should work with python >= 3.8

--------------------------------------------------------------------------------
/requirements-optional.txt:
--------------------------------------------------------------------------------
# Packages below need complex local setup #
# Also check: .github/workflows/test-changes.yml

# Troubleshooting:
# 1. $ export DISABLE_BLOSC_AVX2=1

# 2.1 $ brew install c-blosc # On macOS
# 2.2 $ sudo apt-get install python3-dev # On debian distros
# 2.3 $ sudo dnf install python3-devel # On Fedora/RHEL distros

# 3.1 $ sudo find / -iname "Python.h"
# 3.2 $ export C_INCLUDE_PATH=/usr/include/python3.11/Python.h


blosc ; python_version >= '3.7' and python_version != '3.13'

# Troubleshooting:
# 1. $ pip install --prefer-binary -r requirements-optional.txt
# 2. $ pip install --prefer-binary bcolz

bcolz ; python_version >= '3.7' and python_version < '3.9.9'
--------------------------------------------------------------------------------
/requirements-remote.txt:
--------------------------------------------------------------------------------
# packages for testing remote sources

fastavro>=0.24.2 ; python_version >= '3'
smbprotocol>=1.0.1
paramiko>=2.7.1
requests; python_version >= '3'
fsspec>=0.7.4 ; python_version >= '3'
# NOTE(review): dropped the clause
#   "or ( python_version > '3.10' and python_version < '3.2' )"
# from the marker below: it can never be true (contradictory bounds), so
# removing it does not change which environments install aiohttp.
aiohttp>=3.6.2 ; python_version >= '3.5.3'
s3fs>=0.2.2 ; python_version >= '3'
--------------------------------------------------------------------------------
/requirements-tests.txt:
--------------------------------------------------------------------------------
wheel
setuptools
setuptools-scm
pytest-cov>=2.12.0
pytest>=4.6.6,<7.0.0
tox
coverage
coveralls
mock; python_version < '3.0'
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function

from setuptools import find_packages, setup

# Read the long description up front so the file handle is closed promptly.
# (The original passed open('README.txt').read() inline, which leaks the
# handle until garbage collection.)
with open('README.txt') as readme:
    long_description = readme.read()

setup(
    name='petl',
    author='Alistair Miles',
    author_email='alimanfoo@googlemail.com',
    maintainer="Juarez Rudsatz",
    maintainer_email="juarezr@gmail.com",
    package_dir={'': '.'},
    packages=find_packages('.'),
    scripts=['bin/petl'],
    url='https://github.com/petl-developers/petl',
    license='MIT License',
    description='A Python package for extracting, transforming and loading '
                'tables of data.',
    long_description=long_description,
    python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*',
    setup_requires=["setuptools>18.0", "setuptools-scm>1.5.4"],
    extras_require={
        'avro': ['fastavro>=0.24.0'],
        'bcolz': ['bcolz>=1.2.1'],
        'db': ['SQLAlchemy>=1.3.6,<2.0'],
        'hdf5': ['cython>=0.29.13', 'numpy>=1.16.4', 'numexpr>=2.6.9',
                 'tables>=3.5.2'],
        'http': ['aiohttp>=3.6.2', 'requests'],
        'interval': ['intervaltree>=3.0.2'],
        'numpy': ['numpy>=1.16.4'],
        'pandas': ['pandas>=0.24.2'],
        'remote': ['fsspec>=0.7.4'],
        'smb': ['smbprotocol>=1.0.1'],
        'xls': ['xlrd>=2.0.1', 'xlwt>=1.3.0'],
        'xlsx': ['openpyxl>=2.6.2'],
        'xpath': ['lxml>=4.4.0'],
        'whoosh': ['whoosh'],
    },
    # version is derived from Git tags via setuptools-scm
    use_scm_version={
        "version_scheme": "guess-next-dev",
        "local_scheme": "dirty-tag",
        "write_to": "petl/version.py",
    },
    classifiers=['Intended Audience :: Developers',
                 'License :: OSI Approved :: MIT License',
                 'Programming Language :: Python :: 2',
                 'Programming Language :: Python :: 2.7',
                 'Programming Language :: Python :: 3',
                 'Programming Language :: Python :: 3.6',
                 'Programming Language :: Python :: 3.7',
                 'Programming Language :: Python :: 3.8',
                 'Programming Language :: Python :: 3.9',
                 'Programming Language :: Python :: 3.10',
                 'Programming Language :: Python :: 3.11',
                 'Programming Language :: Python :: 3.12',
                 'Programming Language :: Python :: 3.13',
                 'Topic :: Software Development :: Libraries :: Python Modules'
                 ]
)
--------------------------------------------------------------------------------