├── .coveragerc ├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── codacy-analysis.yml │ ├── codeql-analysis.yml │ ├── publish-release.yml │ └── test-changes.yml ├── .gitignore ├── .readthedocs.yaml ├── .vscode ├── extensions.json ├── launch.json ├── settings.json └── tasks.json ├── CODE_OF_CONDUCT.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── README.txt ├── bin └── petl ├── docker-compose.yml ├── docs ├── Makefile ├── acknowledgments.rst ├── changes.rst ├── conf.py ├── config.rst ├── contributing.rst ├── index.rst ├── install.rst ├── intro.rst ├── io.rst ├── make.bat ├── petl-architecture.png ├── related_work.rst ├── transform.rst └── util.rst ├── examples ├── comparison.py ├── intro.py ├── io │ ├── csv.py │ ├── html.py │ ├── json.py │ ├── numpy.py │ ├── pandas.py │ ├── pickle.py │ ├── pytables.py │ ├── sqlite3.py │ ├── text.py │ ├── whoosh.py │ └── xml.py ├── notes │ ├── .gitignore │ ├── 20140424_example.ipynb │ ├── 20140424_example.py │ ├── 20141022_example.ipynb │ ├── 20141110_example.ipynb │ ├── 20150319 resolve conflicts.ipynb │ ├── 20150331 split null.ipynb │ ├── case_study_1.ipynb │ ├── issue_219.ipynb │ ├── issue_219.py │ ├── issue_256.ipynb │ └── issue_256.py ├── transform │ ├── basics.py │ ├── conversions.py │ ├── dedup.py │ ├── fills.py │ ├── headers.py │ ├── intervals.py │ ├── joins.py │ ├── maps.py │ ├── reductions.py │ ├── regex.py │ ├── reshape.py │ ├── selects.py │ ├── setops.py │ ├── sorts.py │ ├── unpacks.py │ └── validation.py └── util │ ├── base.py │ ├── counting.py │ ├── lookups.py │ ├── materialise.py │ ├── misc.py │ ├── parsers.py │ ├── random.py │ ├── statistics.py │ ├── timing.py │ └── vis.py ├── petl ├── __init__.py ├── comparison.py ├── compat.py ├── config.py ├── errors.py ├── io │ ├── __init__.py │ ├── avro.py │ ├── base.py │ ├── bcolz.py │ ├── csv.py │ ├── csv_py2.py │ ├── csv_py3.py │ ├── db.py │ ├── db_create.py │ ├── db_utils.py 
│ ├── gsheet.py │ ├── html.py │ ├── json.py │ ├── numpy.py │ ├── pandas.py │ ├── pickle.py │ ├── pytables.py │ ├── remotes.py │ ├── sources.py │ ├── text.py │ ├── whoosh.py │ ├── xls.py │ ├── xlsx.py │ ├── xlutils_view.py │ └── xml.py ├── test │ ├── __init__.py │ ├── conftest.py │ ├── failonerror.py │ ├── helpers.py │ ├── io │ │ ├── __init__.py │ │ ├── test_avro.py │ │ ├── test_avro_schemas.py │ │ ├── test_bcolz.py │ │ ├── test_csv.py │ │ ├── test_csv_unicode.py │ │ ├── test_db.py │ │ ├── test_db_create.py │ │ ├── test_db_server.py │ │ ├── test_gsheet.py │ │ ├── test_html.py │ │ ├── test_html_unicode.py │ │ ├── test_json.py │ │ ├── test_json_unicode.py │ │ ├── test_jsonl.py │ │ ├── test_numpy.py │ │ ├── test_pandas.py │ │ ├── test_pickle.py │ │ ├── test_pytables.py │ │ ├── test_remotes.py │ │ ├── test_sources.py │ │ ├── test_sqlite3.py │ │ ├── test_tees.py │ │ ├── test_text.py │ │ ├── test_text_unicode.py │ │ ├── test_whoosh.py │ │ ├── test_xls.py │ │ ├── test_xlsx.py │ │ └── test_xml.py │ ├── resources │ │ ├── test.xls │ │ ├── test.xlsx │ │ └── test.xml │ ├── test_comparison.py │ ├── test_fluent.py │ ├── test_helpers.py │ ├── test_interactive.py │ ├── transform │ │ ├── __init__.py │ │ ├── test_basics.py │ │ ├── test_conversions.py │ │ ├── test_dedup.py │ │ ├── test_fills.py │ │ ├── test_headers.py │ │ ├── test_intervals.py │ │ ├── test_joins.py │ │ ├── test_maps.py │ │ ├── test_reductions.py │ │ ├── test_regex.py │ │ ├── test_reshape.py │ │ ├── test_selects.py │ │ ├── test_setops.py │ │ ├── test_sorts.py │ │ ├── test_unpacks.py │ │ └── test_validation.py │ └── util │ │ ├── __init__.py │ │ ├── test_base.py │ │ ├── test_counting.py │ │ ├── test_lookups.py │ │ ├── test_materialise.py │ │ ├── test_misc.py │ │ ├── test_parsers.py │ │ ├── test_random.py │ │ ├── test_statistics.py │ │ ├── test_timing.py │ │ └── test_vis.py ├── transform │ ├── __init__.py │ ├── basics.py │ ├── conversions.py │ ├── dedup.py │ ├── fills.py │ ├── hashjoins.py │ ├── headers.py │ ├── 
intervals.py │ ├── joins.py │ ├── maps.py │ ├── reductions.py │ ├── regex.py │ ├── reshape.py │ ├── selects.py │ ├── setops.py │ ├── sorts.py │ ├── unpacks.py │ └── validation.py └── util │ ├── __init__.py │ ├── base.py │ ├── counting.py │ ├── lookups.py │ ├── materialise.py │ ├── misc.py │ ├── parsers.py │ ├── random.py │ ├── statistics.py │ ├── timing.py │ └── vis.py ├── pyproject.toml ├── pytest.ini ├── repr_html.ipynb ├── requirements-database.txt ├── requirements-docs.txt ├── requirements-formats.txt ├── requirements-linting.txt ├── requirements-optional.txt ├── requirements-remote.txt ├── requirements-tests.txt ├── setup.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | pragma: ${PY_MAJOR_VERSION} no cover 5 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Please see the [project documentation](http://petl.readthedocs.io/en/stable/contributing.html) for information about contributing to petl. 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for this project 3 | title: "Feature Request: " 4 | labels: ["Feature"] 5 | # projects: ["petl-developers/petl"] 6 | # assignees: 7 | # - juarezr 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: | 12 | ## Feature request 13 | 14 | Thanks for taking the time to fill out this feature request! 15 | - type: textarea 16 | id: request-objective 17 | attributes: 18 | label: Explain why petl needs this feature? 19 | description: | 20 | Please explain: 21 | - What would you want to achieve with this request? 
22 | - Is your feature request related to a problem or shortcoming? 23 | - Why the current behavior is a problem? 24 | placeholder: Please, tell us a clear and concise description of the feature. 25 | validations: 26 | required: true 27 | - type: markdown 28 | attributes: 29 | value: | 30 | ## Desired Solution 31 | - type: textarea 32 | id: desired-solution 33 | attributes: 34 | label: Describe the solution you would like 35 | description: | 36 | Please explain: 37 | - How you expect the feature would work. 38 | - What is the expected output/behavior for this feature. 39 | - What you think that shouldn't be done. 40 | placeholder: Please, tell us what did you expect to happen, what's the intended behavior. 41 | validations: 42 | required: true 43 | - type: textarea 44 | id: solution-alternatives 45 | attributes: 46 | label: Describe alternatives solutions you would have considered 47 | description: | 48 | Please explain: 49 | - If there are any any alternative solutions tha might work. 50 | - If there is any workaround for the problem. 51 | - Why this alternatives aren't satisfactory? 52 | validations: 53 | required: false 54 | - type: markdown 55 | attributes: 56 | value: | 57 | ## Suggestions 58 | - type: textarea 59 | id: source-code-example 60 | attributes: 61 | label: Source Code Examples 62 | description: | 63 | Whenever relevant, please provide a code sample, of what would be the syntax, the way you meant to use. 64 | This will be automatically formatted into code, so no need for backticks. 
65 | render: python 66 | validations: 67 | required: false 68 | - type: markdown 69 | attributes: 70 | value: | 71 | ## Additional context 72 | - type: textarea 73 | id: other-notes 74 | attributes: 75 | label: Additional Notes 76 | description: Anything not covered or N/A 77 | placeholder: n/a 78 | validations: 79 | required: false 80 | - type: checkboxes 81 | id: agree-to-code-of-conduct 82 | attributes: 83 | label: Code of Conduct 84 | description: By submitting this issue, you agree to follow the project [Code of Conduct](https://github.com/petl-developers/petl/blob/master/CODE_OF_CONDUCT.md). 85 | options: 86 | - label: I agree to follow this project's Code of Conduct 87 | required: true 88 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | This PR has the objective of . 3 | 4 | ## Changes 5 | 6 | 1. Added new feature for... 7 | 2. Fixed a bug in... 8 | 3. Changed the behavior of... 9 | 4. Improved the docs about... 10 | 11 | ## Checklist 12 | 13 | Use this checklist to ensure the quality of pull requests that include new code and/or make changes to existing code. 
14 | 15 | * [ ] Source Code guidelines: 16 | * [ ] Includes unit tests 17 | * [ ] New functions have docstrings with examples that can be run with doctest 18 | * [ ] New functions are included in API docs 19 | * [ ] Docstrings include notes for any changes to API or behavior 20 | * [ ] All changes are documented in docs/changes.rst 21 | * [ ] Versioning and history tracking guidelines: 22 | * [ ] Using atomic commits whenever possible 23 | * [ ] Commits are reversible whenever possible 24 | * [ ] There are no incomplete changes in the pull request 25 | * [ ] There is no accidental garbage added to the source code 26 | * [ ] Testing guidelines: 27 | * [ ] Tested locally using `tox` / `pytest` 28 | * [ ] Rebased to `master` branch and tested before sending the PR 29 | * [ ] Automated testing passes (see [CI](https://github.com/petl-developers/petl/actions)) 30 | * [ ] Unit test coverage has not decreased (see [Coveralls](https://coveralls.io/github/petl-developers/petl)) 31 | * [ ] State of these changes is: 32 | * [ ] Just a proof of concept 33 | * [ ] Work in progress / Further changes needed 34 | * [ ] Ready to review 35 | * [ ] Ready to merge 36 | -------------------------------------------------------------------------------- /.github/workflows/codacy-analysis.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # This workflow checks out code, performs a Codacy security scan 7 | # and integrates the results with the 8 | # GitHub Advanced Security code scanning feature. For more information on 9 | # the Codacy security scan action usage and parameters, see 10 | # https://github.com/codacy/codacy-analysis-cli-action. 
11 | # For more information on Codacy Analysis CLI in general, see 12 | # https://github.com/codacy/codacy-analysis-cli. 13 | 14 | name: Codacy Security Scan 15 | 16 | on: 17 | schedule: 18 | - cron: '59 11 27 * *' 19 | push: 20 | branches: [ "master" ] 21 | pull_request: 22 | branches: [ "master" ] 23 | types: [opened, reopened, synchronize, ready_for_review] 24 | # workflow_run: 25 | # workflows: [Test Changes] 26 | # types: 27 | # - completed 28 | workflow_call: 29 | workflow_dispatch: 30 | inputs: 31 | logLevel: 32 | description: 'Log level' 33 | required: true 34 | default: 'warning' 35 | type: choice 36 | options: 37 | - info 38 | - warning 39 | - debug 40 | 41 | permissions: 42 | contents: read 43 | 44 | jobs: 45 | codacy-security-scan: 46 | permissions: 47 | contents: read # for actions/checkout to fetch code 48 | security-events: write # for github/codeql-action/upload-sarif to upload SARIF results 49 | actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status 50 | name: Codacy Security Scan 51 | runs-on: ubuntu-latest 52 | steps: 53 | # Checkout the repository to the GitHub Actions runner 54 | - name: Checkout code 55 | uses: actions/checkout@v3 56 | 57 | # Execute Codacy Analysis CLI and generate a SARIF output with the security issues identified during the analysis 58 | - name: Run Codacy Analysis CLI 59 | uses: codacy/codacy-analysis-cli-action@v4 60 | with: 61 | # Check https://github.com/codacy/codacy-analysis-cli#project-token to get your project token from your Codacy repository 62 | # You can also omit the token and run the tools that support default configurations 63 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} 64 | verbose: true 65 | output: results.sarif 66 | format: sarif 67 | # Adjust severity of non-security issues 68 | gh-code-scanning-compat: true 69 | # Force 0 exit code to allow SARIF file generation 70 | # This will handover control about PR rejection to the GitHub 
side 71 | max-allowed-issues: 2147483647 72 | 73 | # Upload the SARIF file generated in the previous step 74 | - name: Upload SARIF results file 75 | uses: github/codeql-action/upload-sarif@v2 76 | with: 77 | sarif_file: results.sarif 78 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | schedule: 16 | - cron: '59 10 27 * *' 17 | push: 18 | branches: [ "master" ] 19 | pull_request: 20 | branches: [ "master" ] 21 | types: [opened, reopened, synchronize, ready_for_review] 22 | # workflow_run: 23 | # workflows: [Test Changes] 24 | # types: 25 | # - completed 26 | workflow_call: 27 | workflow_dispatch: 28 | inputs: 29 | logLevel: 30 | description: 'Log level' 31 | required: true 32 | default: 'warning' 33 | type: choice 34 | options: 35 | - info 36 | - warning 37 | - debug 38 | 39 | jobs: 40 | analyze: 41 | name: Analyze 42 | # Runner size impacts CodeQL analysis time. To learn more, please see: 43 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 44 | # - https://gh.io/supported-runners-and-hardware-resources 45 | # - https://gh.io/using-larger-runners 46 | # Consider using larger runners for possible analysis time improvements. 
47 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 48 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 49 | permissions: 50 | # required for all workflows 51 | security-events: write 52 | 53 | # only required for workflows in private repositories 54 | actions: read 55 | contents: read 56 | 57 | strategy: 58 | fail-fast: false 59 | matrix: 60 | language: [ 'python' ] 61 | # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] 62 | # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both 63 | # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 64 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 65 | 66 | steps: 67 | - name: Checkout repository 68 | uses: actions/checkout@v4 69 | 70 | # Initializes the CodeQL tools for scanning. 71 | - name: Initialize CodeQL 72 | uses: github/codeql-action/init@v3 73 | with: 74 | languages: ${{ matrix.language }} 75 | # If you wish to specify custom queries, you can do so here or in a config file. 76 | # By default, queries listed here will override any specified in a config file. 77 | # Prefix the list here with "+" to use these queries and those in the config file. 78 | 79 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 80 | # queries: security-extended,security-and-quality 81 | 82 | 83 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 84 | # If this step fails, then you should remove it and run the build manually (see below) 85 | - name: Autobuild 86 | uses: github/codeql-action/autobuild@v3 87 | 88 | # ℹ️ Command-line programs to run using the OS shell. 
89 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 90 | 91 | # If the Autobuild fails above, remove it and uncomment the following three lines. 92 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 93 | 94 | # - run: | 95 | # echo "Run, Build Application using script" 96 | # ./location_of_script_within_repo/buildscript.sh 97 | 98 | - name: Perform CodeQL Analysis 99 | uses: github/codeql-action/analyze@v3 100 | with: 101 | category: "/language:${{matrix.language}}" 102 | -------------------------------------------------------------------------------- /.github/workflows/publish-release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | inputs: 8 | logLevel: 9 | description: 'Log level' 10 | required: true 11 | default: 'warning' 12 | type: choice 13 | options: 14 | - info 15 | - warning 16 | - debug 17 | 18 | jobs: 19 | pypi: 20 | strategy: 21 | matrix: 22 | python: ['3.10'] 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: Checkout source code 26 | uses: actions/checkout@v4 27 | 28 | - name: Set up Python ${{ matrix.python }} 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: ${{ matrix.python }} 32 | 33 | - name: Install pypa/build 34 | run: | 35 | python -m pip install build --user 36 | 37 | - name: Build the petl package 38 | run: | 39 | python -m build --outdir dist/ . 
40 | 41 | - name: Publish the package version ${{ github.event.release.tag_name }} to PyPI 42 | if: startsWith(github.ref, 'refs/tags') 43 | uses: pypa/gh-action-pypi-publish@release/v1 44 | with: 45 | password: ${{ secrets.PYPI_API_TOKEN }} 46 | print_hash: true 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore ## 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test 42 | .tox/ 43 | .nox/ 44 | .cache 45 | .hypothesis/ 46 | .pytest_cache/ 47 | nosetests.xml 48 | 49 | # Coverage reports 50 | .coverage 51 | .coverage.* 52 | *.cover 53 | *.py,cover 54 | lcov.info 55 | cov.xml 56 | coverage.xml 57 | cover/ 58 | coverage/ 59 | htmlcov/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 112 | __pypackages__/ 113 | 114 | # Celery stuff 115 | celerybeat-schedule 116 | celerybeat.pid 117 | 118 | # SageMath parsed files 119 | *.sage.py 120 | 121 | # Environments 122 | .env 123 | .venv 124 | env/ 125 | venv/ 126 | ENV/ 127 | env.bak/ 128 | venv.bak/ 129 | 130 | # Spyder project settings 131 | .spyderproject 132 | .spyproject 133 | 134 | # Rope project settings 135 | .ropeproject 136 | 137 | # mkdocs documentation 138 | /site 139 | 140 | # mypy 141 | .mypy_cache/ 142 | .dmypy.json 143 | dmypy.json 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | 148 | # pytype static type analyzer 149 | .pytype/ 150 | 151 | # Cython debug symbols 152 | cython_debug/ 153 | 154 | # PyCharm 155 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 156 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 157 | # and can be added to the global gitignore or merged into this file. For a more nuclear 158 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
159 | #.idea/ 160 | 161 | ## Custom section for petl ## 162 | 163 | # Python generated files 164 | 165 | *.pyc 166 | 167 | # Jypyter notebooks tem files 168 | 169 | .ipynb_checkpoints/ 170 | **/.ipynb_checkpoints/* 171 | 172 | # Editor backup files 173 | *~ 174 | *.backup 175 | 176 | # Petl build generated files 177 | petl/version.py 178 | **/tmp/ 179 | 180 | # Petl doctest generated files 181 | example*.* 182 | 183 | # Ignore this patterns for develepment convenience 184 | 185 | sketch* 186 | 187 | 188 | # Ignore this folder not idea users 189 | 190 | .idea/ 191 | 192 | ## end of .gitignore file ## 193 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-lts-latest 10 | tools: 11 | # python: "3.12" 12 | python: "latest" 13 | # You can also specify other tool versions: 14 | # nodejs: "20" 15 | # rust: "1.70" 16 | # golang: "1.20" 17 | jobs: 18 | pre_build: 19 | - echo "Generating version number at 'pre_build' step" 20 | - python3 setup.py build 21 | 22 | # Build documentation in the "docs/" directory with Sphinx 23 | sphinx: 24 | configuration: docs/conf.py 25 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 26 | # builder: "dirhtml" 27 | # Fail on all warnings to avoid broken references 28 | # fail_on_warning: true 29 | 30 | # Optionally build your docs in additional formats such as PDF and ePub 31 | formats: 32 | - pdf 33 | - epub 34 | 35 | # Optional but recommended, declare the Python requirements required 36 | # to build your documentation 37 | # See 
https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 38 | python: 39 | install: 40 | # - requirements: docs/requirements.txt 41 | - requirements: requirements-docs.txt 42 | 43 | # End of the config file # 44 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | // 4 | //-- Used for IDE, Workbench, Tools -------------------------------------------- 5 | // 6 | "editorconfig.editorconfig", 7 | "VisualStudioExptTeam.vscodeintellicode", 8 | // 9 | //-- Used for linters, formatters ---------------------------------------------- 10 | // 11 | "ms-python.python", 12 | "ms-python.vscode-pylance", 13 | "ms-python.debugpy", 14 | "charliermarsh.ruff", 15 | "njpwerner.autodocstring", 16 | "njqdev.vscode-python-typehint", 17 | // 18 | // "ms-python.pylint", 19 | // "ms-python.flake8", 20 | // "ms-python.mypy-type-checker", 21 | // "ms-python.isort", 22 | // 23 | //-- Used for: Git, Code Quality ----------------------------------------------- 24 | // 25 | "Wequick.coverage-gutters", 26 | "vivaxy.vscode-conventional-commits" 27 | ] 28 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "python: with Args", 6 | "type": "debugpy", 7 | "request": "launch", 8 | "program": "${file}", 9 | "args": "${input:arguments}", 10 | "cwd": "${input:debug_working_dir}", 11 | "justMyCode": true, 12 | "autoReload": { 13 | "enable": true 14 | } 15 | }, 16 | { 17 | "name": "python: Within Libs", 18 | "type": "debugpy", 19 | "request": "launch", 20 | "program": "${file}", 21 | "args": "${input:last_arguments}", 22 | "cwd": "${input:debug_working_dir}", 23 | "justMyCode": false, 24 | "autoReload": { 25 | "enable": 
true 26 | } 27 | } 28 | ], 29 | "inputs": [ 30 | { 31 | // Usage: "args": "${input:arguments}", 32 | "id": "arguments", 33 | "type": "promptString", 34 | "description": "Which arguments to pass to the command?" 35 | }, 36 | { 37 | // Usage: "cwd": "${input:debug_working_dir}" 38 | "id": "debug_working_dir", 39 | "type": "pickString", 40 | "description": "Debug the python program in which of these folders?", 41 | "options": [ 42 | "${fileDirname}", 43 | "${fileWorkspaceFolder}", 44 | "${fileWorkspaceFolder}/petl", 45 | "${fileWorkspaceFolder}/petl/tests", 46 | "${fileWorkspaceFolder}/examples", 47 | "${relativeFileDirname}", 48 | "${userHome}", 49 | "${cwd}", 50 | "${selectedText}", 51 | "" 52 | ], 53 | "default": "${fileDirname}" 54 | }, 55 | ] 56 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "label": "Package: build", 8 | "command": "python2", 9 | "args": [ 10 | "setup.py", 11 | "build" 12 | ], 13 | "presentation": { 14 | "echo": true, 15 | "panel": "shared", 16 | "focus": true 17 | } 18 | }, 19 | { 20 | "label": "Package: install", 21 | "command": "python3", 22 | "group": { 23 | "kind": "build", 24 | "isDefault": true 25 | }, 26 | "args": [ 27 | "setup.py", 28 | "install" 29 | ], 30 | "presentation": { 31 | "echo": true, 32 | "panel": "shared", 33 | "focus": true 34 | } 35 | } 36 | ], 37 | "problemMatcher": [ 38 | { 39 | "fileLocation": "absolute", 40 | "pattern": [ 41 | { 42 | "regexp": "^\\s+File \"(.*)\", line (\\d+), in (.*)$", 43 | "file": 1, 44 | "line": 2 45 | }, 46 | { 47 | "regexp": "^\\s+(.*)$", 48 | "message": 1 49 | } 50 | ] 51 | } 52 | ] 53 | } -------------------------------------------------------------------------------- /LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Alistair Miles 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | recursive-include docs *.txt 3 | 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | petl - Extract, Transform and Load 2 | =================================================== 3 | 4 | ``petl`` is a general purpose Python package for extracting, transforming and 5 | loading tables of data. 6 | 7 | .. 
image:: docs/petl-architecture.png 8 | :align: center 9 | :alt: petl usage possibilities 10 | 11 | Resources 12 | --------- 13 | 14 | - Documentation: http://petl.readthedocs.org/ 15 | - PyPI: http://pypi.python.org/pypi/petl 16 | - Conda: https://anaconda.org/conda-forge/petl 17 | - Discussion: http://groups.google.com/group/python-etl 18 | 19 | DevOps Status 20 | ------------- 21 | 22 | |downloads| |monthly| 23 | 24 | |ci| |pypi| |conda| 25 | 26 | |coveralls| |readthedocs| |zenodo| 27 | 28 | .. |downloads| image:: https://static.pepy.tech/badge/petl 29 | :target: https://pepy.tech/project/petl 30 | :alt: Downloads 31 | 32 | .. |monthly| image:: https://static.pepy.tech/badge/petl/month 33 | :target: https://pepy.tech/project/petl 34 | :alt: Downloads/Month 35 | 36 | .. |ci| image:: https://github.com/petl-developers/petl/actions/workflows/test-changes.yml/badge.svg 37 | :target: https://github.com/petl-developers/petl/actions/workflows/test-changes.yml 38 | :alt: Continuous Integration build status 39 | 40 | .. |pypi| image:: https://github.com/petl-developers/petl/actions/workflows/publish-release.yml/badge.svg 41 | :target: https://github.com/petl-developers/petl/actions/workflows/publish-release.yml 42 | :alt: PyPI release status 43 | 44 | .. |conda| image:: https://github.com/conda-forge/petl-feedstock/actions/workflows/automerge.yml/badge.svg 45 | :target: https://github.com/conda-forge/petl-feedstock/actions/workflows/automerge.yml 46 | :alt: Conda Forge release status 47 | 48 | .. |readthedocs| image:: https://readthedocs.org/projects/petl/badge/?version=stable 49 | :target: http://petl.readthedocs.io/en/stable/?badge=stable 50 | :alt: readthedocs.org release status 51 | 52 | .. |coveralls| image:: https://coveralls.io/repos/github/petl-developers/petl/badge.svg?branch=master 53 | :target: https://coveralls.io/github/petl-developers/petl?branch=master 54 | :alt: Coveralls release status 55 | 56 | .. 
|zenodo| image:: https://zenodo.org/badge/2233194.svg 57 | :target: https://zenodo.org/badge/latestdoi/2233194 58 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | petl - Extract, Transform and Load 2 | ================================== 3 | 4 | ``petl`` is a general purpose Python package for extracting, transforming and 5 | loading tables of data. 6 | 7 | Resources 8 | --------- 9 | 10 | - Documentation: http://petl.readthedocs.org/ 11 | - Mailing List: http://groups.google.com/group/python-etl 12 | - Source Code: https://github.com/petl-developers/petl 13 | - Download: 14 | - PyPI: http://pypi.python.org/pypi/petl 15 | - Conda Forge:https://anaconda.org/conda-forge/petl 16 | 17 | Getting Help 18 | ------------- 19 | 20 | Please feel free to ask questions via the mailing list 21 | (python-etl@googlegroups.com). 22 | 23 | To report installation problems, bugs or any other issues please email 24 | python-etl@googlegroups.com or `raise an issue on GitHub 25 | `_. 26 | -------------------------------------------------------------------------------- /bin/petl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function, division, absolute_import 4 | import sys 5 | import os 6 | import os.path 7 | import glob 8 | from optparse import OptionParser 9 | 10 | from petl import __version__ 11 | from petl import * 12 | 13 | parser = OptionParser( 14 | usage="%prog [options] expression", 15 | description="Evaluate a Python expression. 
The expression will be " 16 | "evaluated using eval(), with petl functions imported.", 17 | version=__version__) 18 | 19 | options, args = parser.parse_args() 20 | 21 | try: 22 | (expression,) = args 23 | except ValueError: 24 | parser.error("invalid number of arguments (%s)" % len(args)) 25 | r = eval(expression) 26 | 27 | if r is not None: 28 | if isinstance(r, Table): 29 | print(look(r)) 30 | else: 31 | print(str(r)) 32 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -x -c 'docker compose --file docker-compose.yml up --detach' 2 | 3 | #region commands ----------------------------------------------------------------------------------- 4 | 5 | #$ docker compose --file docker-compose.yml up --detach 6 | #$ docker compose --file docker-compose.yml down --remove-orphans -v --rmi local 7 | 8 | #$ docker exec --tty --interactive --privileged petl-xxxxxx /bin/bash 9 | 10 | #$ docker exec -it petl-postgres psql -U petl --dbname=petl 11 | #$ docker exec -it petl-msyql mysql --user=petl --database=petl --password=test 12 | 13 | #endregion ----------------------------------------------------------------------------------------- 14 | 15 | #region docker composer ---------------------------------------------------------------------------- 16 | 17 | --- 18 | services: 19 | postgres: 20 | container_name: petl-postgres 21 | hostname: petl_postgres 22 | image: postgres:latest 23 | environment: 24 | - POSTGRES_USER=petl 25 | - POSTGRES_PASSWORD=test 26 | - POSTGRES_DB=petl 27 | - POSTGRES_HOST_AUTH_METHOD=password 28 | ports: 29 | - "5432:5432/tcp" 30 | restart: "unless-stopped" 31 | stdin_open: true 32 | tty: true 33 | healthcheck: 34 | test: ["CMD", "psql", "--host=localhost", "--username=petl", "--dbname=petl", "-c", "select 1 as ok"] 35 | interval: 20s 36 | timeout: 10s 37 | retries: 5 38 | start_period: 2s 39 | 40 
| mysql: 41 | container_name: petl-mysql 42 | hostname: petl_mysql 43 | image: mysql:latest 44 | ports: 45 | - "3306:3306/tcp" 46 | - "33060:33060/tcp" 47 | environment: 48 | MYSQL_ALLOW_EMPTY_PASSWORD: "yes" 49 | MYSQL_DATABASE: "petl" 50 | MYSQL_USER: "petl" 51 | MYSQL_PASSWORD: "test" 52 | MYSQL_ROOT_PASSWORD: "pass0" 53 | restart: "unless-stopped" 54 | stdin_open: true 55 | tty: true 56 | healthcheck: 57 | test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] 58 | interval: 20s 59 | timeout: 10s 60 | retries: 5 61 | start_period: 2s 62 | 63 | samba: 64 | container_name: petl-samba 65 | hostname: petl_samba 66 | image: dperson/samba 67 | ports: 68 | - "137:137/udp" 69 | - "138:138/udp" 70 | - "139:139/tcp" 71 | - "445:445/tcp" 72 | tmpfs: 73 | - /tmp 74 | restart: unless-stopped 75 | stdin_open: true 76 | tty: true 77 | volumes: 78 | - /mnt:/mnt:z 79 | - /mnt2:/mnt2:z 80 | command: '-s "public;/mnt;yes;no;yes;all" -s "mount2;/mnt2" -u "petl;test" -p' 81 | 82 | sftp: 83 | container_name: petl-sftp 84 | hostname: petl_sftp 85 | image: atmoz/sftp 86 | ports: 87 | - "22:22/tcp" 88 | tmpfs: 89 | - /tmp 90 | restart: unless-stopped 91 | stdin_open: true 92 | tty: true 93 | command: 'petl:test:::public' 94 | 95 | #endregion ----------------------------------------------------------------------------------------- 96 | -------------------------------------------------------------------------------- /docs/acknowledgments.rst: -------------------------------------------------------------------------------- 1 | Acknowledgments 2 | =============== 3 | 4 | This is community-maintained software. 
The following people have contributed to 5 | the development of this package: 6 | 7 | * Alexander Stauber 8 | * Alistair Miles (`alimanfoo `_) 9 | * Andreas Porevopoulos (`sv1jsb `_) 10 | * Andrew Kim (`andrewakim `_) 11 | * Artur Poniński (`arturponinski `_) 12 | * Brad Maggard (`bmaggard `_) 13 | * Caleb Lloyd (`caleblloyd `_) 14 | * César Roldán (`ihuro `_) 15 | * Chris Lasher (`gotgenes `_) 16 | * Dean Way (`DeanWay `_) 17 | * Dustin Engstrom (`engstrom `_) 18 | * Fahad Siddiqui (`fahadsiddiqui `_) 19 | * Florent Xicluna (`florentx `_) 20 | * Henry Rizzi (`henryrizzi `_) 21 | * Jonathan Camile (`deytao `_) 22 | * Jonathan Moss (`a-musing-moose `_) 23 | * Juarez Rudsatz (`juarezr `_) 24 | * Kenneth Borthwick 25 | * Krisztián Fekete (`krisztianfekete `_) 26 | * Matt Katz (`mattkatz `_) 27 | * Matthew Scholefield (`MatthewScholefield `_) 28 | * Michał Karol (`MichalKarol `_) 29 | * Michael Rea (`rea725 `_) 30 | * Olivier Macchioni (`omacchioni `_) 31 | * Olivier Poitrey (`rs `_) 32 | * Pablo Castellano (`PabloCastellano `_) 33 | * Paul Jensen (`psnj `_) 34 | * Paulo Scardine (`scardine `_) 35 | * Peder Jakobsen (`pjakobsen `_) 36 | * Phillip Knaus (`phillipknaus `_) 37 | * Richard Pearson (`podpearson `_) 38 | * Robert DeSimone (`icenine457 `_) 39 | * Robin Moss (`LupusUmbrae `_) 40 | * Roger Woodley (`rogerkwoodley `_) 41 | * Tim Hebbeler (`timheb `_) 42 | * Tucker Beck (`dusktreader `_) 43 | * Viliam Segeďa (`vilos `_) 44 | * Zach Palchick (`palchicz `_) 45 | * `adamsdarlingtower `_ 46 | * `hugovk `_ 47 | * `imazor `_ 48 | * `james-unified `_ 49 | * `Mgutjahr `_ 50 | * `shayh `_ 51 | * `thatneat `_ 52 | * `titusz `_ 53 | * `zigen `_ 54 | 55 | Development of petl has been supported by an open source license for 56 | `PyCharm `_. 
57 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | =============================== 3 | 4 | .. automodule:: petl.config 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. module:: petl 2 | 3 | petl - Extract, Transform and Load 4 | =================================== 5 | 6 | :mod:`petl` is a general purpose Python package for extracting, transforming 7 | and loading tables of data. 8 | 9 | .. image:: petl-architecture.png 10 | :width: 750 11 | :align: center 12 | :alt: petl use cases diagram 13 | 14 | Resources 15 | --------- 16 | 17 | - Documentation: http://petl.readthedocs.org/ 18 | - Mailing List: http://groups.google.com/group/python-etl 19 | - Source Code: https://github.com/petl-developers/petl 20 | - Download: 21 | - PyPI: http://pypi.python.org/pypi/petl 22 | - Conda Forge:https://anaconda.org/conda-forge/petl 23 | 24 | .. note:: 25 | 26 | - Version 2.0 will be a major milestone for :mod:`petl`. 27 | - This version will introduce some changes that could affect current behaviour. 28 | - We will try to keep compatibility to the maximum possible, except 29 | when the current behavior is inconsistent or have shortcomings. 30 | - The biggest change is the end of support of Python `2.7`. 31 | - The minimum supported version will be Python `3.6`. 32 | 33 | Getting Help 34 | ------------- 35 | 36 | Please feel free to ask questions via the mailing list 37 | (python-etl@googlegroups.com). 38 | 39 | To report installation problems, bugs or any other issues please email 40 | python-etl@googlegroups.com or `raise an issue on GitHub 41 | `_. 42 | 43 | For an example of :mod:`petl` in use, see the `case study on comparing tables 44 | `_. 
45 | 46 | Contents 47 | -------- 48 | 49 | For an alphabetic list of all functions in the package, 50 | see the :ref:`genindex`. 51 | 52 | .. toctree:: 53 | :maxdepth: 2 54 | 55 | install 56 | intro 57 | io 58 | transform 59 | util 60 | config 61 | contributing 62 | acknowledgments 63 | related_work 64 | changes 65 | 66 | Indices and tables 67 | ------------------ 68 | 69 | * :ref:`genindex` 70 | * :ref:`modindex` 71 | * :ref:`search` 72 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | .. _intro_installation: 5 | 6 | Getting Started 7 | --------------- 8 | 9 | This package is available from the `Python Package Index 10 | `_. If you have `pip 11 | `_ you should be able to do:: 12 | 13 | $ pip install petl 14 | 15 | You can also download manually, extract and run ``python setup.py 16 | install``. 17 | 18 | To verify the installation, the test suite can be run with `pytest 19 | `_, e.g.:: 20 | 21 | $ pip install pytest 22 | $ pytest -v petl 23 | 24 | :mod:`petl` has been tested with Python versions 2.7 and 3.6-3.13 25 | under Linux, MacOS, and Windows operating systems. 26 | 27 | .. _intro_dependencies: 28 | 29 | Dependencies and extensions 30 | --------------------------- 31 | 32 | This package is written in pure Python and has no installation requirements 33 | other than the Python core modules. 34 | 35 | Some domain-specific and/or experimental extensions to :mod:`petl` are 36 | available from the petlx_ package. 37 | 38 | .. _petlx: http://petlx.readthedocs.org 39 | 40 | Some of the functions in this package require installation of third party 41 | packages. These packages are indicated in the relevant parts of the 42 | documentation for each file format. 
It is also possible to install some of the dependencies when installing `petl` by
module:: petl.util 2 | 3 | Utility functions 4 | ================= 5 | 6 | 7 | Basic utilities 8 | --------------- 9 | 10 | .. autofunction:: petl.util.base.header 11 | .. autofunction:: petl.util.base.fieldnames 12 | .. autofunction:: petl.util.base.data 13 | .. autofunction:: petl.util.base.values 14 | .. autofunction:: petl.util.base.dicts 15 | .. autofunction:: petl.util.base.namedtuples 16 | .. autofunction:: petl.util.base.records 17 | .. autofunction:: petl.util.base.expr 18 | .. autofunction:: petl.util.base.rowgroupby 19 | .. autofunction:: petl.util.base.empty 20 | 21 | 22 | Visualising tables 23 | ------------------ 24 | 25 | .. autofunction:: petl.util.vis.look 26 | .. autofunction:: petl.util.vis.lookall 27 | .. autofunction:: petl.util.vis.see 28 | .. autofunction:: petl.util.vis.display 29 | .. autofunction:: petl.util.vis.displayall 30 | 31 | 32 | Lookup data structures 33 | ---------------------- 34 | 35 | .. autofunction:: petl.util.lookups.lookup 36 | .. autofunction:: petl.util.lookups.lookupone 37 | .. autofunction:: petl.util.lookups.dictlookup 38 | .. autofunction:: petl.util.lookups.dictlookupone 39 | .. autofunction:: petl.util.lookups.recordlookup 40 | .. autofunction:: petl.util.lookups.recordlookupone 41 | 42 | 43 | Parsing string/text values 44 | -------------------------- 45 | 46 | .. autofunction:: petl.util.parsers.dateparser 47 | .. autofunction:: petl.util.parsers.timeparser 48 | .. autofunction:: petl.util.parsers.datetimeparser 49 | .. autofunction:: petl.util.parsers.boolparser 50 | .. autofunction:: petl.util.parsers.numparser 51 | 52 | 53 | Counting 54 | -------- 55 | 56 | .. autofunction:: petl.util.counting.nrows 57 | .. autofunction:: petl.util.counting.valuecount 58 | .. autofunction:: petl.util.counting.valuecounter 59 | .. autofunction:: petl.util.counting.valuecounts 60 | .. autofunction:: petl.util.counting.stringpatterncounter 61 | .. autofunction:: petl.util.counting.stringpatterns 62 | .. 
autofunction:: petl.util.counting.rowlengths 63 | .. autofunction:: petl.util.counting.typecounter 64 | .. autofunction:: petl.util.counting.typecounts 65 | .. autofunction:: petl.util.counting.parsecounter 66 | .. autofunction:: petl.util.counting.parsecounts 67 | 68 | 69 | Timing 70 | ------ 71 | 72 | .. autofunction:: petl.util.timing.progress 73 | .. autofunction:: petl.util.timing.log_progress 74 | .. autofunction:: petl.util.timing.clock 75 | 76 | 77 | Statistics 78 | ---------- 79 | 80 | .. autofunction:: petl.util.statistics.limits 81 | .. autofunction:: petl.util.statistics.stats 82 | 83 | 84 | Materialising tables 85 | -------------------- 86 | 87 | .. autofunction:: petl.util.materialise.columns 88 | .. autofunction:: petl.util.materialise.facetcolumns 89 | .. autofunction:: petl.util.materialise.listoflists 90 | .. autofunction:: petl.util.materialise.listoftuples 91 | .. autofunction:: petl.util.materialise.tupleoflists 92 | .. autofunction:: petl.util.materialise.tupleoftuples 93 | .. autofunction:: petl.util.materialise.cache 94 | 95 | 96 | Randomly generated tables 97 | ------------------------- 98 | 99 | .. autofunction:: petl.util.random.randomtable 100 | .. autofunction:: petl.util.random.dummytable 101 | 102 | 103 | Miscellaneous 104 | ------------- 105 | 106 | .. autofunction:: petl.util.misc.typeset 107 | .. autofunction:: petl.util.misc.diffheaders 108 | .. autofunction:: petl.util.misc.diffvalues 109 | .. autofunction:: petl.util.misc.strjoin 110 | .. autofunction:: petl.util.misc.nthword 111 | .. 
autofunction:: petl.util.misc.coalesce 112 | -------------------------------------------------------------------------------- /examples/comparison.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | import petl as etl 5 | table = [['foo', 'bar'], 6 | ['a', 1], 7 | ['b', None]] 8 | 9 | # raises exception under Python 3 10 | etl.select(table, 'bar', lambda v: v > 0) 11 | # no error under Python 3 12 | etl.selectgt(table, 'bar', 0) 13 | # or ... 14 | etl.select(table, 'bar', lambda v: v > etl.Comparable(0)) 15 | 16 | -------------------------------------------------------------------------------- /examples/intro.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | example_data = """foo,bar,baz 4 | a,1,3.4 5 | b,2,7.4 6 | c,6,2.2 7 | d,9,8.1 8 | """ 9 | with open('example.csv', 'w') as f: 10 | f.write(example_data) 11 | 12 | import petl as etl 13 | table1 = etl.fromcsv('example.csv') 14 | table2 = etl.convert(table1, 'foo', 'upper') 15 | table3 = etl.convert(table2, 'bar', int) 16 | table4 = etl.convert(table3, 'baz', float) 17 | table5 = etl.addfield(table4, 'quux', lambda row: row.bar * row.baz) 18 | table5 19 | 20 | table = ( 21 | etl 22 | .fromcsv('example.csv') 23 | .convert('foo', 'upper') 24 | .convert('bar', int) 25 | .convert('baz', float) 26 | .addfield('quux', lambda row: row.bar * row.baz) 27 | ) 28 | table 29 | 30 | l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]] 31 | table = etl.wrap(l) 32 | table.look() 33 | 34 | l = [['foo', 'bar'], ['a', 1], ['b', 2], ['c', 2]] 35 | table = etl.wrap(l) 36 | table 37 | 38 | etl.config.look_index_header = True 39 | 40 | table 41 | -------------------------------------------------------------------------------- /examples/io/csv.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # fromcsv() 5 | ########### 6 | 7 | import petl as etl 8 | import csv 9 | # set up a CSV file to demonstrate with 10 | table1 = [['foo', 'bar'], 11 | ['a', 1], 12 | ['b', 2], 13 | ['c', 2]] 14 | with open('example.csv', 'w') as f: 15 | writer = csv.writer(f) 16 | writer.writerows(table1) 17 | 18 | # now demonstrate the use of fromcsv() 19 | table2 = etl.fromcsv('example.csv') 20 | table2 21 | 22 | 23 | # tocsv() 24 | ######### 25 | 26 | import petl as etl 27 | table1 = [['foo', 'bar'], 28 | ['a', 1], 29 | ['b', 2], 30 | ['c', 2]] 31 | etl.tocsv(table1, 'example.csv') 32 | # look what it did 33 | print(open('example.csv').read()) 34 | -------------------------------------------------------------------------------- /examples/io/html.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # tohtml() 5 | ########## 6 | 7 | 8 | import petl as etl 9 | table1 = [['foo', 'bar'], 10 | ['a', 1], 11 | ['b', 2], 12 | ['c', 2]] 13 | etl.tohtml(table1, 'example.html', caption='example table') 14 | print(open('example.html').read()) 15 | -------------------------------------------------------------------------------- /examples/io/json.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # fromjson() 5 | ############ 6 | 7 | import petl as etl 8 | data = ''' 9 | [{"foo": "a", "bar": 1}, 10 | {"foo": "b", "bar": 2}, 11 | {"foo": "c", "bar": 2}] 12 | ''' 13 | with open('example.json', 'w') as f: 14 | f.write(data) 15 | 16 | table1 = etl.fromjson('example.json') 17 | table1 18 | 19 | 20 | # fromdicts() 21 | ############# 22 | 23 | import petl as etl 24 | dicts = [{"foo": "a", "bar": 1}, 25 | {"foo": "b", 
"bar": 2}, 26 | {"foo": "c", "bar": 2}] 27 | table1 = etl.fromdicts(dicts) 28 | table1 29 | 30 | 31 | # tojson() 32 | ########## 33 | 34 | import petl as etl 35 | table1 = [['foo', 'bar'], 36 | ['a', 1], 37 | ['b', 2], 38 | ['c', 2]] 39 | etl.tojson(table1, 'example.json', sort_keys=True) 40 | # check what it did 41 | print(open('example.json').read()) 42 | 43 | 44 | # tojsonarrays() 45 | ################ 46 | 47 | import petl as etl 48 | table1 = [['foo', 'bar'], 49 | ['a', 1], 50 | ['b', 2], 51 | ['c', 2]] 52 | etl.tojsonarrays(table1, 'example.json') 53 | # check what it did 54 | print(open('example.json').read()) 55 | 56 | -------------------------------------------------------------------------------- /examples/io/numpy.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # toarray() 5 | ########### 6 | 7 | import petl as etl 8 | table = [('foo', 'bar', 'baz'), 9 | ('apples', 1, 2.5), 10 | ('oranges', 3, 4.4), 11 | ('pears', 7, .1)] 12 | a = etl.toarray(table) 13 | a 14 | # the dtype can be specified as a string 15 | a = etl.toarray(table, dtype='a4, i2, f4') 16 | a 17 | # the dtype can also be partially specified 18 | a = etl.toarray(table, dtype={'foo': 'a4'}) 19 | a 20 | 21 | 22 | # fromarray() 23 | ############# 24 | 25 | import petl as etl 26 | import numpy as np 27 | a = np.array([('apples', 1, 2.5), 28 | ('oranges', 3, 4.4), 29 | ('pears', 7, 0.1)], 30 | dtype='U8, i4,f4') 31 | table = etl.fromarray(a) 32 | table 33 | 34 | 35 | # valuestoarray() 36 | ################# 37 | 38 | import petl as etl 39 | table = [('foo', 'bar', 'baz'), 40 | ('apples', 1, 2.5), 41 | ('oranges', 3, 4.4), 42 | ('pears', 7, .1)] 43 | table = etl.wrap(table) 44 | table.values('bar').array() 45 | # specify dtype 46 | table.values('bar').array(dtype='i4') 47 | -------------------------------------------------------------------------------- /examples/io/pandas.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # todataframe() 5 | ############### 6 | 7 | import petl as etl 8 | table = [('foo', 'bar', 'baz'), 9 | ('apples', 1, 2.5), 10 | ('oranges', 3, 4.4), 11 | ('pears', 7, .1)] 12 | df = etl.todataframe(table) 13 | df 14 | 15 | 16 | # fromdataframe() 17 | ################# 18 | 19 | import petl as etl 20 | import pandas as pd 21 | records = [('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)] 22 | df = pd.DataFrame.from_records(records, columns=('foo', 'bar', 'baz')) 23 | table = etl.fromdataframe(df) 24 | table 25 | -------------------------------------------------------------------------------- /examples/io/pickle.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # frompickle() 5 | ############## 6 | 7 | import petl as etl 8 | import pickle 9 | # set up a file to demonstrate with 10 | with open('example.p', 'wb') as f: 11 | pickle.dump(['foo', 'bar'], f) 12 | pickle.dump(['a', 1], f) 13 | pickle.dump(['b', 2], f) 14 | pickle.dump(['c', 2.5], f) 15 | 16 | # demonstrate the use of frompickle() 17 | table1 = etl.frompickle('example.p') 18 | table1 19 | 20 | 21 | # topickle() 22 | ############ 23 | 24 | import petl as etl 25 | table1 = [['foo', 'bar'], 26 | ['a', 1], 27 | ['b', 2], 28 | ['c', 2]] 29 | etl.topickle(table1, 'example.p') 30 | # look what it did 31 | table2 = etl.frompickle('example.p') 32 | table2 33 | 34 | 35 | -------------------------------------------------------------------------------- /examples/io/pytables.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | # fromhdf5() 6 | ############ 7 | 8 | import petl as etl 9 | import tables 
10 | # set up a new hdf5 table to demonstrate with 11 | h5file = tables.openFile('example.h5', mode='w', title='Example file') 12 | h5file.createGroup('/', 'testgroup', 'Test Group') 13 | class FooBar(tables.IsDescription): 14 | foo = tables.Int32Col(pos=0) 15 | bar = tables.StringCol(6, pos=2) 16 | 17 | h5table = h5file.createTable('/testgroup', 'testtable', FooBar, 'Test Table') 18 | # load some data into the table 19 | table1 = (('foo', 'bar'), 20 | (1, b'asdfgh'), 21 | (2, b'qwerty'), 22 | (3, b'zxcvbn')) 23 | 24 | for row in table1[1:]: 25 | for i, f in enumerate(table1[0]): 26 | h5table.row[f] = row[i] 27 | h5table.row.append() 28 | 29 | h5file.flush() 30 | h5file.close() 31 | # 32 | # now demonstrate use of fromhdf5 33 | table1 = etl.fromhdf5('example.h5', '/testgroup', 'testtable') 34 | table1 35 | # alternatively just specify path to table node 36 | table1 = etl.fromhdf5('example.h5', '/testgroup/testtable') 37 | # ...or use an existing tables.File object 38 | h5file = tables.openFile('example.h5') 39 | table1 = etl.fromhdf5(h5file, '/testgroup/testtable') 40 | # ...or use an existing tables.Table object 41 | h5tbl = h5file.getNode('/testgroup/testtable') 42 | table1 = etl.fromhdf5(h5tbl) 43 | # use a condition to filter data 44 | table2 = etl.fromhdf5(h5tbl, condition='foo < 3') 45 | table2 46 | h5file.close() 47 | 48 | 49 | # fromhdf5sorted() 50 | ################## 51 | 52 | import petl as etl 53 | import tables 54 | # set up a new hdf5 table to demonstrate with 55 | h5file = tables.openFile('example.h5', mode='w', title='Test file') 56 | h5file.createGroup('/', 'testgroup', 'Test Group') 57 | class FooBar(tables.IsDescription): 58 | foo = tables.Int32Col(pos=0) 59 | bar = tables.StringCol(6, pos=2) 60 | 61 | h5table = h5file.createTable('/testgroup', 'testtable', FooBar, 'Test Table') 62 | # load some data into the table 63 | table1 = (('foo', 'bar'), 64 | (3, b'asdfgh'), 65 | (2, b'qwerty'), 66 | (1, b'zxcvbn')) 67 | for row in table1[1:]: 68 | for i, 
f in enumerate(table1[0]): 69 | h5table.row[f] = row[i] 70 | h5table.row.append() 71 | 72 | h5table.cols.foo.createCSIndex() # CS index is required 73 | h5file.flush() 74 | h5file.close() 75 | # 76 | # access the data, sorted by the indexed column 77 | table2 = etl.fromhdf5sorted('example.h5', '/testgroup', 'testtable', 78 | sortby='foo') 79 | table2 80 | 81 | 82 | # tohdf5() 83 | ########## 84 | 85 | import petl as etl 86 | table1 = (('foo', 'bar'), 87 | (1, b'asdfgh'), 88 | (2, b'qwerty'), 89 | (3, b'zxcvbn')) 90 | etl.tohdf5(table1, 'example.h5', '/testgroup', 'testtable', 91 | drop=True, create=True, createparents=True) 92 | etl.fromhdf5('example.h5', '/testgroup', 'testtable') 93 | -------------------------------------------------------------------------------- /examples/io/sqlite3.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import os 3 | 4 | 5 | # fromsqlite3() 6 | ############### 7 | 8 | os.remove('example.db') 9 | 10 | import petl as etl 11 | import sqlite3 12 | # set up a database to demonstrate with 13 | data = [['a', 1], 14 | ['b', 2], 15 | ['c', 2.0]] 16 | connection = sqlite3.connect('example.db') 17 | c = connection.cursor() 18 | _ = c.execute('drop table if exists foobar') 19 | _ = c.execute('create table foobar (foo, bar)') 20 | for row in data: 21 | _ = c.execute('insert into foobar values (?, ?)', row) 22 | 23 | connection.commit() 24 | c.close() 25 | # now demonstrate the petl.fromsqlite3 function 26 | table = etl.fromsqlite3('example.db', 'select * from foobar') 27 | table 28 | 29 | 30 | # tosqlite3() 31 | ############## 32 | 33 | os.remove('example.db') 34 | 35 | import petl as etl 36 | table1 = [['foo', 'bar'], 37 | ['a', 1], 38 | ['b', 2], 39 | ['c', 2]] 40 | _ = etl.tosqlite3(table1, 'example.db', 'foobar', create=True) 41 | # look what it did 42 | table2 = etl.fromsqlite3('example.db', 'select * from foobar') 43 | table2 44 | 
-------------------------------------------------------------------------------- /examples/io/text.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # fromtext() 5 | ############ 6 | 7 | import petl as etl 8 | # setup example file 9 | text = 'a,1\nb,2\nc,2\n' 10 | with open('example.txt', 'w') as f: 11 | f.write(text) 12 | 13 | table1 = etl.fromtext('example.txt') 14 | table1 15 | # post-process, e.g., with capture() 16 | table2 = table1.capture('lines', '(.*),(.*)$', ['foo', 'bar']) 17 | table2 18 | 19 | 20 | # totext() 21 | ########## 22 | 23 | import petl as etl 24 | table1 = [['foo', 'bar'], 25 | ['a', 1], 26 | ['b', 2], 27 | ['c', 2]] 28 | prologue = '''{| class="wikitable" 29 | |- 30 | ! foo 31 | ! bar 32 | ''' 33 | template = '''|- 34 | | {foo} 35 | | {bar} 36 | ''' 37 | epilogue = '|}' 38 | etl.totext(table1, 'example.txt', template, prologue, epilogue) 39 | # see what we did 40 | print(open('example.txt').read()) 41 | -------------------------------------------------------------------------------- /examples/io/whoosh.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | # fromtextindex() 6 | ################# 7 | 8 | import petl as etl 9 | import os 10 | # set up an index and load some documents via the Whoosh API 11 | from whoosh.index import create_in 12 | from whoosh.fields import * 13 | schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) 14 | dirname = 'example.whoosh' 15 | if not os.path.exists(dirname): 16 | os.mkdir(dirname) 17 | 18 | index = create_in(dirname, schema) 19 | writer = index.writer() 20 | writer.add_document(title=u"First document", path=u"/a", 21 | content=u"This is the first document we've added!") 22 | writer.add_document(title=u"Second document", 
path=u"/b", 23 | content=u"The second one is even more interesting!") 24 | writer.commit() 25 | # extract documents as a table 26 | table = etl.fromtextindex(dirname) 27 | table 28 | 29 | 30 | # totextindex() 31 | ############### 32 | 33 | import petl as etl 34 | import datetime 35 | import os 36 | # here is the table we want to load into an index 37 | table = (('f0', 'f1', 'f2', 'f3', 'f4'), 38 | ('AAA', 12, 4.3, True, datetime.datetime.now()), 39 | ('BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)), 40 | ('CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25))) 41 | # define a schema for the index 42 | from whoosh.fields import * 43 | schema = Schema(f0=TEXT(stored=True), 44 | f1=NUMERIC(int, stored=True), 45 | f2=NUMERIC(float, stored=True), 46 | f3=BOOLEAN(stored=True), 47 | f4=DATETIME(stored=True)) 48 | # load index 49 | dirname = 'example.whoosh' 50 | if not os.path.exists(dirname): 51 | os.mkdir(dirname) 52 | 53 | etl.totextindex(table, dirname, schema=schema) 54 | 55 | 56 | # searchtextindex() 57 | ################### 58 | 59 | import petl as etl 60 | import os 61 | # set up an index and load some documents via the Whoosh API 62 | from whoosh.index import create_in 63 | from whoosh.fields import * 64 | schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) 65 | dirname = 'example.whoosh' 66 | if not os.path.exists(dirname): 67 | os.mkdir(dirname) 68 | 69 | index = create_in('example.whoosh', schema) 70 | writer = index.writer() 71 | writer.add_document(title=u"Oranges", path=u"/a", 72 | content=u"This is the first document we've added!") 73 | writer.add_document(title=u"Apples", path=u"/b", 74 | content=u"The second document is even more " 75 | u"interesting!") 76 | writer.commit() 77 | # demonstrate the use of searchtextindex() 78 | table1 = etl.searchtextindex('example.whoosh', 'oranges') 79 | table1 80 | table2 = etl.searchtextindex('example.whoosh', 'doc*') 81 | table2 82 | 
-------------------------------------------------------------------------------- /examples/io/xml.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | import petl as etl 5 | # setup a file to demonstrate with 6 | d = ''' 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
foobar
a1
b2
c2
''' 20 | with open('example1.xml', 'w') as f: 21 | f.write(d) 22 | 23 | table1 = etl.fromxml('example1.xml', 'tr', 'td') 24 | table1 25 | # if the data values are stored in an attribute, provide the attribute name 26 | # as an extra positional argument 27 | d = ''' 28 | 29 | 31 | 32 | 34 | 35 | 37 | 38 | 40 |
30 |
33 |
36 |
39 |
''' 41 | with open('example2.xml', 'w') as f: 42 | f.write(d) 43 | 44 | table2 = etl.fromxml('example2.xml', 'tr', 'td', 'v') 45 | table2 46 | # data values can also be extracted by providing a mapping of field 47 | # names to element paths 48 | d = ''' 49 | 50 | a 51 | 52 | 53 | b 54 | 55 | 56 | c 57 | 58 |
''' 59 | with open('example3.xml', 'w') as f: 60 | f.write(d) 61 | 62 | table3 = etl.fromxml('example3.xml', 'row', 63 | {'foo': 'foo', 'bar': ('baz/bar', 'v')}) 64 | table3 65 | -------------------------------------------------------------------------------- /examples/notes/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.zip* 3 | -------------------------------------------------------------------------------- /examples/notes/20140424_example.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3.0 3 | 4 | # 5 | 6 | data = """type,price,quantity 7 | Apples 8 | Cortland,0.30,24 9 | Red Delicious,0.40,24 10 | Oranges 11 | Navel,0.50,12 12 | """ 13 | 14 | # 15 | 16 | import petl.interactive as etl 17 | from petl.io import StringSource 18 | 19 | # 20 | 21 | tbl1 = (etl 22 | .fromcsv(StringSource(data)) 23 | ) 24 | tbl1 25 | 26 | # 27 | 28 | # Option 1 - using existing petl functions 29 | 30 | # 31 | 32 | def make_room_for_category(row): 33 | if len(row) == 1: 34 | return (row[0], 'X', 'X', 'X') 35 | else: 36 | return (None,) + tuple(row) 37 | 38 | tbl2 = tbl1.rowmap(make_room_for_category, fields=['category', 'type', 'price', 'quantity']) 39 | tbl2 40 | 41 | # 42 | 43 | tbl3 = tbl2.filldown() 44 | tbl3 45 | 46 | # 47 | 48 | tbl4 = tbl3.ne('type', 'X') 49 | tbl4 50 | 51 | # 52 | 53 | # Option 2 - custom transformer 54 | 55 | # 56 | 57 | class CustomTransformer(object): 58 | 59 | def __init__(self, source): 60 | self.source = source 61 | 62 | def __iter__(self): 63 | it = iter(self.source) 64 | 65 | # construct new header 66 | source_fields = it.next() 67 | out_fields = ('category',) + tuple(source_fields) 68 | yield out_fields 69 | 70 | # transform data 71 | current_category = None 72 | for row in it: 73 | if len(row) == 1: 74 | current_category = row[0] 75 | else: 76 | yield (current_category,) + tuple(row) 77 | 78 | # 79 | 80 | tbl5 = 
CustomTransformer(tbl1) 81 | 82 | # 83 | 84 | # just so it formats nicely as HTML in the notebook... 85 | etl.wrap(tbl5) 86 | 87 | -------------------------------------------------------------------------------- /examples/notes/20150331 split null.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "sys.version_info(major=3, minor=4, micro=2, releaselevel='final', serial=0)" 14 | ] 15 | }, 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "output_type": "execute_result" 19 | } 20 | ], 21 | "source": [ 22 | "import sys\n", 23 | "sys.version_info" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/plain": [ 36 | "'1.0.6'" 37 | ] 38 | }, 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "output_type": "execute_result" 42 | } 43 | ], 44 | "source": [ 45 | "import petl as etl\n", 46 | "etl.__version__" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "tbl1 = [['foo', 'bar'],\n", 58 | " ['a b c', 1],\n", 59 | " ['d e f', 2],\n", 60 | " [None, 3]]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/html": [ 73 | "\n", 74 | "\n", 75 | "\n", 76 | "\n", 77 | "\n", 78 | "\n", 79 | "\n", 80 | "\n", 81 | "\n", 82 | "\n", 83 | "\n", 84 | "\n", 85 | "\n", 86 | "\n", 87 | "\n", 88 | "\n", 89 | "\n", 90 | "\n", 91 | "\n", 92 | "\n", 93 | "\n", 94 | "\n", 95 | "\n", 96 | "\n", 97 | "\n", 98 | "\n", 99 | "\n", 100 | "\n", 101 | "\n", 102 | "
barxyz
1abc
2def
3NoneNoneNone
\n" 103 | ], 104 | "text/plain": [ 105 | "+-----+------+------+------+\n", 106 | "| bar | x | y | z |\n", 107 | "+=====+======+======+======+\n", 108 | "| 1 | 'a' | 'b' | 'c' |\n", 109 | "+-----+------+------+------+\n", 110 | "| 2 | 'd' | 'e' | 'f' |\n", 111 | "+-----+------+------+------+\n", 112 | "| 3 | None | None | None |\n", 113 | "+-----+------+------+------+" 114 | ] 115 | }, 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "tbl2 = etl.wrap(tbl1).replace('foo', None, ' ').split('foo', ' ', ['x', 'y', 'z']).replaceall('', None)\n", 123 | "tbl2" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.4.2" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 0 157 | } 158 | -------------------------------------------------------------------------------- /examples/notes/issue_219.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3.0 3 | 4 | # 5 | 6 | # Using server-side cursors with PostgreSQL and MySQL 7 | 8 | # 9 | 10 | # see http://pynash.org/2013/03/06/timing-and-profiling.html for setup of profiling magics 11 | 12 | # 13 | 14 | import MySQLdb 15 | import psycopg2 16 | 17 | import petl 18 | from petl.fluent import etl 19 | 20 | # 21 | print(petl.VERSION) 22 | tbl_dummy_data = etl().dummytable(100000) 23 | tbl_dummy_data.look() 24 | 25 | # 26 | 27 | 
print(tbl_dummy_data.nrows()) 28 | 29 | # 30 | 31 | # PostgreSQL 32 | 33 | # 34 | 35 | psql_connection = psycopg2.connect(host='localhost', dbname='petl', user='petl', password='petl') 36 | 37 | # 38 | 39 | cursor = psql_connection.cursor() 40 | cursor.execute('DROP TABLE IF EXISTS issue_219;') 41 | cursor.execute('CREATE TABLE issue_219 (foo INTEGER, bar TEXT, baz FLOAT);') 42 | 43 | # 44 | 45 | tbl_dummy_data.progress(10000).todb(psql_connection, 'issue_219') 46 | 47 | # 48 | 49 | # memory usage using default cursor 50 | print(etl.fromdb(psql_connection, 'select * from issue_219 order by foo').look(2)) 51 | 52 | # 53 | 54 | # memory usage using server-side cursor 55 | print(etl.fromdb(lambda: psql_connection.cursor(name='server-side'), 'select * from issue_219 order by foo').look(2)) 56 | 57 | # 58 | 59 | # MySQL 60 | 61 | # 62 | 63 | mysql_connection = MySQLdb.connect(host='127.0.0.1', db='petl', user='petl', passwd='petl') 64 | 65 | # 66 | 67 | cursor = mysql_connection.cursor() 68 | cursor.execute('SET SQL_MODE=ANSI_QUOTES') 69 | cursor.execute('DROP TABLE IF EXISTS issue_219;') 70 | cursor.execute('CREATE TABLE issue_219 (foo INTEGER, bar TEXT, baz FLOAT);') 71 | 72 | # 73 | 74 | tbl_dummy_data.progress(10000).todb(mysql_connection, 'issue_219') 75 | 76 | # 77 | 78 | # memory usage with default cursor 79 | print(etl.fromdb(mysql_connection, 'select * from issue_219 order by foo').look(2)) 80 | 81 | # 82 | 83 | # memory usage with server-side cursor 84 | print(etl.fromdb(lambda: mysql_connection.cursor(MySQLdb.cursors.SSCursor), 'select * from issue_219 order by foo').look(2)) 85 | 86 | -------------------------------------------------------------------------------- /examples/notes/issue_256.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3.0 3 | 4 | # 5 | 6 | # Notes supporting [issue #256](https://github.com/alimanfoo/petl/issues/256). 
7 | 8 | # 9 | 10 | import petl.interactive as etl 11 | 12 | # 13 | 14 | t1 = etl.wrap([['foo', 'bar'], [1, 'a'], [2, 'b']]) 15 | t1 16 | 17 | # 18 | 19 | t2 = etl.wrap([['foo', 'bar'], [1, 'a'], [2, 'c']]) 20 | t2 21 | 22 | # 23 | 24 | t3 = etl.merge(t1, t2, key='foo') 25 | t3 26 | 27 | # 28 | 29 | # The problem with the above is that you cannot tell from inspecting *t3* alone which conflicting value comes from which source. 30 | # 31 | # A workaround as suggested by [@pawl](https://github.com/pawl) is to use the [*conflicts()*](http://petl.readthedocs.org/en/latest/#petl.conflicts) function, e.g.: 32 | 33 | # 34 | 35 | t4 = (etl 36 | .cat( 37 | t1.addfield('source', 1), 38 | t2.addfield('source', 2) 39 | ) 40 | .conflicts(key='foo', exclude='source') 41 | ) 42 | t4 43 | 44 | -------------------------------------------------------------------------------- /examples/transform/conversions.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # convert() 5 | ########### 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | ['A', '2.4', 12], 10 | ['B', '5.7', 34], 11 | ['C', '1.2', 56]] 12 | # using a built-in function: 13 | table2 = etl.convert(table1, 'bar', float) 14 | table2 15 | # using a lambda function:: 16 | table3 = etl.convert(table1, 'baz', lambda v: v*2) 17 | table3 18 | # a method of the data value can also be invoked by passing 19 | # the method name 20 | table4 = etl.convert(table1, 'foo', 'lower') 21 | table4 22 | # arguments to the method invocation can also be given 23 | table5 = etl.convert(table1, 'foo', 'replace', 'A', 'AA') 24 | table5 25 | # values can also be translated via a dictionary 26 | table7 = etl.convert(table1, 'foo', {'A': 'Z', 'B': 'Y'}) 27 | table7 28 | # the same conversion can be applied to multiple fields 29 | table8 = etl.convert(table1, ('foo', 'bar', 'baz'), str) 30 | table8 31 | # multiple conversions 
can be specified at the same time 32 | table9 = etl.convert(table1, {'foo': 'lower', 33 | 'bar': float, 34 | 'baz': lambda v: v * 2}) 35 | table9 36 | # ...or alternatively via a list 37 | table10 = etl.convert(table1, ['lower', float, lambda v: v*2]) 38 | table10 39 | # conversion can be conditional 40 | table11 = etl.convert(table1, 'baz', lambda v: v * 2, 41 | where=lambda r: r.foo == 'B') 42 | table11 43 | # conversion can access other values from the same row 44 | table12 = etl.convert(table1, 'baz', 45 | lambda v, row: v * float(row.bar), 46 | pass_row=True) 47 | table12 48 | 49 | 50 | # convertnumbers() 51 | ################## 52 | 53 | import petl as etl 54 | table1 = [['foo', 'bar', 'baz', 'quux'], 55 | ['1', '3.0', '9+3j', 'aaa'], 56 | ['2', '1.3', '7+2j', None]] 57 | table2 = etl.convertnumbers(table1) 58 | table2 59 | 60 | 61 | -------------------------------------------------------------------------------- /examples/transform/dedup.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # duplicates() 5 | ############## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | ['A', 1, 2.0], 10 | ['B', 2, 3.4], 11 | ['D', 6, 9.3], 12 | ['B', 3, 7.8], 13 | ['B', 2, 12.3], 14 | ['E', None, 1.3], 15 | ['D', 4, 14.5]] 16 | table2 = etl.duplicates(table1, 'foo') 17 | table2 18 | # compound keys are supported 19 | table3 = etl.duplicates(table1, key=['foo', 'bar']) 20 | table3 21 | 22 | 23 | # unique() 24 | ########## 25 | 26 | import petl as etl 27 | table1 = [['foo', 'bar', 'baz'], 28 | ['A', 1, 2], 29 | ['B', '2', '3.4'], 30 | ['D', 'xyz', 9.0], 31 | ['B', u'3', u'7.8'], 32 | ['B', '2', 42], 33 | ['E', None, None], 34 | ['D', 4, 12.3], 35 | ['F', 7, 2.3]] 36 | table2 = etl.unique(table1, 'foo') 37 | table2 38 | 39 | 40 | # conflicts() 41 | ############# 42 | 43 | import petl as etl 44 | table1 = [['foo', 'bar', 'baz'], 45 | ['A', 1, 
2.7], 46 | ['B', 2, None], 47 | ['D', 3, 9.4], 48 | ['B', None, 7.8], 49 | ['E', None], 50 | ['D', 3, 12.3], 51 | ['A', 2, None]] 52 | table2 = etl.conflicts(table1, 'foo') 53 | table2 54 | 55 | 56 | # isunique() 57 | ############ 58 | 59 | import petl as etl 60 | table1 = [['foo', 'bar'], 61 | ['a', 1], 62 | ['b'], 63 | ['b', 2], 64 | ['c', 3, True]] 65 | etl.isunique(table1, 'foo') 66 | etl.isunique(table1, 'bar') 67 | 68 | 69 | -------------------------------------------------------------------------------- /examples/transform/fills.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # filldown() 5 | ############ 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | [1, 'a', None], 10 | [1, None, .23], 11 | [1, 'b', None], 12 | [2, None, None], 13 | [2, None, .56], 14 | [2, 'c', None], 15 | [None, 'c', .72]] 16 | table2 = etl.filldown(table1) 17 | table2.lookall() 18 | table3 = etl.filldown(table1, 'bar') 19 | table3.lookall() 20 | table4 = etl.filldown(table1, 'bar', 'baz') 21 | table4.lookall() 22 | 23 | 24 | # fillright() 25 | ############# 26 | 27 | import petl as etl 28 | table1 = [['foo', 'bar', 'baz'], 29 | [1, 'a', None], 30 | [1, None, .23], 31 | [1, 'b', None], 32 | [2, None, None], 33 | [2, None, .56], 34 | [2, 'c', None], 35 | [None, 'c', .72]] 36 | table2 = etl.fillright(table1) 37 | table2.lookall() 38 | 39 | 40 | # fillleft() 41 | ############ 42 | 43 | import petl as etl 44 | table1 = [['foo', 'bar', 'baz'], 45 | [1, 'a', None], 46 | [1, None, .23], 47 | [1, 'b', None], 48 | [2, None, None], 49 | [2, None, .56], 50 | [2, 'c', None], 51 | [None, 'c', .72]] 52 | table2 = etl.fillleft(table1) 53 | table2.lookall() 54 | -------------------------------------------------------------------------------- /examples/transform/headers.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import division, print_function, absolute_import 2 | 3 | 4 | # rename() 5 | ########## 6 | 7 | import petl as etl 8 | table1 = [['sex', 'age'], 9 | ['m', 12], 10 | ['f', 34], 11 | ['-', 56]] 12 | # rename a single field 13 | table2 = etl.rename(table1, 'sex', 'gender') 14 | table2 15 | # rename multiple fields by passing a dictionary as the second argument 16 | table3 = etl.rename(table1, {'sex': 'gender', 'age': 'age_years'}) 17 | table3 18 | 19 | 20 | # setheader() 21 | ############# 22 | 23 | import petl as etl 24 | table1 = [['foo', 'bar'], 25 | ['a', 1], 26 | ['b', 2]] 27 | table2 = etl.setheader(table1, ['foofoo', 'barbar']) 28 | table2 29 | 30 | 31 | # extendheader() 32 | ################ 33 | 34 | import petl as etl 35 | table1 = [['foo'], 36 | ['a', 1, True], 37 | ['b', 2, False]] 38 | table2 = etl.extendheader(table1, ['bar', 'baz']) 39 | table2 40 | 41 | 42 | # pushheader() 43 | ############## 44 | 45 | import petl as etl 46 | table1 = [['a', 1], 47 | ['b', 2]] 48 | table2 = etl.pushheader(table1, ['foo', 'bar']) 49 | table2 50 | 51 | 52 | # skip() 53 | ######### 54 | 55 | import petl as etl 56 | table1 = [['#aaa', 'bbb', 'ccc'], 57 | ['#mmm'], 58 | ['foo', 'bar'], 59 | ['a', 1], 60 | ['b', 2]] 61 | table2 = etl.skip(table1, 2) 62 | table2 63 | 64 | 65 | -------------------------------------------------------------------------------- /examples/transform/intervals.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # intervallookup() 5 | ################## 6 | 7 | import petl as etl 8 | table = [['start', 'stop', 'value'], 9 | [1, 4, 'foo'], 10 | [3, 7, 'bar'], 11 | [4, 9, 'baz']] 12 | lkp = etl.intervallookup(table, 'start', 'stop') 13 | lkp.search(0, 1) 14 | lkp.search(1, 2) 15 | lkp.search(2, 4) 16 | lkp.search(2, 5) 17 | lkp.search(9, 14) 18 | lkp.search(19, 140) 19 | lkp.search(0) 20 | lkp.search(1) 21 | lkp.search(2) 22 | 
lkp.search(4) 23 | lkp.search(5) 24 | 25 | import petl as etl 26 | table = [['start', 'stop', 'value'], 27 | [1, 4, 'foo'], 28 | [3, 7, 'bar'], 29 | [4, 9, 'baz']] 30 | lkp = etl.intervallookup(table, 'start', 'stop', include_stop=True, 31 | value='value') 32 | lkp.search(0, 1) 33 | lkp.search(1, 2) 34 | lkp.search(2, 4) 35 | lkp.search(2, 5) 36 | lkp.search(9, 14) 37 | lkp.search(19, 140) 38 | lkp.search(0) 39 | lkp.search(1) 40 | lkp.search(2) 41 | lkp.search(4) 42 | lkp.search(5) 43 | 44 | 45 | # intervallookupone() 46 | ##################### 47 | 48 | import petl as etl 49 | table = [['start', 'stop', 'value'], 50 | [1, 4, 'foo'], 51 | [3, 7, 'bar'], 52 | [4, 9, 'baz']] 53 | lkp = etl.intervallookupone(table, 'start', 'stop', strict=False) 54 | lkp.search(0, 1) 55 | lkp.search(1, 2) 56 | lkp.search(2, 4) 57 | lkp.search(2, 5) 58 | lkp.search(9, 14) 59 | lkp.search(19, 140) 60 | lkp.search(0) 61 | lkp.search(1) 62 | lkp.search(2) 63 | lkp.search(4) 64 | lkp.search(5) 65 | 66 | 67 | # facetintervallookup() 68 | ####################### 69 | 70 | import petl as etl 71 | table = (('type', 'start', 'stop', 'value'), 72 | ('apple', 1, 4, 'foo'), 73 | ('apple', 3, 7, 'bar'), 74 | ('orange', 4, 9, 'baz')) 75 | lkp = etl.facetintervallookup(table, key='type', start='start', stop='stop') 76 | lkp['apple'].search(1, 2) 77 | lkp['apple'].search(2, 4) 78 | lkp['apple'].search(2, 5) 79 | lkp['orange'].search(2, 5) 80 | lkp['orange'].search(9, 14) 81 | lkp['orange'].search(19, 140) 82 | lkp['apple'].search(1) 83 | lkp['apple'].search(2) 84 | lkp['apple'].search(4) 85 | lkp['apple'].search(5) 86 | lkp['orange'].search(5) 87 | 88 | 89 | # intervaljoin() 90 | ################ 91 | 92 | import petl as etl 93 | left = [['begin', 'end', 'quux'], 94 | [1, 2, 'a'], 95 | [2, 4, 'b'], 96 | [2, 5, 'c'], 97 | [9, 14, 'd'], 98 | [1, 1, 'e'], 99 | [10, 10, 'f']] 100 | right = [['start', 'stop', 'value'], 101 | [1, 4, 'foo'], 102 | [3, 7, 'bar'], 103 | [4, 9, 'baz']] 104 | table1 = 
etl.intervaljoin(left, right, 105 | lstart='begin', lstop='end', 106 | rstart='start', rstop='stop') 107 | table1.lookall() 108 | # include stop coordinate in intervals 109 | table2 = etl.intervaljoin(left, right, 110 | lstart='begin', lstop='end', 111 | rstart='start', rstop='stop', 112 | include_stop=True) 113 | table2.lookall() 114 | 115 | # with facet key 116 | import petl as etl 117 | left = (('fruit', 'begin', 'end'), 118 | ('apple', 1, 2), 119 | ('apple', 2, 4), 120 | ('apple', 2, 5), 121 | ('orange', 2, 5), 122 | ('orange', 9, 14), 123 | ('orange', 19, 140), 124 | ('apple', 1, 1)) 125 | right = (('type', 'start', 'stop', 'value'), 126 | ('apple', 1, 4, 'foo'), 127 | ('apple', 3, 7, 'bar'), 128 | ('orange', 4, 9, 'baz')) 129 | table3 = etl.intervaljoin(left, right, 130 | lstart='begin', lstop='end', lkey='fruit', 131 | rstart='start', rstop='stop', rkey='type') 132 | table3.lookall() 133 | 134 | # intervalleftjoin() 135 | #################### 136 | 137 | import petl as etl 138 | left = [['begin', 'end', 'quux'], 139 | [1, 2, 'a'], 140 | [2, 4, 'b'], 141 | [2, 5, 'c'], 142 | [9, 14, 'd'], 143 | [1, 1, 'e'], 144 | [10, 10, 'f']] 145 | right = [['start', 'stop', 'value'], 146 | [1, 4, 'foo'], 147 | [3, 7, 'bar'], 148 | [4, 9, 'baz']] 149 | table1 = etl.intervalleftjoin(left, right, 150 | lstart='begin', lstop='end', 151 | rstart='start', rstop='stop') 152 | table1.lookall() 153 | -------------------------------------------------------------------------------- /examples/transform/joins.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # join() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['id', 'colour'], 9 | [1, 'blue'], 10 | [2, 'red'], 11 | [3, 'purple']] 12 | table2 = [['id', 'shape'], 13 | [1, 'circle'], 14 | [3, 'square'], 15 | [4, 'ellipse']] 16 | table3 = etl.join(table1, table2, key='id') 17 | table3 18 | # if no key is given, a 
natural join is tried 19 | table4 = etl.join(table1, table2) 20 | table4 21 | # note behaviour if the key is not unique in either or both tables 22 | table5 = [['id', 'colour'], 23 | [1, 'blue'], 24 | [1, 'red'], 25 | [2, 'purple']] 26 | table6 = [['id', 'shape'], 27 | [1, 'circle'], 28 | [1, 'square'], 29 | [2, 'ellipse']] 30 | table7 = etl.join(table5, table6, key='id') 31 | table7 32 | # compound keys are supported 33 | table8 = [['id', 'time', 'height'], 34 | [1, 1, 12.3], 35 | [1, 2, 34.5], 36 | [2, 1, 56.7]] 37 | table9 = [['id', 'time', 'weight'], 38 | [1, 2, 4.5], 39 | [2, 1, 6.7], 40 | [2, 2, 8.9]] 41 | table10 = etl.join(table8, table9, key=['id', 'time']) 42 | table10 43 | 44 | 45 | # leftjoin() 46 | ############ 47 | 48 | import petl as etl 49 | table1 = [['id', 'colour'], 50 | [1, 'blue'], 51 | [2, 'red'], 52 | [3, 'purple']] 53 | table2 = [['id', 'shape'], 54 | [1, 'circle'], 55 | [3, 'square'], 56 | [4, 'ellipse']] 57 | table3 = etl.leftjoin(table1, table2, key='id') 58 | table3 59 | 60 | 61 | # rightjoin() 62 | ############# 63 | 64 | import petl as etl 65 | table1 = [['id', 'colour'], 66 | [1, 'blue'], 67 | [2, 'red'], 68 | [3, 'purple']] 69 | table2 = [['id', 'shape'], 70 | [1, 'circle'], 71 | [3, 'square'], 72 | [4, 'ellipse']] 73 | table3 = etl.rightjoin(table1, table2, key='id') 74 | table3 75 | 76 | 77 | # outerjoin() 78 | ############# 79 | 80 | import petl as etl 81 | table1 = [['id', 'colour'], 82 | [1, 'blue'], 83 | [2, 'red'], 84 | [3, 'purple']] 85 | table2 = [['id', 'shape'], 86 | [1, 'circle'], 87 | [3, 'square'], 88 | [4, 'ellipse']] 89 | table3 = etl.outerjoin(table1, table2, key='id') 90 | table3 91 | 92 | 93 | # crossjoin() 94 | ############# 95 | 96 | import petl as etl 97 | table1 = [['id', 'colour'], 98 | [1, 'blue'], 99 | [2, 'red']] 100 | table2 = [['id', 'shape'], 101 | [1, 'circle'], 102 | [3, 'square']] 103 | table3 = etl.crossjoin(table1, table2) 104 | table3 105 | 106 | 107 | # antijoin() 108 | ############ 109 | 110 | 
import petl as etl 111 | table1 = [['id', 'colour'], 112 | [0, 'black'], 113 | [1, 'blue'], 114 | [2, 'red'], 115 | [4, 'yellow'], 116 | [5, 'white']] 117 | table2 = [['id', 'shape'], 118 | [1, 'circle'], 119 | [3, 'square']] 120 | table3 = etl.antijoin(table1, table2, key='id') 121 | table3 122 | 123 | 124 | # lookupjoin() 125 | ############## 126 | 127 | import petl as etl 128 | table1 = [['id', 'color', 'cost'], 129 | [1, 'blue', 12], 130 | [2, 'red', 8], 131 | [3, 'purple', 4]] 132 | table2 = [['id', 'shape', 'size'], 133 | [1, 'circle', 'big'], 134 | [1, 'circle', 'small'], 135 | [2, 'square', 'tiny'], 136 | [2, 'square', 'big'], 137 | [3, 'ellipse', 'small'], 138 | [3, 'ellipse', 'tiny']] 139 | table3 = etl.lookupjoin(table1, table2, key='id') 140 | table3 141 | 142 | 143 | # unjoin() 144 | ########## 145 | 146 | import petl as etl 147 | # join key is present in the table 148 | table1 = (('foo', 'bar', 'baz'), 149 | ('A', 1, 'apple'), 150 | ('B', 1, 'apple'), 151 | ('C', 2, 'orange')) 152 | table2, table3 = etl.unjoin(table1, 'baz', key='bar') 153 | table2 154 | table3 155 | # an integer join key can also be reconstructed 156 | table4 = (('foo', 'bar'), 157 | ('A', 'apple'), 158 | ('B', 'apple'), 159 | ('C', 'orange')) 160 | table5, table6 = etl.unjoin(table4, 'bar') 161 | table5 162 | table6 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /examples/transform/maps.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # fieldmap() 5 | ############ 6 | 7 | import petl as etl 8 | from collections import OrderedDict 9 | table1 = [['id', 'sex', 'age', 'height', 'weight'], 10 | [1, 'male', 16, 1.45, 62.0], 11 | [2, 'female', 19, 1.34, 55.4], 12 | [3, 'female', 17, 1.78, 74.4], 13 | [4, 'male', 21, 1.33, 45.2], 14 | [5, '-', 25, 1.65, 51.9]] 15 | mappings = OrderedDict() 16 | # rename a 
field 17 | mappings['subject_id'] = 'id' 18 | # translate a field 19 | mappings['gender'] = 'sex', {'male': 'M', 'female': 'F'} 20 | # apply a calculation to a field 21 | mappings['age_months'] = 'age', lambda v: v * 12 22 | # apply a calculation to a combination of fields 23 | mappings['bmi'] = lambda rec: rec['weight'] / rec['height']**2 24 | # transform and inspect the output 25 | table2 = etl.fieldmap(table1, mappings) 26 | table2 27 | 28 | 29 | # rowmap() 30 | ########## 31 | 32 | 33 | import petl as etl 34 | table1 = [['id', 'sex', 'age', 'height', 'weight'], 35 | [1, 'male', 16, 1.45, 62.0], 36 | [2, 'female', 19, 1.34, 55.4], 37 | [3, 'female', 17, 1.78, 74.4], 38 | [4, 'male', 21, 1.33, 45.2], 39 | [5, '-', 25, 1.65, 51.9]] 40 | def rowmapper(row): 41 | transmf = {'male': 'M', 'female': 'F'} 42 | return [row[0], 43 | transmf[row['sex']] if row['sex'] in transmf else None, 44 | row.age * 12, 45 | row.height / row.weight ** 2] 46 | 47 | table2 = etl.rowmap(table1, rowmapper, 48 | fields=['subject_id', 'gender', 'age_months', 'bmi']) 49 | table2 50 | 51 | 52 | # rowmapmany() 53 | ############## 54 | 55 | import petl as etl 56 | table1 = [['id', 'sex', 'age', 'height', 'weight'], 57 | [1, 'male', 16, 1.45, 62.0], 58 | [2, 'female', 19, 1.34, 55.4], 59 | [3, '-', 17, 1.78, 74.4], 60 | [4, 'male', 21, 1.33]] 61 | def rowgenerator(row): 62 | transmf = {'male': 'M', 'female': 'F'} 63 | yield [row[0], 'gender', 64 | transmf[row['sex']] if row['sex'] in transmf else None] 65 | yield [row[0], 'age_months', row.age * 12] 66 | yield [row[0], 'bmi', row.height / row.weight ** 2] 67 | 68 | table2 = etl.rowmapmany(table1, rowgenerator, 69 | fields=['subject_id', 'variable', 'value']) 70 | table2.lookall() 71 | 72 | 73 | -------------------------------------------------------------------------------- /examples/transform/reductions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, 
division 2 | 3 | 4 | # rowreduce() 5 | ############# 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | ['a', 3], 10 | ['a', 7], 11 | ['b', 2], 12 | ['b', 1], 13 | ['b', 9], 14 | ['c', 4]] 15 | def sumbar(key, rows): 16 | return [key, sum(row[1] for row in rows)] 17 | 18 | table2 = etl.rowreduce(table1, key='foo', reducer=sumbar, 19 | fields=['foo', 'barsum']) 20 | table2 21 | 22 | 23 | # aggregate() 24 | ############# 25 | 26 | import petl as etl 27 | 28 | table1 = [['foo', 'bar', 'baz'], 29 | ['a', 3, True], 30 | ['a', 7, False], 31 | ['b', 2, True], 32 | ['b', 2, False], 33 | ['b', 9, False], 34 | ['c', 4, True]] 35 | # aggregate whole rows 36 | table2 = etl.aggregate(table1, 'foo', len) 37 | table2 38 | # aggregate single field 39 | table3 = etl.aggregate(table1, 'foo', sum, 'bar') 40 | table3 41 | # alternative signature using keyword args 42 | table4 = etl.aggregate(table1, key=('foo', 'bar'), 43 | aggregation=list, value=('bar', 'baz')) 44 | table4 45 | # aggregate multiple fields 46 | from collections import OrderedDict 47 | import petl as etl 48 | 49 | aggregation = OrderedDict() 50 | aggregation['count'] = len 51 | aggregation['minbar'] = 'bar', min 52 | aggregation['maxbar'] = 'bar', max 53 | aggregation['sumbar'] = 'bar', sum 54 | # default aggregation function is list 55 | aggregation['listbar'] = 'bar' 56 | aggregation['listbarbaz'] = ('bar', 'baz'), list 57 | aggregation['bars'] = 'bar', etl.strjoin(', ') 58 | table5 = etl.aggregate(table1, 'foo', aggregation) 59 | table5 60 | 61 | 62 | # mergeduplicates() 63 | ################### 64 | 65 | import petl as etl 66 | table1 = [['foo', 'bar', 'baz'], 67 | ['A', 1, 2.7], 68 | ['B', 2, None], 69 | ['D', 3, 9.4], 70 | ['B', None, 7.8], 71 | ['E', None, 42.], 72 | ['D', 3, 12.3], 73 | ['A', 2, None]] 74 | table2 = etl.mergeduplicates(table1, 'foo') 75 | table2 76 | 77 | 78 | # merge() 79 | ######### 80 | 81 | import petl as etl 82 | table1 = [['foo', 'bar', 'baz'], 83 | [1, 'A', True], 84 | [2, 
'B', None], 85 | [4, 'C', True]] 86 | table2 = [['bar', 'baz', 'quux'], 87 | ['A', True, 42.0], 88 | ['B', False, 79.3], 89 | ['C', False, 12.4]] 90 | table3 = etl.merge(table1, table2, key='bar') 91 | table3 92 | 93 | 94 | # fold() 95 | ######## 96 | 97 | import petl as etl 98 | table1 = [['id', 'count'], 99 | [1, 3], 100 | [1, 5], 101 | [2, 4], 102 | [2, 8]] 103 | import operator 104 | table2 = etl.fold(table1, 'id', operator.add, 'count', 105 | presorted=True) 106 | table2 107 | -------------------------------------------------------------------------------- /examples/transform/regex.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # capture() 5 | ############ 6 | 7 | import petl as etl 8 | table1 = [['id', 'variable', 'value'], 9 | ['1', 'A1', '12'], 10 | ['2', 'A2', '15'], 11 | ['3', 'B1', '18'], 12 | ['4', 'C12', '19']] 13 | table2 = etl.capture(table1, 'variable', '(\\w)(\\d+)', 14 | ['treat', 'time']) 15 | table2 16 | # using the include_original argument 17 | table3 = etl.capture(table1, 'variable', '(\\w)(\\d+)', 18 | ['treat', 'time'], 19 | include_original=True) 20 | table3 21 | 22 | 23 | # split() 24 | ######### 25 | 26 | import petl as etl 27 | table1 = [['id', 'variable', 'value'], 28 | ['1', 'parad1', '12'], 29 | ['2', 'parad2', '15'], 30 | ['3', 'tempd1', '18'], 31 | ['4', 'tempd2', '19']] 32 | table2 = etl.split(table1, 'variable', 'd', ['variable', 'day']) 33 | table2 34 | 35 | 36 | # search() 37 | ########## 38 | 39 | import petl as etl 40 | table1 = [['foo', 'bar', 'baz'], 41 | ['orange', 12, 'oranges are nice fruit'], 42 | ['mango', 42, 'I like them'], 43 | ['banana', 74, 'lovely too'], 44 | ['cucumber', 41, 'better than mango']] 45 | # search any field 46 | table2 = etl.search(table1, '.g.') 47 | table2 48 | # search a specific field 49 | table3 = etl.search(table1, 'foo', '.g.') 50 | table3 51 | 52 | 53 | # searchcomplement() 
54 | #################### 55 | 56 | import petl as etl 57 | table1 = [['foo', 'bar', 'baz'], 58 | ['orange', 12, 'oranges are nice fruit'], 59 | ['mango', 42, 'I like them'], 60 | ['banana', 74, 'lovely too'], 61 | ['cucumber', 41, 'better than mango']] 62 | # search any field 63 | table2 = etl.searchcomplement(table1, '.g.') 64 | table2 65 | # search a specific field 66 | table3 = etl.searchcomplement(table1, 'foo', '.g.') 67 | table3 68 | 69 | -------------------------------------------------------------------------------- /examples/transform/reshape.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # melt() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['id', 'gender', 'age'], 9 | [1, 'F', 12], 10 | [2, 'M', 17], 11 | [3, 'M', 16]] 12 | table2 = etl.melt(table1, 'id') 13 | table2.lookall() 14 | # compound keys are supported 15 | table3 = [['id', 'time', 'height', 'weight'], 16 | [1, 11, 66.4, 12.2], 17 | [2, 16, 53.2, 17.3], 18 | [3, 12, 34.5, 9.4]] 19 | table4 = etl.melt(table3, key=['id', 'time']) 20 | table4.lookall() 21 | # a subset of variable fields can be selected 22 | table5 = etl.melt(table3, key=['id', 'time'], 23 | variables=['height']) 24 | table5.lookall() 25 | 26 | 27 | # recast() 28 | ########## 29 | 30 | import petl as etl 31 | table1 = [['id', 'variable', 'value'], 32 | [3, 'age', 16], 33 | [1, 'gender', 'F'], 34 | [2, 'gender', 'M'], 35 | [2, 'age', 17], 36 | [1, 'age', 12], 37 | [3, 'gender', 'M']] 38 | table2 = etl.recast(table1) 39 | table2 40 | # specifying variable and value fields 41 | table3 = [['id', 'vars', 'vals'], 42 | [3, 'age', 16], 43 | [1, 'gender', 'F'], 44 | [2, 'gender', 'M'], 45 | [2, 'age', 17], 46 | [1, 'age', 12], 47 | [3, 'gender', 'M']] 48 | table4 = etl.recast(table3, variablefield='vars', valuefield='vals') 49 | table4 50 | # if there are multiple values for each key/variable pair, and no 51 | # 
reducers function is provided, then all values will be listed 52 | table6 = [['id', 'time', 'variable', 'value'], 53 | [1, 11, 'weight', 66.4], 54 | [1, 14, 'weight', 55.2], 55 | [2, 12, 'weight', 53.2], 56 | [2, 16, 'weight', 43.3], 57 | [3, 12, 'weight', 34.5], 58 | [3, 17, 'weight', 49.4]] 59 | table7 = etl.recast(table6, key='id') 60 | table7 61 | # multiple values can be reduced via an aggregation function 62 | def mean(values): 63 | return float(sum(values)) / len(values) 64 | 65 | table8 = etl.recast(table6, key='id', reducers={'weight': mean}) 66 | table8 67 | # missing values are padded with whatever is provided via the 68 | # missing keyword argument (None by default) 69 | table9 = [['id', 'variable', 'value'], 70 | [1, 'gender', 'F'], 71 | [2, 'age', 17], 72 | [1, 'age', 12], 73 | [3, 'gender', 'M']] 74 | table10 = etl.recast(table9, key='id') 75 | table10 76 | 77 | 78 | # transpose() 79 | ############# 80 | 81 | import petl as etl 82 | table1 = [['id', 'colour'], 83 | [1, 'blue'], 84 | [2, 'red'], 85 | [3, 'purple'], 86 | [5, 'yellow'], 87 | [7, 'orange']] 88 | table2 = etl.transpose(table1) 89 | table2 90 | 91 | 92 | # pivot() 93 | ######### 94 | 95 | import petl as etl 96 | table1 = [['region', 'gender', 'style', 'units'], 97 | ['east', 'boy', 'tee', 12], 98 | ['east', 'boy', 'golf', 14], 99 | ['east', 'boy', 'fancy', 7], 100 | ['east', 'girl', 'tee', 3], 101 | ['east', 'girl', 'golf', 8], 102 | ['east', 'girl', 'fancy', 18], 103 | ['west', 'boy', 'tee', 12], 104 | ['west', 'boy', 'golf', 15], 105 | ['west', 'boy', 'fancy', 8], 106 | ['west', 'girl', 'tee', 6], 107 | ['west', 'girl', 'golf', 16], 108 | ['west', 'girl', 'fancy', 1]] 109 | table2 = etl.pivot(table1, 'region', 'gender', 'units', sum) 110 | table2 111 | table3 = etl.pivot(table1, 'region', 'style', 'units', sum) 112 | table3 113 | table4 = etl.pivot(table1, 'gender', 'style', 'units', sum) 114 | table4 115 | 116 | 117 | # flatten() 118 | ########### 119 | 120 | import petl as etl 121 | 
table1 = [['foo', 'bar', 'baz'], 122 | ['A', 1, True], 123 | ['C', 7, False], 124 | ['B', 2, False], 125 | ['C', 9, True]] 126 | list(etl.flatten(table1)) 127 | 128 | 129 | # unflatten() 130 | ############# 131 | 132 | import petl as etl 133 | a = ['A', 1, True, 'C', 7, False, 'B', 2, False, 'C', 9] 134 | table1 = etl.unflatten(a, 3) 135 | table1 136 | # a table and field name can also be provided as arguments 137 | table2 = [['lines'], 138 | ['A'], 139 | [1], 140 | [True], 141 | ['C'], 142 | [7], 143 | [False], 144 | ['B'], 145 | [2], 146 | [False], 147 | ['C'], 148 | [9]] 149 | table3 = etl.unflatten(table2, 'lines', 3) 150 | table3 151 | -------------------------------------------------------------------------------- /examples/transform/selects.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # select() 5 | ########## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar', 'baz'], 9 | ['a', 4, 9.3], 10 | ['a', 2, 88.2], 11 | ['b', 1, 23.3], 12 | ['c', 8, 42.0], 13 | ['d', 7, 100.9], 14 | ['c', 2]] 15 | # the second positional argument can be a function accepting 16 | # a row 17 | table2 = etl.select(table1, 18 | lambda rec: rec.foo == 'a' and rec.baz > 88.1) 19 | table2 20 | # the second positional argument can also be an expression 21 | # string, which will be converted to a function using petl.expr() 22 | table3 = etl.select(table1, "{foo} == 'a' and {baz} > 88.1") 23 | table3 24 | # the condition can also be applied to a single field 25 | table4 = etl.select(table1, 'foo', lambda v: v == 'a') 26 | table4 27 | 28 | 29 | # selectre() 30 | ############ 31 | 32 | import petl as etl 33 | table1 = [['foo', 'bar', 'baz'], 34 | ['aa', 4, 9.3], 35 | ['aaa', 2, 88.2], 36 | ['b', 1, 23.3], 37 | ['ccc', 8, 42.0], 38 | ['bb', 7, 100.9], 39 | ['c', 2]] 40 | table2 = etl.selectre(table1, 'foo', '[ab]{2}') 41 | table2 42 | 43 | 44 | # selectusingcontext() 45 | 
def query(prv, cur, nxt):
    """Return True when this row's 'bar' value lies within 2 of either
    neighbouring row's 'bar' value (previous or next), False otherwise.

    `prv` and `nxt` are None at the table boundaries and are skipped.
    """
    near_previous = prv is not None and (cur.bar - prv.bar) < 2
    near_next = nxt is not None and (nxt.bar - cur.bar) < 2
    return near_previous or near_next
'z'], 54 | ['B', 2, False], 55 | ['A', 9, False], 56 | ['B', 3, True], 57 | ['C', 9, True]] 58 | added, subtracted = etl.diff(a, b) 59 | # rows in b not in a 60 | added 61 | # rows in a not in b 62 | subtracted 63 | 64 | 65 | # recorddiff() 66 | ############## 67 | 68 | import petl as etl 69 | a = [['foo', 'bar', 'baz'], 70 | ['A', 1, True], 71 | ['C', 7, False], 72 | ['B', 2, False], 73 | ['C', 9, True]] 74 | b = [['bar', 'foo', 'baz'], 75 | [2, 'B', False], 76 | [9, 'A', False], 77 | [3, 'B', True], 78 | [9, 'C', True]] 79 | added, subtracted = etl.recorddiff(a, b) 80 | added 81 | subtracted 82 | 83 | 84 | # intersection() 85 | ################ 86 | 87 | import petl as etl 88 | table1 = [['foo', 'bar', 'baz'], 89 | ['A', 1, True], 90 | ['C', 7, False], 91 | ['B', 2, False], 92 | ['C', 9, True]] 93 | table2 = [['x', 'y', 'z'], 94 | ['B', 2, False], 95 | ['A', 9, False], 96 | ['B', 3, True], 97 | ['C', 9, True]] 98 | table3 = etl.intersection(table1, table2) 99 | table3 100 | 101 | 102 | -------------------------------------------------------------------------------- /examples/transform/sorts.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # sort() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | ['C', 2], 10 | ['A', 9], 11 | ['A', 6], 12 | ['F', 1], 13 | ['D', 10]] 14 | table2 = etl.sort(table1, 'foo') 15 | table2 16 | # sorting by compound key is supported 17 | table3 = etl.sort(table1, key=['foo', 'bar']) 18 | table3 19 | # if no key is specified, the default is a lexical sort 20 | table4 = etl.sort(table1) 21 | table4 22 | 23 | 24 | # mergesort() 25 | ############# 26 | 27 | import petl as etl 28 | table1 = [['foo', 'bar'], 29 | ['A', 9], 30 | ['C', 2], 31 | ['D', 10], 32 | ['A', 6], 33 | ['F', 1]] 34 | table2 = [['foo', 'bar'], 35 | ['B', 3], 36 | ['D', 10], 37 | ['A', 10], 38 | ['F', 4]] 39 | table3 = 
etl.mergesort(table1, table2, key='foo') 40 | table3.lookall() 41 | 42 | 43 | # issorted() 44 | ############ 45 | 46 | import petl as etl 47 | table1 = [['foo', 'bar', 'baz'], 48 | ['a', 1, True], 49 | ['b', 3, True], 50 | ['b', 2]] 51 | etl.issorted(table1, key='foo') 52 | etl.issorted(table1, key='bar') 53 | etl.issorted(table1, key='foo', strict=True) 54 | etl.issorted(table1, key='foo', reverse=True) 55 | 56 | -------------------------------------------------------------------------------- /examples/transform/unpacks.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | # unpack() 5 | ########## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | [1, ['a', 'b']], 10 | [2, ['c', 'd']], 11 | [3, ['e', 'f']]] 12 | table2 = etl.unpack(table1, 'bar', ['baz', 'quux']) 13 | table2 14 | 15 | 16 | # unpackdict() 17 | ############## 18 | 19 | import petl as etl 20 | table1 = [['foo', 'bar'], 21 | [1, {'baz': 'a', 'quux': 'b'}], 22 | [2, {'baz': 'c', 'quux': 'd'}], 23 | [3, {'baz': 'e', 'quux': 'f'}]] 24 | table2 = etl.unpackdict(table1, 'bar') 25 | table2 26 | -------------------------------------------------------------------------------- /examples/transform/validation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | # validate() 6 | ############ 7 | 8 | import petl as etl 9 | # define some validation constraints 10 | header = ('foo', 'bar', 'baz') 11 | constraints = [ 12 | dict(name='foo_int', field='foo', test=int), 13 | dict(name='bar_date', field='bar', test=etl.dateparser('%Y-%m-%d')), 14 | dict(name='baz_enum', field='baz', assertion=lambda v: v in ['Y', 'N']), 15 | dict(name='not_none', assertion=lambda row: None not in row) 16 | ] 17 | # now validate a table 18 | table = (('foo', 'bar', 'bazzz'), 19 | 
(1, '2000-01-01', 'Y'), 20 | ('x', '2010-10-10', 'N'), 21 | (2, '2000/01/01', 'Y'), 22 | (3, '2015-12-12', 'x'), 23 | (4, None, 'N'), 24 | ('y', '1999-99-99', 'z'), 25 | (6, '2000-01-01'), 26 | (7, '2001-02-02', 'N', True)) 27 | problems = etl.validate(table, constraints=constraints, header=header) 28 | problems.lookall() 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/util/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import, \ 2 | unicode_literals 3 | 4 | 5 | # values() 6 | ########## 7 | 8 | import petl as etl 9 | table1 = [['foo', 'bar'], 10 | ['a', True], 11 | ['b'], 12 | ['b', True], 13 | ['c', False]] 14 | foo = etl.values(table1, 'foo') 15 | foo 16 | list(foo) 17 | bar = etl.values(table1, 'bar') 18 | bar 19 | list(bar) 20 | # values from multiple fields 21 | table2 = [['foo', 'bar', 'baz'], 22 | [1, 'a', True], 23 | [2, 'bb', True], 24 | [3, 'd', False]] 25 | foobaz = etl.values(table2, 'foo', 'baz') 26 | foobaz 27 | list(foobaz) 28 | 29 | 30 | # header() 31 | ########## 32 | 33 | 34 | import petl as etl 35 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 36 | etl.header(table) 37 | 38 | 39 | # fieldnames() 40 | ############## 41 | 42 | import petl as etl 43 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 44 | etl.fieldnames(table) 45 | etl.header(table) 46 | 47 | 48 | # data() 49 | ######## 50 | 51 | import petl as etl 52 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 53 | d = etl.data(table) 54 | list(d) 55 | 56 | 57 | # dicts() 58 | ######### 59 | 60 | import petl as etl 61 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 62 | d = etl.dicts(table) 63 | d 64 | list(d) 65 | 66 | 67 | # namedtuples() 68 | ############### 69 | 70 | import petl as etl 71 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 72 | d = etl.namedtuples(table) 73 | d 74 | list(d) 75 | 76 | 77 | # records() 78 | 
############### 79 | 80 | import petl as etl 81 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 82 | d = etl.records(table) 83 | d 84 | list(d) 85 | 86 | 87 | # rowgroupby() 88 | ############## 89 | 90 | import petl as etl 91 | table1 = [['foo', 'bar', 'baz'], 92 | ['a', 1, True], 93 | ['b', 3, True], 94 | ['b', 2]] 95 | # group entire rows 96 | for key, group in etl.rowgroupby(table1, 'foo'): 97 | print(key, list(group)) 98 | 99 | # group specific values 100 | for key, group in etl.rowgroupby(table1, 'foo', 'bar'): 101 | print(key, list(group)) 102 | 103 | 104 | # empty() 105 | ######### 106 | 107 | import petl as etl 108 | table = ( 109 | etl 110 | .empty() 111 | .addcolumn('foo', ['A', 'B']) 112 | .addcolumn('bar', [1, 2]) 113 | ) 114 | table 115 | -------------------------------------------------------------------------------- /examples/util/counting.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # nrows() 5 | ######### 6 | 7 | import petl as etl 8 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 9 | etl.nrows(table) 10 | 11 | 12 | # valuecount() 13 | ############## 14 | 15 | import petl as etl 16 | table = [['foo', 'bar'], 17 | ['a', 1], 18 | ['b', 2], 19 | ['b', 7]] 20 | etl.valuecount(table, 'foo', 'b') 21 | 22 | 23 | # valuecounter() 24 | ################ 25 | 26 | import petl as etl 27 | table = [['foo', 'bar'], 28 | ['a', True], 29 | ['b'], 30 | ['b', True], 31 | ['c', False]] 32 | etl.valuecounter(table, 'foo').most_common() 33 | 34 | 35 | # valuecounts() 36 | ############### 37 | 38 | import petl as etl 39 | table = [['foo', 'bar', 'baz'], 40 | ['a', True, 0.12], 41 | ['a', True, 0.17], 42 | ['b', False, 0.34], 43 | ['b', False, 0.44], 44 | ['b']] 45 | etl.valuecounts(table, 'foo') 46 | etl.valuecounts(table, 'foo', 'bar') 47 | 48 | 49 | # parsecounter() 50 | ################ 51 | 52 | import petl as etl 53 | table = [['foo', 'bar', 
'baz'], 54 | ['A', 'aaa', 2], 55 | ['B', u'2', '3.4'], 56 | [u'B', u'3', u'7.8', True], 57 | ['D', '3.7', 9.0], 58 | ['E', 42]] 59 | counter, errors = etl.parsecounter(table, 'bar') 60 | counter.most_common() 61 | errors.most_common() 62 | 63 | 64 | # parsecounts() 65 | ############### 66 | 67 | import petl as etl 68 | table = [['foo', 'bar', 'baz'], 69 | ['A', 'aaa', 2], 70 | ['B', u'2', '3.4'], 71 | [u'B', u'3', u'7.8', True], 72 | ['D', '3.7', 9.0], 73 | ['E', 42]] 74 | etl.parsecounts(table, 'bar') 75 | 76 | 77 | # typecounter() 78 | ############### 79 | 80 | import petl as etl 81 | table = [['foo', 'bar', 'baz'], 82 | ['A', 1, 2], 83 | ['B', u'2', '3.4'], 84 | [u'B', u'3', u'7.8', True], 85 | ['D', u'xyz', 9.0], 86 | ['E', 42]] 87 | etl.typecounter(table, 'foo').most_common() 88 | etl.typecounter(table, 'bar').most_common() 89 | etl.typecounter(table, 'baz').most_common() 90 | 91 | 92 | # typecounts() 93 | ############## 94 | 95 | import petl as etl 96 | table = [['foo', 'bar', 'baz'], 97 | [b'A', 1, 2], 98 | [b'B', '2', b'3.4'], 99 | ['B', '3', '7.8', True], 100 | ['D', u'xyz', 9.0], 101 | ['E', 42]] 102 | etl.typecounts(table, 'foo') 103 | etl.typecounts(table, 'bar') 104 | etl.typecounts(table, 'baz') 105 | 106 | 107 | # stringpatterns() 108 | ################## 109 | 110 | import petl as etl 111 | table = [['foo', 'bar'], 112 | ['Mr. Foo', '123-1254'], 113 | ['Mrs. Bar', '234-1123'], 114 | ['Mr. Spo', '123-1254'], 115 | [u'Mr. Baz', u'321 1434'], 116 | [u'Mrs. Baz', u'321 1434'], 117 | ['Mr. 
Quux', '123-1254-XX']] 118 | etl.stringpatterns(table, 'foo') 119 | etl.stringpatterns(table, 'bar') 120 | 121 | 122 | # rowlengths() 123 | ############### 124 | 125 | import petl as etl 126 | table = [['foo', 'bar', 'baz'], 127 | ['A', 1, 2], 128 | ['B', '2', '3.4'], 129 | [u'B', u'3', u'7.8', True], 130 | ['D', 'xyz', 9.0], 131 | ['E', None], 132 | ['F', 9]] 133 | etl.rowlengths(table) 134 | -------------------------------------------------------------------------------- /examples/util/lookups.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # lookup() 4 | ########## 5 | import petl as etl 6 | 7 | table1 = [['foo', 'bar'], 8 | ['a', 1], 9 | ['b', 2], 10 | ['b', 3]] 11 | lkp = etl.lookup(table1, 'foo', 'bar') 12 | lkp['a'] 13 | lkp['b'] 14 | # if no valuespec argument is given, defaults to the whole 15 | # row (as a tuple) 16 | lkp = etl.lookup(table1, 'foo') 17 | lkp['a'] 18 | lkp['b'] 19 | # compound keys are supported 20 | table2 = [['foo', 'bar', 'baz'], 21 | ['a', 1, True], 22 | ['b', 2, False], 23 | ['b', 3, True], 24 | ['b', 3, False]] 25 | lkp = etl.lookup(table2, ('foo', 'bar'), 'baz') 26 | lkp[('a', 1)] 27 | lkp[('b', 2)] 28 | lkp[('b', 3)] 29 | # data can be loaded into an existing dictionary-like 30 | # object, including persistent dictionaries created via the 31 | # shelve module 32 | import shelve 33 | 34 | lkp = shelve.open('example1.dat', flag='n') 35 | lkp = etl.lookup(table1, 'foo', 'bar', lkp) 36 | lkp.close() 37 | lkp = shelve.open('example1.dat', flag='r') 38 | lkp['a'] 39 | lkp['b'] 40 | 41 | 42 | # lookupone() 43 | ############# 44 | 45 | import petl as etl 46 | 47 | table1 = [['foo', 'bar'], 48 | ['a', 1], 49 | ['b', 2], 50 | ['b', 3]] 51 | # if the specified key is not unique and strict=False (default), 52 | # the first value wins 53 | lkp = etl.lookupone(table1, 'foo', 'bar') 54 | lkp['a'] 55 | lkp['b'] 56 | # if the specified 
key is not unique and strict=True, will raise 57 | # DuplicateKeyError 58 | try: 59 | lkp = etl.lookupone(table1, 'foo', strict=True) 60 | except etl.errors.DuplicateKeyError as e: 61 | print(e) 62 | 63 | # compound keys are supported 64 | table2 = [['foo', 'bar', 'baz'], 65 | ['a', 1, True], 66 | ['b', 2, False], 67 | ['b', 3, True], 68 | ['b', 3, False]] 69 | lkp = etl.lookupone(table2, ('foo', 'bar'), 'baz') 70 | lkp[('a', 1)] 71 | lkp[('b', 2)] 72 | lkp[('b', 3)] 73 | # data can be loaded into an existing dictionary-like 74 | # object, including persistent dictionaries created via the 75 | # shelve module 76 | import shelve 77 | 78 | lkp = shelve.open('example2.dat', flag='n') 79 | lkp = etl.lookupone(table1, 'foo', 'bar', lkp) 80 | lkp.close() 81 | lkp = shelve.open('example2.dat', flag='r') 82 | lkp['a'] 83 | lkp['b'] 84 | 85 | 86 | # dictlookup() 87 | ############## 88 | 89 | import petl as etl 90 | 91 | table1 = [['foo', 'bar'], 92 | ['a', 1], 93 | ['b', 2], 94 | ['b', 3]] 95 | lkp = etl.dictlookup(table1, 'foo') 96 | lkp['a'] 97 | lkp['b'] 98 | # compound keys are supported 99 | table2 = [['foo', 'bar', 'baz'], 100 | ['a', 1, True], 101 | ['b', 2, False], 102 | ['b', 3, True], 103 | ['b', 3, False]] 104 | lkp = etl.dictlookup(table2, ('foo', 'bar')) 105 | lkp[('a', 1)] 106 | lkp[('b', 2)] 107 | lkp[('b', 3)] 108 | # data can be loaded into an existing dictionary-like 109 | # object, including persistent dictionaries created via the 110 | # shelve module 111 | import shelve 112 | 113 | lkp = shelve.open('example3.dat', flag='n') 114 | lkp = etl.dictlookup(table1, 'foo', lkp) 115 | lkp.close() 116 | lkp = shelve.open('example3.dat', flag='r') 117 | lkp['a'] 118 | lkp['b'] 119 | 120 | 121 | # dictlookupone() 122 | ################# 123 | 124 | import petl as etl 125 | 126 | table1 = [['foo', 'bar'], 127 | ['a', 1], 128 | ['b', 2], 129 | ['b', 3]] 130 | # if the specified key is not unique and strict=False (default), 131 | # the first value wins 132 | lkp = 
etl.dictlookupone(table1, 'foo') 133 | lkp['a'] 134 | lkp['b'] 135 | # if the specified key is not unique and strict=True, will raise 136 | # DuplicateKeyError 137 | try: 138 | lkp = etl.dictlookupone(table1, 'foo', strict=True) 139 | except etl.errors.DuplicateKeyError as e: 140 | print(e) 141 | 142 | # compound keys are supported 143 | table2 = [['foo', 'bar', 'baz'], 144 | ['a', 1, True], 145 | ['b', 2, False], 146 | ['b', 3, True], 147 | ['b', 3, False]] 148 | lkp = etl.dictlookupone(table2, ('foo', 'bar')) 149 | lkp[('a', 1)] 150 | lkp[('b', 2)] 151 | lkp[('b', 3)] 152 | # data can be loaded into an existing dictionary-like 153 | # object, including persistent dictionaries created via the 154 | # shelve module 155 | import shelve 156 | 157 | lkp = shelve.open('example4.dat', flag='n') 158 | lkp = etl.dictlookupone(table1, 'foo', lkp) 159 | lkp.close() 160 | lkp = shelve.open('example4.dat', flag='r') 161 | lkp['a'] 162 | lkp['b'] 163 | 164 | 165 | -------------------------------------------------------------------------------- /examples/util/materialise.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # columns() 5 | ########### 6 | 7 | import petl as etl 8 | table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]] 9 | cols = etl.columns(table) 10 | cols['foo'] 11 | cols['bar'] 12 | 13 | 14 | # facetcolumns() 15 | ################ 16 | 17 | import petl as etl 18 | table = [['foo', 'bar', 'baz'], 19 | ['a', 1, True], 20 | ['b', 2, True], 21 | ['b', 3]] 22 | fc = etl.facetcolumns(table, 'foo') 23 | fc['a'] 24 | fc['b'] 25 | -------------------------------------------------------------------------------- /examples/util/misc.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # typeset() 5 | ########### 6 | 7 | import petl as etl 8 | table 
= [['foo', 'bar', 'baz'], 9 | ['A', 1, '2'], 10 | ['B', u'2', '3.4'], 11 | [u'B', u'3', '7.8', True], 12 | ['D', u'xyz', 9.0], 13 | ['E', 42]] 14 | sorted(etl.typeset(table, 'foo')) 15 | sorted(etl.typeset(table, 'bar')) 16 | sorted(etl.typeset(table, 'baz')) 17 | 18 | 19 | # diffheaders() 20 | ############### 21 | 22 | import petl as etl 23 | table1 = [['foo', 'bar', 'baz'], 24 | ['a', 1, .3]] 25 | table2 = [['baz', 'bar', 'quux'], 26 | ['a', 1, .3]] 27 | add, sub = etl.diffheaders(table1, table2) 28 | add 29 | sub 30 | 31 | 32 | # diffvalues() 33 | ############## 34 | 35 | import petl as etl 36 | table1 = [['foo', 'bar'], 37 | ['a', 1], 38 | ['b', 3]] 39 | table2 = [['bar', 'foo'], 40 | [1, 'a'], 41 | [3, 'c']] 42 | add, sub = etl.diffvalues(table1, table2, 'foo') 43 | add 44 | sub 45 | 46 | 47 | # nthword() 48 | ########### 49 | 50 | import petl as etl 51 | s = 'foo bar' 52 | f = etl.nthword(0) 53 | f(s) 54 | g = etl.nthword(1) 55 | g(s) 56 | 57 | -------------------------------------------------------------------------------- /examples/util/parsers.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # datetimeparser() 5 | ################## 6 | 7 | from petl import datetimeparser 8 | isodatetime = datetimeparser('%Y-%m-%dT%H:%M:%S') 9 | isodatetime('2002-12-25T00:00:00') 10 | try: 11 | isodatetime('2002-12-25T00:00:99') 12 | except ValueError as e: 13 | print(e) 14 | 15 | 16 | # dateparser() 17 | ############## 18 | 19 | from petl import dateparser 20 | isodate = dateparser('%Y-%m-%d') 21 | isodate('2002-12-25') 22 | try: 23 | isodate('2002-02-30') 24 | except ValueError as e: 25 | print(e) 26 | 27 | 28 | # timeparser() 29 | ############## 30 | 31 | from petl import timeparser 32 | isotime = timeparser('%H:%M:%S') 33 | isotime('00:00:00') 34 | isotime('13:00:00') 35 | try: 36 | isotime('12:00:99') 37 | except ValueError as e: 38 | print(e) 39 | 40 | 
try: 41 | isotime('25:00:00') 42 | except ValueError as e: 43 | print(e) 44 | 45 | 46 | # boolparser() 47 | ############## 48 | 49 | from petl import boolparser 50 | mybool = boolparser(true_strings=['yes', 'y'], false_strings=['no', 'n']) 51 | mybool('y') 52 | mybool('yes') 53 | mybool('Y') 54 | mybool('No') 55 | try: 56 | mybool('foo') 57 | except ValueError as e: 58 | print(e) 59 | 60 | try: 61 | mybool('True') 62 | except ValueError as e: 63 | print(e) 64 | -------------------------------------------------------------------------------- /examples/util/random.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # randomtable() 5 | ############### 6 | 7 | import petl as etl 8 | table = etl.randomtable(3, 100, seed=42) 9 | table 10 | 11 | 12 | # dummytable() 13 | ############## 14 | 15 | import petl as etl 16 | table1 = etl.dummytable(100, seed=42) 17 | table1 18 | # customise fields 19 | import random 20 | from functools import partial 21 | fields = [('foo', random.random), 22 | ('bar', partial(random.randint, 0, 500)), 23 | ('baz', partial(random.choice, 24 | ['chocolate', 'strawberry', 'vanilla']))] 25 | table2 = etl.dummytable(100, fields=fields, seed=42) 26 | table2 27 | -------------------------------------------------------------------------------- /examples/util/statistics.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # limits() 5 | ########## 6 | 7 | import petl as etl 8 | table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]] 9 | minv, maxv = etl.limits(table, 'bar') 10 | minv 11 | maxv 12 | 13 | 14 | # stats() 15 | ######### 16 | 17 | import petl as etl 18 | table = [['foo', 'bar', 'baz'], 19 | ['A', 1, 2], 20 | ['B', '2', '3.4'], 21 | [u'B', u'3', u'7.8', True], 22 | ['D', 'xyz', 9.0], 23 | ['E', None]] 24 | 
etl.stats(table, 'bar') 25 | 26 | 27 | -------------------------------------------------------------------------------- /examples/util/timing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # progress() 5 | ############ 6 | 7 | import petl as etl 8 | table = etl.dummytable(100000) 9 | table.progress(10000).tocsv('example.csv') 10 | 11 | 12 | # clock() 13 | ######### 14 | 15 | import petl as etl 16 | t1 = etl.dummytable(100000) 17 | c1 = etl.clock(t1) 18 | t2 = etl.convert(c1, 'foo', lambda v: v**2) 19 | c2 = etl.clock(t2) 20 | p = etl.progress(c2, 10000) 21 | etl.tocsv(p, 'example.csv') 22 | # time consumed retrieving rows from t1 23 | c1.time 24 | # time consumed retrieving rows from t2 25 | c2.time 26 | # actual time consumed by the convert step 27 | c2.time - c1.time 28 | 29 | 30 | -------------------------------------------------------------------------------- /examples/util/vis.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | # look() 5 | ######## 6 | 7 | import petl as etl 8 | table1 = [['foo', 'bar'], 9 | ['a', 1], 10 | ['b', 2]] 11 | etl.look(table1) 12 | # alternative formatting styles 13 | etl.look(table1, style='simple') 14 | etl.look(table1, style='minimal') 15 | # any irregularities in the length of header and/or data 16 | # rows will appear as blank cells 17 | table2 = [['foo', 'bar'], 18 | ['a'], 19 | ['b', 2, True]] 20 | etl.look(table2) 21 | 22 | 23 | # see() 24 | ####### 25 | 26 | import petl as etl 27 | table = [['foo', 'bar'], ['a', 1], ['b', 2]] 28 | etl.see(table) 29 | -------------------------------------------------------------------------------- /petl/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, 
class Comparable(object):
    """Wrapper class to allow for flexible comparison of objects of different
    types, preserving the relaxed sorting behaviour of Python 2 with
    additional flexibility to allow for comparison of arbitrary objects with
    the `None` value (for example, the date and time objects from the standard
    library cannot be directly compared with `None` in Python 2).

    """

    # NOTE(review): defining __eq__ without __hash__ makes instances
    # unhashable on Python 3 — presumably intentional for a sort-key
    # wrapper, but confirm before relying on hashability.
    __slots__ = ['obj', 'inner']

    def __init__(self, obj):
        """Wrap `obj`; list/tuple elements are wrapped recursively so that
        compound sort keys compare element-wise under the same rules."""
        # store wrapped object unchanged
        self.inner = obj
        # handle lists and tuples
        if isinstance(obj, (list, tuple)):
            obj = tuple(Comparable(o) for o in obj)
        self.obj = obj

    def __lt__(self, other):
        """Total ordering across mixed types: None sorts first, then
        numbers, then everything else; within 'everything else', bytes
        sort before text, and otherwise native comparison is attempted
        with a fallback to comparing type names."""

        # convenience
        obj = self.obj
        if isinstance(other, Comparable):
            other = other.obj

        # None < everything else
        if other is None:
            return False
        if obj is None:
            return True

        # numbers < everything else (except None)
        if isinstance(obj, numeric_types) \
                and not isinstance(other, numeric_types):
            return True
        if not isinstance(obj, numeric_types) \
                and isinstance(other, numeric_types):
            return False

        # binary < unicode
        if isinstance(obj, text_type) and isinstance(other, binary_type):
            return False
        if isinstance(obj, binary_type) and isinstance(other, text_type):
            return True

        try:
            # attempt native comparison
            return obj < other

        except TypeError:
            # fall back to comparing type names
            return _typestr(obj) < _typestr(other)

    def __eq__(self, other):
        # equality compares the (possibly recursively wrapped) value;
        # unwrap the other side when it is also a Comparable
        if isinstance(other, Comparable):
            return self.obj == other.obj
        return self.obj == other

    def __le__(self, other):
        # derived from __lt__/__eq__ rather than delegating to the wrapped
        # object, so mixed-type comparisons stay consistent
        return self < other or self == other

    def __gt__(self, other):
        return not (self < other or self == other)

    def __ge__(self, other):
        return not (self < other)

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        # Python 2 text conversion hook (text_type is unicode there)
        return text_type(self.obj)

    def __repr__(self):
        return 'Comparable(' + repr(self.obj) + ')'

    def __iter__(self, *args, **kwargs):
        # delegate container protocol to the wrapped object
        return iter(self.obj, *args, **kwargs)

    def __len__(self):
        return len(self.obj)

    def __getitem__(self, item):
        return self.obj.__getitem__(item)
_typestr(x): 98 | # attempt to preserve Python 2 name orderings 99 | if isinstance(x, binary_type): 100 | return 'str' 101 | if isinstance(x, text_type): 102 | return 'unicode' 103 | return type(x).__name__ 104 | 105 | 106 | def comparable_itemgetter(*args): 107 | getter = operator.itemgetter(*args) 108 | getter_with_default = _itemgetter_with_default(*args) 109 | 110 | def _getter_with_fallback(obj): 111 | try: 112 | return getter(obj) 113 | except (IndexError, KeyError): 114 | return getter_with_default(obj) 115 | g = lambda x: Comparable(_getter_with_fallback(x)) 116 | return g 117 | 118 | 119 | def _itemgetter_with_default(*args): 120 | """ itemgetter compatible with `operator.itemgetter` behavior, filling missing 121 | values with default instead of raising IndexError or KeyError """ 122 | def _get_default(obj, item, default): 123 | try: 124 | return obj[item] 125 | except (IndexError, KeyError): 126 | return default 127 | if len(args) == 1: 128 | return partial(_get_default, item=args[0], default=None) 129 | return lambda obj: tuple(_get_default(obj, item=item, default=None) for item in args) 130 | -------------------------------------------------------------------------------- /petl/compat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | import sys 5 | 6 | 7 | 8 | ########################## 9 | # Python 3 compatibility # 10 | ########################## 11 | 12 | PY2 = sys.version_info.major == 2 13 | PY3 = sys.version_info.major == 3 14 | 15 | if PY2: 16 | from itertools import ifilter, ifilterfalse, imap, izip, izip_longest 17 | from string import maketrans 18 | from decimal import Decimal 19 | string_types = basestring, 20 | integer_types = int, long 21 | numeric_types = bool, int, long, float, Decimal 22 | text_type = unicode 23 | binary_type = str 24 | from urllib2 import urlopen 25 | try: 26 | from cStringIO import StringIO 27 | except 
ImportError: 28 | from StringIO import StringIO 29 | BytesIO = StringIO 30 | try: 31 | import cPickle as pickle 32 | except ImportError: 33 | import pickle 34 | maxint = sys.maxint 35 | long = long 36 | xrange = xrange 37 | reduce = reduce 38 | 39 | else: 40 | ifilter = filter 41 | imap = map 42 | izip = zip 43 | xrange = range 44 | from decimal import Decimal 45 | from itertools import filterfalse as ifilterfalse 46 | from itertools import zip_longest as izip_longest 47 | from functools import reduce 48 | maketrans = str.maketrans 49 | string_types = str, 50 | integer_types = int, 51 | numeric_types = bool, int, float, Decimal 52 | class_types = type, 53 | text_type = str 54 | binary_type = bytes 55 | long = int 56 | from urllib.request import urlopen 57 | from io import StringIO, BytesIO 58 | import pickle 59 | maxint = sys.maxsize 60 | 61 | try: 62 | advance_iterator = next 63 | except NameError: 64 | def advance_iterator(it): 65 | return it.next() 66 | next = advance_iterator 67 | 68 | try: 69 | callable = callable 70 | except NameError: 71 | def callable(obj): 72 | return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) 73 | -------------------------------------------------------------------------------- /petl/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | 4 | from petl.compat import text_type 5 | 6 | 7 | look_style = 'grid' # alternatives: 'simple', 'minimal' 8 | look_limit = 5 9 | look_index_header = False 10 | look_vrepr = repr 11 | look_width = None 12 | see_limit = 5 13 | see_index_header = False 14 | see_vrepr = repr 15 | display_limit = 5 16 | display_index_header = False 17 | display_vrepr = text_type 18 | sort_buffersize = 100000 19 | failonerror=False # False, True, 'inline' 20 | """ 21 | Controls what happens when unhandled exceptions are raised in a 22 | transformation: 23 | 24 | - If `False`, exceptions are 
class DuplicateKeyError(Exception):
    """Raised when a key value occurs more than once but unique keys were
    required, e.g., `lookupone`/`dictlookupone` with ``strict=True``."""

    def __init__(self, key):
        self.key = key

    def __str__(self):
        # wrap the key in a 1-tuple: compound (tuple) keys are supported
        # by the lookup functions, and a bare tuple on the right of % would
        # be unpacked as multiple format arguments and raise TypeError
        return 'duplicate key: %r' % (self.key,)


class FieldSelectionError(Exception):
    """Raised when a field selection does not match a field name or a
    valid field index."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        # 1-tuple guard so a tuple selection is rendered via %r rather
        # than unpacked by the % operator
        return 'selection is not a field or valid field index: %r' \
               % (self.value,)


class ArgumentError(Exception):
    """Raised when a function is invoked with an invalid argument."""

    def __init__(self, message):
        self.message = message

    def __str__(self):
        # same 1-tuple guard for consistency with the other errors
        return 'argument error: %s' % (self.message,)
def getcodec(encoding):
    """Look up and return the codec for *encoding*.

    When *encoding* is None, the locale's preferred encoding is used
    instead, matching the behaviour of Python's text I/O defaults.
    """
    name = encoding if encoding is not None else locale.getpreferredencoding()
    return codecs.lookup(name)
def itercolumns(cols, header, missing):
    """Yield a header row, then data rows formed by zipping *cols*
    together, padding shorter columns with *missing*.

    When *header* is None, field names 'f0', 'f1', ... are generated,
    one per column.
    """
    hdr = ['f%s' % i for i in range(len(cols))] if header is None else header
    yield tuple(hdr)
    for row in izip_longest(*cols, fillvalue=missing):
        yield row
reader = csv.reader(csvfile, **self.csvargs) 37 | for row in reader: 38 | yield tuple(row) 39 | finally: 40 | csvfile.detach() 41 | 42 | 43 | def tocsv_impl(table, source, **kwargs): 44 | _writecsv(table, source=source, mode='wb', **kwargs) 45 | 46 | 47 | def appendcsv_impl(table, source, **kwargs): 48 | _writecsv(table, source=source, mode='ab', **kwargs) 49 | 50 | 51 | def _writecsv(table, source, mode, write_header, encoding, errors, **csvargs): 52 | rows = table if write_header else data(table) 53 | with source.open(mode) as buf: 54 | # wrap buffer for text IO 55 | csvfile = io.TextIOWrapper(buf, encoding=encoding, errors=errors, 56 | newline='') 57 | try: 58 | writer = csv.writer(csvfile, **csvargs) 59 | for row in rows: 60 | writer.writerow(row) 61 | csvfile.flush() 62 | finally: 63 | csvfile.detach() 64 | 65 | 66 | def teecsv_impl(table, source, **kwargs): 67 | return TeeCSVView(table, source=source, **kwargs) 68 | 69 | 70 | class TeeCSVView(Table): 71 | 72 | def __init__(self, table, source=None, encoding=None, 73 | errors='strict', write_header=True, **csvargs): 74 | self.table = table 75 | self.source = source 76 | self.write_header = write_header 77 | self.encoding = encoding 78 | self.errors = errors 79 | self.csvargs = csvargs 80 | 81 | def __iter__(self): 82 | with self.source.open('wb') as buf: 83 | # wrap buffer for text IO 84 | csvfile = io.TextIOWrapper(buf, encoding=self.encoding, 85 | errors=self.errors, newline='') 86 | try: 87 | writer = csv.writer(csvfile, **self.csvargs) 88 | it = iter(self.table) 89 | try: 90 | hdr = next(it) 91 | except StopIteration: 92 | return 93 | if self.write_header: 94 | writer.writerow(hdr) 95 | yield tuple(hdr) 96 | for row in it: 97 | writer.writerow(row) 98 | yield tuple(row) 99 | csvfile.flush() 100 | finally: 101 | csvfile.detach() 102 | -------------------------------------------------------------------------------- /petl/io/db_utils.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import logging 6 | 7 | 8 | from petl.compat import callable 9 | 10 | 11 | logger = logging.getLogger(__name__) 12 | debug = logger.debug 13 | 14 | 15 | def _is_dbapi_connection(dbo): 16 | return _hasmethod(dbo, 'cursor') 17 | 18 | 19 | def _is_clikchouse_dbapi_connection(dbo): 20 | return 'clickhouse_driver' in str(type(dbo)) 21 | 22 | 23 | def _is_dbapi_cursor(dbo): 24 | return _hasmethods(dbo, 'execute', 'executemany', 'fetchone', 'fetchmany', 25 | 'fetchall') 26 | 27 | 28 | def _is_sqlalchemy_engine(dbo): 29 | return (_hasmethods(dbo, 'execute', 'connect', 'raw_connection') 30 | and _hasprop(dbo, 'driver')) 31 | 32 | 33 | def _is_sqlalchemy_session(dbo): 34 | return _hasmethods(dbo, 'execute', 'connection', 'get_bind') 35 | 36 | 37 | def _is_sqlalchemy_connection(dbo): 38 | # N.B., this are not completely selective conditions, this test needs 39 | # to be applied after ruling out DB-API cursor 40 | return _hasmethod(dbo, 'execute') and _hasprop(dbo, 'connection') 41 | 42 | 43 | def _hasmethod(o, n): 44 | return hasattr(o, n) and callable(getattr(o, n)) 45 | 46 | 47 | def _hasmethods(o, *l): 48 | return all(_hasmethod(o, n) for n in l) 49 | 50 | 51 | def _hasprop(o, n): 52 | return hasattr(o, n) and not callable(getattr(o, n)) 53 | 54 | 55 | # default DB quote char per SQL-92 56 | quotechar = '"' 57 | 58 | 59 | def _quote(s): 60 | # crude way to sanitise table and field names 61 | # conform with the SQL-92 standard. 
def _placeholders(connection, names):
    """Build a comma-separated SQL placeholder string for *names*, matching
    the DB-API paramstyle declared by *connection*'s driver module.

    Question-mark (qmark) style is the fallback whenever the style cannot
    be discovered or is unrecognised.
    """
    qmark = ', '.join(['?'] * len(names))
    if connection is None:
        # nothing to introspect, assume question-mark style
        debug('connection is None, default to using qmark paramstyle')
        return qmark
    # NOTE(review): __import__ on a dotted module name returns the top-level
    # package — assumes the driver declares paramstyle there; confirm.
    mod = __import__(connection.__class__.__module__)
    if not hasattr(mod, 'paramstyle'):
        debug('module %r from connection %r has no attribute paramstyle, '
              'defaulting to qmark', mod, connection)
        return qmark
    style = mod.paramstyle
    if style == 'qmark':
        debug('found paramstyle qmark')
        return qmark
    if style in ('format', 'pyformat'):
        debug('found paramstyle pyformat')
        return ', '.join(['%s'] * len(names))
    if style == 'numeric':
        debug('found paramstyle numeric')
        return ', '.join([':' + str(i + 1) for i in range(len(names))])
    if style == 'named':
        debug('found paramstyle named')
        return ', '.join([':%s' % name for name in names])
    debug('found unexpected paramstyle %r, defaulting to qmark', style)
    return qmark
def fromdataframe(df, include_index=False):
    """
    Extract a table from a `pandas <http://pandas.pydata.org/>`_ DataFrame.

    The first row yielded is the header (the DataFrame's column names);
    each subsequent row is one DataFrame row as a tuple. If
    `include_index` is True, the DataFrame index is prepended as an extra
    'index' field. E.g.::

        >>> import petl as etl
        >>> import pandas as pd
        >>> records = [('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)]
        >>> df = pd.DataFrame.from_records(records, columns=('foo', 'bar', 'baz'))
        >>> table = etl.fromdataframe(df)
        >>> table
        +-----------+-----+-----+
        | foo       | bar | baz |
        +===========+=====+=====+
        | 'apples'  |   1 | 2.5 |
        +-----------+-----+-----+
        | 'oranges' |   3 | 4.4 |
        +-----------+-----+-----+
        | 'pears'   |   7 | 0.1 |
        +-----------+-----+-----+

    """

    # lazy view: iteration is deferred until the returned table is consumed
    return DataFrameView(df, include_index=include_index)
def fromxls(filename, sheet=None, use_view=True, **kwargs):
    """
    Extract a table from a sheet in an Excel .xls file.

    Sheet is identified by its name or index number.

    N.B., the sheet name is case sensitive.

    If `use_view` is True (the default) the sheet is read via
    xlutils.view, which converts Excel dates automatically; otherwise
    xlrd is used directly. Any extra keyword arguments are passed through
    to the underlying reader.

    """

    # lazy view: the workbook is not opened until the table is iterated
    return XLSView(filename, sheet=sheet, use_view=use_view, **kwargs)
def toxls(tbl, filename, sheet, encoding=None, style_compression=0,
          styles=None):
    """
    Write a table to a new Excel .xls file.

    `sheet` is the name of the worksheet to create. `encoding` defaults
    to the locale's preferred encoding. `styles`, if given, maps field
    names to xlwt style objects applied to the data cells of each column;
    fields missing from the mapping (or mapped to None) get the default
    style. `filename` may be anything accepted by
    :func:`petl.io.sources.write_source_from_arg`.

    """

    import xlwt
    if encoding is None:
        encoding = locale.getpreferredencoding()
    wb = xlwt.Workbook(encoding=encoding, style_compression=style_compression)
    ws = wb.add_sheet(sheet)

    if styles is None:
        # simple version, don't worry about styles
        for r, row in enumerate(tbl):
            for c, v in enumerate(row):
                ws.write(r, c, label=v)
    else:
        # work on a copy so the caller's styles mapping is not mutated
        styles = dict(styles)
        it = iter(tbl)
        # initialise flds so an empty table doesn't raise NameError below
        flds = []
        try:
            hdr = next(it)
            flds = list(map(str, hdr))
            for c, f in enumerate(flds):
                ws.write(0, c, label=f)
                if f not in styles or styles[f] is None:
                    styles[f] = xlwt.Style.default_style
        except StopIteration:
            pass  # empty table: no header, no data rows
        # convert to list for easy zipping; empty when there was no header
        styles = [styles[f] for f in flds]
        for r, row in enumerate(it):
            for c, (v, style) in enumerate(izip_longest(row, styles,
                                                        fillvalue=None)):
                ws.write(r+1, c, label=v, style=style)

    target = write_source_from_arg(filename)
    with target.open('wb') as target2:
        wb.save(target2)


Table.toxls = toxls
5 | org = logging.Logger.debug 6 | 7 | def debug(self, msg, *args, **kwargs): 8 | org(self, str(msg), *args, **kwargs) 9 | 10 | logging.Logger.debug = debug 11 | -------------------------------------------------------------------------------- /petl/test/failonerror.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from petl.test.helpers import ieq, eq_ 4 | import petl.config as config 5 | 6 | 7 | 8 | def assert_failonerror(input_fn, expected_output): 9 | """In the input rows, the first row should process through the 10 | transformation cleanly. The second row should generate an 11 | exception. There are no requirements for any other rows.""" 12 | #========================================================= 13 | # Test function parameters with default config settings 14 | #========================================================= 15 | # test the default config setting: failonerror == False 16 | eq_(config.failonerror, False) 17 | 18 | # By default, a bad conversion does not raise an exception, and 19 | # values for the failed conversion are returned as None 20 | table2 = input_fn() 21 | ieq(expected_output, table2) 22 | ieq(expected_output, table2) 23 | 24 | # When called with failonerror is False or None, a bad conversion 25 | # does not raise an exception, and values for the failed conversion 26 | # are returned as None 27 | table3 = input_fn(failonerror=False) 28 | ieq(expected_output, table3) 29 | ieq(expected_output, table3) 30 | table3 = input_fn(failonerror=None) 31 | ieq(expected_output, table3) 32 | ieq(expected_output, table3) 33 | 34 | # When called with failonerror=True, a bad conversion raises an 35 | # exception 36 | with pytest.raises(Exception): 37 | table4 = input_fn(failonerror=True) 38 | table4.nrows() 39 | 40 | # When called with failonerror='inline', a bad conversion 41 | # does not raise an exception, and an Exception for the failed 42 | # conversion is returned in the result. 
43 | expect5 = expected_output[0], expected_output[1] 44 | table5 = input_fn(failonerror='inline') 45 | ieq(expect5, table5.head(1)) 46 | ieq(expect5, table5.head(1)) 47 | excp = table5[2][0] 48 | assert isinstance(excp, Exception) 49 | 50 | #========================================================= 51 | # Test config settings 52 | #========================================================= 53 | # Save config setting 54 | saved_config_failonerror = config.failonerror 55 | 56 | # When config.failonerror == True, a bad conversion raises an 57 | # exception 58 | config.failonerror = True 59 | with pytest.raises(Exception): 60 | table6 = input_fn() 61 | table6.nrows() 62 | 63 | # When config.failonerror == 'inline', a bad conversion 64 | # does not raise an exception, and an Exception for the failed 65 | # conversion is returned in the result. 66 | expect7 = expected_output[0], expected_output[1] 67 | config.failonerror = 'inline' 68 | table7 = input_fn() 69 | ieq(expect7, table7.head(1)) 70 | ieq(expect7, table7.head(1)) 71 | excp = table7[2][0] 72 | assert isinstance(excp, Exception) 73 | 74 | # When config.failonerror is an invalid value, but still truthy, it 75 | # behaves the same as if == True 76 | config.failonerror = 'invalid' 77 | with pytest.raises(Exception): 78 | table8 = input_fn() 79 | table8.nrows() 80 | 81 | # When config.failonerror is None, it behaves the same as if 82 | # config.failonerror is False 83 | config.failonerror = None 84 | table9 = input_fn() 85 | ieq(expected_output, table9) 86 | ieq(expected_output, table9) 87 | 88 | # A False keyword parameter overrides config.failonerror == True 89 | config.failonerror = True 90 | table10 = input_fn(failonerror=False) 91 | ieq(expected_output, table10) 92 | ieq(expected_output, table10) 93 | 94 | # A None keyword parameter uses config.failonerror == True 95 | config.failonerror = True 96 | with pytest.raises(Exception): 97 | table11 = input_fn(failonerror=None) 98 | table11.nrows() 99 | 100 | # restore 
def eq_(expect, actual, msg=None):
    """Assert that two values are exactly equal (==), failing with *msg*
    or, when *msg* is falsy, a generated '<expect> != <actual>' message."""
    detail = msg or ('%r != %s' % (expect, actual))
    assert expect == actual, detail
def get_env_vars_named(prefix, remove_prefix=True):
    """Collect environment variables whose names start with *prefix*
    (matched case-insensitively).

    Returns a dict keyed by variable name — with the prefix stripped when
    *remove_prefix* is true — or None when nothing matches.
    """
    found = {}
    plen = len(prefix)
    target = prefix.upper()
    for name, value in os.environ.items():
        if not name.upper().startswith(target):
            continue
        key = name[plen:] if remove_prefix else name
        found[key] = value
    return found or None
list(zip(*cols)) 30 | 31 | # from ctable object 32 | actual = frombcolz(ctbl) 33 | ieq(expect, actual) 34 | ieq(expect, actual) 35 | 36 | # from rootdir 37 | actual = frombcolz(rootdir) 38 | ieq(expect, actual) 39 | ieq(expect, actual) 40 | 41 | def test_tobcolz(): 42 | t = [('foo', 'bar', 'baz'), 43 | ('apples', 1, 2.5), 44 | ('oranges', 3, 4.4), 45 | ('pears', 7, .1)] 46 | 47 | ctbl = tobcolz(t) 48 | assert isinstance(ctbl, bcolz.ctable) 49 | eq_(t[0], tuple(ctbl.names)) 50 | ieq(t[1:], (tuple(r) for r in ctbl.iter())) 51 | 52 | ctbl = tobcolz(t, chunklen=2) 53 | assert isinstance(ctbl, bcolz.ctable) 54 | eq_(t[0], tuple(ctbl.names)) 55 | ieq(t[1:], (tuple(r) for r in ctbl.iter())) 56 | eq_(2, ctbl.cols[ctbl.names[0]].chunklen) 57 | 58 | def test_appendbcolz(): 59 | t = [('foo', 'bar', 'baz'), 60 | ('apples', 1, 2.5), 61 | ('oranges', 3, 4.4), 62 | ('pears', 7, .1)] 63 | 64 | # append to in-memory ctable 65 | ctbl = tobcolz(t) 66 | appendbcolz(t, ctbl) 67 | eq_(t[0], tuple(ctbl.names)) 68 | ieq(t[1:] + t[1:], (tuple(r) for r in ctbl.iter())) 69 | 70 | # append to on-disk ctable 71 | rootdir = tempfile.mkdtemp() 72 | tobcolz(t, rootdir=rootdir) 73 | appendbcolz(t, rootdir) 74 | ctbl = bcolz.open(rootdir, mode='r') 75 | eq_(t[0], tuple(ctbl.names)) 76 | ieq(t[1:] + t[1:], (tuple(r) for r in ctbl.iter())) 77 | -------------------------------------------------------------------------------- /petl/test/io/test_csv_unicode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import io 6 | from tempfile import NamedTemporaryFile 7 | 8 | 9 | from petl.test.helpers import ieq, eq_ 10 | from petl.io.csv import fromcsv, tocsv, appendcsv 11 | 12 | 13 | def test_fromcsv(): 14 | 15 | data = ( 16 | u"name,id\n" 17 | u"Արամ Խաչատրյան,1\n" 18 | u"Johann Strauß,2\n" 19 | u"Вагиф Сәмәдоғлу,3\n" 20 | u"章子怡,4\n" 21 | ) 22 | fn = 
NamedTemporaryFile().name 23 | uf = io.open(fn, encoding='utf-8', mode='wt') 24 | uf.write(data) 25 | uf.close() 26 | 27 | actual = fromcsv(fn, encoding='utf-8') 28 | expect = ((u'name', u'id'), 29 | (u'Արամ Խաչատրյան', u'1'), 30 | (u'Johann Strauß', u'2'), 31 | (u'Вагиф Сәмәдоғлу', u'3'), 32 | (u'章子怡', u'4')) 33 | ieq(expect, actual) 34 | ieq(expect, actual) # verify can iterate twice 35 | 36 | 37 | def test_fromcsv_lineterminators(): 38 | data = (u'name,id', 39 | u'Արամ Խաչատրյան,1', 40 | u'Johann Strauß,2', 41 | u'Вагиф Сәмәдоғлу,3', 42 | u'章子怡,4') 43 | expect = ((u'name', u'id'), 44 | (u'Արամ Խաչատրյան', u'1'), 45 | (u'Johann Strauß', u'2'), 46 | (u'Вагиф Сәмәдоғлу', u'3'), 47 | (u'章子怡', u'4')) 48 | 49 | for lt in u'\r', u'\n', u'\r\n': 50 | fn = NamedTemporaryFile().name 51 | uf = io.open(fn, encoding='utf-8', mode='wt', newline='') 52 | uf.write(lt.join(data)) 53 | uf.close() 54 | actual = fromcsv(fn, encoding='utf-8') 55 | ieq(expect, actual) 56 | 57 | 58 | def test_tocsv(): 59 | 60 | tbl = ((u'name', u'id'), 61 | (u'Արամ Խաչատրյան', 1), 62 | (u'Johann Strauß', 2), 63 | (u'Вагиф Сәмәдоғлу', 3), 64 | (u'章子怡', 4)) 65 | fn = NamedTemporaryFile().name 66 | tocsv(tbl, fn, encoding='utf-8', lineterminator='\n') 67 | 68 | expect = ( 69 | u"name,id\n" 70 | u"Արամ Խաչատրյան,1\n" 71 | u"Johann Strauß,2\n" 72 | u"Вагиф Сәмәдоғлу,3\n" 73 | u"章子怡,4\n" 74 | ) 75 | uf = io.open(fn, encoding='utf-8', mode='rt', newline='') 76 | actual = uf.read() 77 | eq_(expect, actual) 78 | 79 | # Test with write_header=False 80 | tbl = ((u'name', u'id'), 81 | (u'Արամ Խաչատրյան', 1), 82 | (u'Johann Strauß', 2), 83 | (u'Вагиф Сәмәдоғлу', 3), 84 | (u'章子怡', 4)) 85 | tocsv(tbl, fn, encoding='utf-8', lineterminator='\n', write_header=False) 86 | 87 | expect = ( 88 | u"Արամ Խաչատրյան,1\n" 89 | u"Johann Strauß,2\n" 90 | u"Вагиф Сәмәдоғлу,3\n" 91 | u"章子怡,4\n" 92 | ) 93 | uf = io.open(fn, encoding='utf-8', mode='rt', newline='') 94 | actual = uf.read() 95 | eq_(expect, actual) 96 | 97 | 98 | def 
test_appendcsv(): 99 | 100 | data = ( 101 | u"name,id\n" 102 | u"Արամ Խաչատրյան,1\n" 103 | u"Johann Strauß,2\n" 104 | u"Вагиф Сәмәдоғлу,3\n" 105 | u"章子怡,4\n" 106 | ) 107 | fn = NamedTemporaryFile().name 108 | uf = io.open(fn, encoding='utf-8', mode='wt') 109 | uf.write(data) 110 | uf.close() 111 | 112 | tbl = ((u'name', u'id'), 113 | (u'ኃይሌ ገብረሥላሴ', 5), 114 | (u'ედუარდ შევარდნაძე', 6)) 115 | appendcsv(tbl, fn, encoding='utf-8', lineterminator='\n') 116 | 117 | expect = ( 118 | u"name,id\n" 119 | u"Արամ Խաչատրյան,1\n" 120 | u"Johann Strauß,2\n" 121 | u"Вагиф Сәмәдоғлу,3\n" 122 | u"章子怡,4\n" 123 | u"ኃይሌ ገብረሥላሴ,5\n" 124 | u"ედუარდ შევარდნაძე,6\n" 125 | ) 126 | uf = io.open(fn, encoding='utf-8', mode='rt') 127 | actual = uf.read() 128 | eq_(expect, actual) 129 | 130 | 131 | def test_tocsv_none(): 132 | 133 | tbl = ((u'col1', u'colNone'), 134 | (u'a', 1), 135 | (u'b', None), 136 | (u'c', None), 137 | (u'd', 4)) 138 | fn = NamedTemporaryFile().name 139 | tocsv(tbl, fn, encoding='utf-8', lineterminator='\n') 140 | 141 | expect = ( 142 | u'col1,colNone\n' 143 | u'a,1\n' 144 | u'b,\n' 145 | u'c,\n' 146 | u'd,4\n' 147 | ) 148 | 149 | uf = io.open(fn, encoding='utf-8', mode='rt', newline='') 150 | actual = uf.read() 151 | eq_(expect, actual) 152 | -------------------------------------------------------------------------------- /petl/test/io/test_html.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | from tempfile import NamedTemporaryFile 6 | import io 7 | from petl.test.helpers import eq_ 8 | 9 | 10 | from petl.io.html import tohtml 11 | 12 | 13 | def test_tohtml(): 14 | 15 | # exercise function 16 | table = (('foo', 'bar'), 17 | ('a', 1), 18 | ('b', (1, 2)), 19 | ('c', False)) 20 | 21 | f = NamedTemporaryFile(delete=False) 22 | tohtml(table, f.name, encoding='ascii', lineterminator='\n') 23 | 24 | # check what it did 25 | with 
io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 26 | actual = o.read() 27 | expect = ( 28 | u"\n" 29 | u"\n" 30 | u"\n" 31 | u"\n" 32 | u"\n" 33 | u"\n" 34 | u"\n" 35 | u"\n" 36 | u"\n" 37 | u"\n" 38 | u"\n" 39 | u"\n" 40 | u"\n" 41 | u"\n" 42 | u"\n" 43 | u"\n" 44 | u"\n" 45 | u"\n" 46 | u"\n" 47 | u"\n" 48 | u"\n" 49 | u"
foobar
a1
b(1, 2)
cFalse
\n" 50 | ) 51 | eq_(expect, actual) 52 | 53 | 54 | def test_tohtml_caption(): 55 | 56 | # exercise function 57 | table = (('foo', 'bar'), 58 | ('a', 1), 59 | ('b', (1, 2))) 60 | f = NamedTemporaryFile(delete=False) 61 | tohtml(table, f.name, encoding='ascii', caption='my table', 62 | lineterminator='\n') 63 | 64 | # check what it did 65 | with io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 66 | actual = o.read() 67 | expect = ( 68 | u"\n" 69 | u"\n" 70 | u"\n" 71 | u"\n" 72 | u"\n" 73 | u"\n" 74 | u"\n" 75 | u"\n" 76 | u"\n" 77 | u"\n" 78 | u"\n" 79 | u"\n" 80 | u"\n" 81 | u"\n" 82 | u"\n" 83 | u"\n" 84 | u"\n" 85 | u"\n" 86 | u"
my table
foobar
a1
b(1, 2)
\n" 87 | ) 88 | eq_(expect, actual) 89 | 90 | 91 | def test_tohtml_with_style(): 92 | 93 | # exercise function 94 | table = (('foo', 'bar'), 95 | ('a', 1)) 96 | 97 | f = NamedTemporaryFile(delete=False) 98 | tohtml(table, f.name, encoding='ascii', lineterminator='\n', 99 | tr_style='text-align: right', td_styles='text-align: center') 100 | 101 | # check what it did 102 | with io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 103 | actual = o.read() 104 | expect = ( 105 | u"\n" 106 | u"\n" 107 | u"\n" 108 | u"\n" 109 | u"\n" 110 | u"\n" 111 | u"\n" 112 | u"\n" 113 | u"\n" 114 | u"\n" 115 | u"\n" 116 | u"\n" 117 | u"\n" 118 | u"
foobar
a1
\n" 119 | ) 120 | eq_(expect, actual) 121 | 122 | 123 | def test_tohtml_headerless(): 124 | table = [] 125 | 126 | f = NamedTemporaryFile(delete=False) 127 | tohtml(table, f.name, encoding='ascii', lineterminator='\n') 128 | 129 | # check what it did 130 | with io.open(f.name, mode='rt', encoding='ascii', newline='') as o: 131 | actual = o.read() 132 | expect = ( 133 | u"\n" 134 | u"\n" 135 | u"\n" 136 | u"
\n" 137 | ) 138 | eq_(expect, actual) 139 | -------------------------------------------------------------------------------- /petl/test/io/test_html_unicode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import io 6 | from tempfile import NamedTemporaryFile 7 | from petl.test.helpers import eq_ 8 | 9 | 10 | from petl.io.html import tohtml 11 | 12 | 13 | def test_tohtml(): 14 | 15 | # exercise function 16 | tbl = ((u'name', u'id'), 17 | (u'Արամ Խաչատրյան', 1), 18 | (u'Johann Strauß', 2), 19 | (u'Вагиф Сәмәдоғлу', 3), 20 | (u'章子怡', 4)) 21 | fn = NamedTemporaryFile().name 22 | tohtml(tbl, fn, encoding='utf-8', lineterminator='\n') 23 | 24 | # check what it did 25 | f = io.open(fn, mode='rt', encoding='utf-8', newline='') 26 | actual = f.read() 27 | expect = ( 28 | u"\n" 29 | u"\n" 30 | u"\n" 31 | u"\n" 32 | u"\n" 33 | u"\n" 34 | u"\n" 35 | u"\n" 36 | u"\n" 37 | u"\n" 38 | u"\n" 39 | u"\n" 40 | u"\n" 41 | u"\n" 42 | u"\n" 43 | u"\n" 44 | u"\n" 45 | u"\n" 46 | u"\n" 47 | u"\n" 48 | u"\n" 49 | u"\n" 50 | u"\n" 51 | u"\n" 52 | u"\n" 53 | u"
nameid
Արամ Խաչատրյան1
Johann Strauß2
Вагиф Сәмәдоғлу3
章子怡4
\n" 54 | ) 55 | eq_(expect, actual) 56 | -------------------------------------------------------------------------------- /petl/test/io/test_json_unicode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | 5 | import json 6 | from tempfile import NamedTemporaryFile 7 | 8 | 9 | from petl.test.helpers import ieq 10 | from petl.io.json import tojson, fromjson 11 | 12 | 13 | def test_json_unicode(): 14 | 15 | tbl = ((u'id', u'name'), 16 | (1, u'Արամ Խաչատրյան'), 17 | (2, u'Johann Strauß'), 18 | (3, u'Вагиф Сәмәдоғлу'), 19 | (4, u'章子怡'), 20 | ) 21 | fn = NamedTemporaryFile().name 22 | tojson(tbl, fn) 23 | 24 | result = json.load(open(fn)) 25 | assert len(result) == 4 26 | for a, b in zip(tbl[1:], result): 27 | assert a[0] == b['id'] 28 | assert a[1] == b['name'] 29 | 30 | actual = fromjson(fn, header=['id', 'name']) 31 | ieq(tbl, actual) 32 | -------------------------------------------------------------------------------- /petl/test/io/test_jsonl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | from tempfile import NamedTemporaryFile 5 | import json 6 | 7 | from petl import fromjson, tojson 8 | from petl.test.helpers import ieq 9 | 10 | 11 | def test_fromjson_1(): 12 | f = NamedTemporaryFile(delete=False, mode='w') 13 | data = '{"name": "Gilbert", "wins": [["straight", "7S"], ["one pair", "10H"]]}\n' \ 14 | '{"name": "Alexa", "wins": [["two pair", "4S"], ["two pair", "9S"]]}\n' \ 15 | '{"name": "May", "wins": []}\n' \ 16 | '{"name": "Deloise", "wins": [["three of a kind", "5S"]]}' 17 | 18 | f.write(data) 19 | f.close() 20 | 21 | actual = fromjson(f.name, header=['name', 'wins'], lines=True) 22 | 23 | expect = (('name', 'wins'), 24 | ('Gilbert', [["straight", "7S"], ["one pair", "10H"]]), 25 | 
def test_tojson_1():
    """Write a small table as JSON lines and check every emitted record."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    tmp = NamedTemporaryFile(delete=False, mode='r')
    tojson(table, tmp.name, lines=True)

    # one JSON document per line
    result = [json.loads(line) for line in tmp]
    assert len(result) == 3

    expected = [('a', 1), ('b', 2), ('c', 2)]
    for record, (foo, bar) in zip(result, expected):
        assert record['foo'] == foo
        assert record['bar'] == bar
result[2]['wins'] == [] 92 | assert result[3]['name'] == 'Deloise' 93 | assert result[3]['wins'] == [['three of a kind', '5S']] 94 | -------------------------------------------------------------------------------- /petl/test/io/test_pandas.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | 5 | import pytest 6 | 7 | import petl as etl 8 | from petl.test.helpers import ieq 9 | from petl.io.pandas import todataframe, fromdataframe 10 | 11 | 12 | try: 13 | # noinspection PyUnresolvedReferences 14 | import pandas as pd 15 | except ImportError as e: 16 | pytest.skip('SKIP pandas tests: %s' % e, allow_module_level=True) 17 | else: 18 | 19 | def test_todataframe(): 20 | tbl = [('foo', 'bar', 'baz'), 21 | ('apples', 1, 2.5), 22 | ('oranges', 3, 4.4), 23 | ('pears', 7, .1)] 24 | 25 | expect = pd.DataFrame.from_records(tbl[1:], columns=tbl[0]) 26 | actual = todataframe(tbl) 27 | assert expect.equals(actual) 28 | 29 | def test_headerless(): 30 | tbl = [] 31 | expect = pd.DataFrame() 32 | actual = todataframe(tbl) 33 | assert expect.equals(actual) 34 | 35 | def test_fromdataframe(): 36 | tbl = [('foo', 'bar', 'baz'), 37 | ('apples', 1, 2.5), 38 | ('oranges', 3, 4.4), 39 | ('pears', 7, .1)] 40 | df = pd.DataFrame.from_records(tbl[1:], columns=tbl[0]) 41 | ieq(tbl, fromdataframe(df)) 42 | ieq(tbl, fromdataframe(df)) 43 | 44 | def test_integration(): 45 | tbl = [('foo', 'bar', 'baz'), 46 | ('apples', 1, 2.5), 47 | ('oranges', 3, 4.4), 48 | ('pears', 7, .1)] 49 | df = etl.wrap(tbl).todataframe() 50 | tbl2 = etl.fromdataframe(df) 51 | ieq(tbl, tbl2) 52 | ieq(tbl, tbl2) 53 | -------------------------------------------------------------------------------- /petl/test/io/test_pickle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, 
def picklereader(fl):
    """Yield successive pickled objects from file-like *fl* until EOF."""
    while True:
        try:
            item = pickle.load(fl)
        except EOFError:
            # end of the pickle stream - stop iterating
            return
        yield item
def test_fromtext():
    """fromtext should expose each raw line of a UTF-8 file as a row."""
    data = (
        u"name,id\n"
        u"Արամ Խաչատրյան,1\n"
        u"Johann Strauß,2\n"
        u"Вагиф Сәмәдоғлу,3\n"
        u"章子怡,4\n"
    )
    fn = NamedTemporaryFile().name
    with io.open(fn, encoding='utf-8', mode='wt') as f:
        f.write(data)

    actual = fromtext(fn, encoding='utf-8')
    expect = ((u'lines',),
              (u'name,id',),
              (u'Արամ Խաչատրյան,1',),
              (u'Johann Strauß,2',),
              (u'Вагиф Сәмәдоғлу,3',),
              (u'章子怡,4',),
              )
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
id\n" 71 | u"|-\n" 72 | u"| Արամ Խաչատրյան\n" 73 | u"| 1\n" 74 | u"|-\n" 75 | u"| Johann Strauß\n" 76 | u"| 2\n" 77 | u"|-\n" 78 | u"| Вагиф Сәмәдоғлу\n" 79 | u"| 3\n" 80 | u"|-\n" 81 | u"| 章子怡\n" 82 | u"| 4\n" 83 | u"|}\n" 84 | ) 85 | eq_(expect, actual) 86 | -------------------------------------------------------------------------------- /petl/test/resources/test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petl-developers/petl/43925d008bd1d98f90204b3db74d88b3fee27a69/petl/test/resources/test.xls -------------------------------------------------------------------------------- /petl/test/resources/test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petl-developers/petl/43925d008bd1d98f90204b3db74d88b3fee27a69/petl/test/resources/test.xlsx -------------------------------------------------------------------------------- /petl/test/resources/test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Default 6 | python 2.7 7 | 8 | /petl/src 9 | 10 | 11 | -------------------------------------------------------------------------------- /petl/test/test_fluent.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from tempfile import NamedTemporaryFile 5 | import csv 6 | from petl.compat import PY2 7 | 8 | 9 | import petl as etl 10 | from petl.test.helpers import ieq, eq_ 11 | 12 | 13 | def test_basics(): 14 | 15 | t1 = (('foo', 'bar'), 16 | ('A', 1), 17 | ('B', 2)) 18 | w1 = etl.wrap(t1) 19 | 20 | eq_(('foo', 'bar'), w1.header()) 21 | eq_(etl.header(w1), w1.header()) 22 | ieq((('A', 1), ('B', 2)), w1.data()) 23 | ieq(etl.data(w1), w1.data()) 24 | 25 | w2 = w1.cut('bar', 'foo') 26 | expect2 = (('bar', 'foo'), 27 | (1, 'A'), 28 | (2, 'B')) 29 | ieq(expect2, w2) 30 | 
def test_container():
    """Wrapped tables support row indexing, column access and len()."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    # row access by integer index returns the header row
    eq_(('foo', 'bar'), etl.wrap(table)[0])
    # column access by field name yields the data values
    ieq((1, 2, 2), etl.wrap(table)['bar'])
    # len() counts header plus data rows
    eq_(4, len(etl.wrap(table)))
('B', 5), 120 | ('C', 7))) 121 | 122 | added, removed = tablea.diff(tableb) 123 | eq_(('foo', 'bar'), added.header()) 124 | eq_(('foo', 'bar'), removed.header()) 125 | ieq(etl.data(added), added.data()) 126 | ieq(etl.data(removed), removed.data()) 127 | -------------------------------------------------------------------------------- /petl/test/test_helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import pytest 5 | 6 | from petl.test.helpers import eq_, ieq, get_env_vars_named 7 | 8 | GET_ENV_PREFIX = "PETL_TEST_HELPER_ENVVAR_" 9 | 10 | 11 | def _testcase_get_env_vars_named(num_vals, prefix=""): 12 | res = {} 13 | for i in range(1, num_vals, 1): 14 | reskey = prefix + str(i) 15 | res[reskey] = str(i) 16 | return res 17 | 18 | 19 | @pytest.fixture() 20 | def setup_helpers_get_env_vars_named(monkeypatch): 21 | varlist = _testcase_get_env_vars_named(3, prefix=GET_ENV_PREFIX) 22 | for k, v in varlist.items(): 23 | monkeypatch.setenv(k, v) 24 | 25 | 26 | def test_helper_get_env_vars_named_prefixed(setup_helpers_get_env_vars_named): 27 | expected = _testcase_get_env_vars_named(3, GET_ENV_PREFIX) 28 | found = get_env_vars_named(GET_ENV_PREFIX, remove_prefix=False) 29 | ieq(found, expected) 30 | 31 | 32 | def test_helper_get_env_vars_named_unprefixed(setup_helpers_get_env_vars_named): 33 | expected = _testcase_get_env_vars_named(3) 34 | found = get_env_vars_named(GET_ENV_PREFIX, remove_prefix=True) 35 | ieq(found, expected) 36 | 37 | 38 | def test_helper_get_env_vars_named_not_found(setup_helpers_get_env_vars_named): 39 | expected = None 40 | found = get_env_vars_named("PETL_TEST_HELPER_ENVVAR_NOT_FOUND_") 41 | eq_(found, expected) 42 | -------------------------------------------------------------------------------- /petl/test/test_interactive.py: -------------------------------------------------------------------------------- 
def test_repr():
    """repr() of a wrapped table matches the output of etl.look."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expected = str(etl.look(table))
    eq_(expected, repr(etl.wrap(table)))
foobar
a1
b2
c2
55 | """ 56 | actual = etl.wrap(table)._repr_html_() 57 | for l1, l2 in zip(expect.split('\n'), actual.split('\n')): 58 | eq_(l1, l2) 59 | 60 | 61 | def test_repr_html_limit(): 62 | table = (('foo', 'bar'), 63 | ('a', 1), 64 | ('b', 2), 65 | ('c', 2)) 66 | 67 | # lower limit 68 | etl.config.display_limit = 2 69 | 70 | expect = """ 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
foobar
a1
b2
88 |

...

89 | """ 90 | actual = etl.wrap(table)._repr_html_() 91 | print(actual) 92 | for l1, l2 in zip(expect.split('\n'), actual.split('\n')): 93 | eq_(l1, l2) 94 | -------------------------------------------------------------------------------- /petl/test/transform/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | -------------------------------------------------------------------------------- /petl/test/transform/test_fills.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.test.helpers import ieq 5 | from petl.transform.fills import filldown, fillleft, fillright 6 | 7 | 8 | def test_filldown(): 9 | 10 | table = (('foo', 'bar', 'baz'), 11 | (1, 'a', None), 12 | (1, None, .23), 13 | (1, 'b', None), 14 | (2, None, None), 15 | (2, None, .56), 16 | (2, 'c', None), 17 | (None, 'c', .72)) 18 | 19 | actual = filldown(table) 20 | expect = (('foo', 'bar', 'baz'), 21 | (1, 'a', None), 22 | (1, 'a', .23), 23 | (1, 'b', .23), 24 | (2, 'b', .23), 25 | (2, 'b', .56), 26 | (2, 'c', .56), 27 | (2, 'c', .72)) 28 | ieq(expect, actual) 29 | ieq(expect, actual) 30 | 31 | actual = filldown(table, 'bar') 32 | expect = (('foo', 'bar', 'baz'), 33 | (1, 'a', None), 34 | (1, 'a', .23), 35 | (1, 'b', None), 36 | (2, 'b', None), 37 | (2, 'b', .56), 38 | (2, 'c', None), 39 | (None, 'c', .72)) 40 | ieq(expect, actual) 41 | ieq(expect, actual) 42 | 43 | actual = filldown(table, 'foo', 'bar') 44 | expect = (('foo', 'bar', 'baz'), 45 | (1, 'a', None), 46 | (1, 'a', .23), 47 | (1, 'b', None), 48 | (2, 'b', None), 49 | (2, 'b', .56), 50 | (2, 'c', None), 51 | (2, 'c', .72)) 52 | ieq(expect, actual) 53 | ieq(expect, actual) 54 | 55 | 56 | def test_filldown_headerless(): 57 | table = [] 58 | actual = filldown(table, 'foo') 59 | expect = [] 60 | ieq(expect, actual) 61 | 
def test_fillright():
    """fillright propagates non-missing values rightwards within each row."""
    src = (('foo', 'bar', 'baz'),
           (1, 'a', None),
           (1, None, .23),
           (1, 'b', None),
           (2, None, None),
           (2, None, .56),
           (2, 'c', None),
           (None, 'c', .72))

    filled = fillright(src)
    expected = (('foo', 'bar', 'baz'),
                (1, 'a', 'a'),
                (1, 1, .23),
                (1, 'b', 'b'),
                (2, 2, 2),
                (2, 2, .56),
                (2, 'c', 'c'),
                (None, 'c', .72))
    ieq(expected, filled)
    ieq(expected, filled)  # verify can iterate twice
def test_optional_constraint_with_missing_field():
    """An optional constraint on an absent field produces no problems."""
    checks = [dict(name='C1', field='foo', test=int, optional=True)]

    table = (('bar', 'baz'),
             ('1999-99-99', 'z'))

    actual = validate(table, checks)
    debug(actual)

    # only the header row of the problems table is expected
    ieq((('name', 'row', 'field', 'value', 'error'),), actual)
def test_validation_headerless():
    """Validating an empty table reports only the missing header, no errors."""
    required_header = ('foo', 'bar', 'baz')
    empty_table = []

    expected = (('name', 'row', 'field', 'value', 'error'),
                ('__header__', 0, None, None, 'AssertionError'))

    actual = validate(empty_table, header=required_header)
    ieq(expected, actual)
    ieq(expected, actual)  # verify can iterate twice
def test_columns():
    """columns materialises each field as a list keyed by field name."""
    table = [['foo', 'bar'],
             ['a', 1],
             ['b', 2],
             ['b', 3]]
    cols = columns(table)
    expected = {'foo': ['a', 'b', 'b'],
                'bar': [1, 2, 3]}
    for field, vals in expected.items():
        eq_(vals, cols[field])
def test_numparser():
    """Lax number parser converts where possible and passes through otherwise."""
    parse = numparser()
    cases = [
        ('1', 1),
        ('1.0', 1.0),
        (str(maxint + 1), maxint + 1),   # promoted beyond machine int range
        ('3+4j', 3 + 4j),                # complex numbers are parsed too
        ('aaa', 'aaa'),                  # unparseable input passes through
        (None, None),                    # non-strings pass through
    ]
    for raw, expected in cases:
        assert parse(raw) == expected
def test_randomseed():
    """randomseed returns a non-empty string that changes over time."""
    first = randomseed()
    time.sleep(1)  # seed is time-derived, so wait before sampling again
    second = randomseed()

    assert isinstance(first, str)
    assert first != ""
    assert first != second
47 | """ 48 | columns = ( 49 | ('count', partial(pyrandom.randint, 0, 100)), 50 | ('pet', partial(pyrandom.choice, ['dog', 'cat', 'cow', ])), 51 | ('color', partial(pyrandom.choice, ['yellow', 'orange', 'brown'])), 52 | ('value', pyrandom.random), 53 | ) 54 | rows = 35 55 | 56 | table = dummytable(numrows=rows, fields=columns) 57 | assert table[0] == ('count', 'pet', 'color', 'value') 58 | assert len(table) == rows + 1 59 | 60 | 61 | def test_dummytable_no_seed(): 62 | """ 63 | Ensure that dummytable provides a table with the right number of rows 64 | and columns when not provided with a seed. 65 | """ 66 | rows = 35 67 | 68 | table = dummytable(numrows=rows) 69 | assert len(table[0]) == 3 70 | assert len(table) == rows + 1 71 | 72 | 73 | def test_dummytable_int_seed(): 74 | """ 75 | Ensure that dummytable provides a table with the right number of rows 76 | and columns when provided with an integer as a seed. 77 | """ 78 | rows = 35 79 | seed = 42 80 | table = dummytable(numrows=rows, seed=seed) 81 | assert len(table[0]) == 3 82 | assert len(table) == rows + 1 83 | 84 | 85 | def test_dummytable_class(): 86 | """ 87 | Ensure that DummyTable provides a table with the right number of rows 88 | and columns. 
89 | """ 90 | rows = 70 91 | table = DummyTable(numrows=rows) 92 | 93 | assert len(table) == rows + 1 94 | -------------------------------------------------------------------------------- /petl/test/util/test_statistics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.test.helpers import eq_ 5 | from petl.util.statistics import stats 6 | 7 | 8 | def test_stats(): 9 | 10 | table = (('foo', 'bar', 'baz'), 11 | ('A', 1, 2), 12 | ('B', '2', '3.4'), 13 | ('B', '3', '7.8', True), 14 | ('D', 'xyz', 9.0), 15 | ('E', None)) 16 | 17 | result = stats(table, 'bar') 18 | eq_(1.0, result.min) 19 | eq_(3.0, result.max) 20 | eq_(6.0, result.sum) 21 | eq_(3, result.count) 22 | eq_(2, result.errors) 23 | eq_(2.0, result.mean) 24 | eq_(2/3, result.pvariance) 25 | eq_((2/3)**.5, result.pstdev) 26 | -------------------------------------------------------------------------------- /petl/test/util/test_timing.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.util.counting import nrows 5 | from petl.util.timing import progress, log_progress 6 | 7 | 8 | def test_progress(): 9 | # make sure progress doesn't raise exception 10 | table = (('foo', 'bar', 'baz'), 11 | ('a', 1, True), 12 | ('b', 2, True), 13 | ('b', 3)) 14 | nrows(progress(table)) 15 | 16 | def test_log_progress(): 17 | # make sure log_progress doesn't raise exception 18 | table = (('foo', 'bar', 'baz'), 19 | ('a', 1, True), 20 | ('b', 2, True), 21 | ('b', 3)) 22 | nrows(log_progress(table)) 23 | -------------------------------------------------------------------------------- /petl/transform/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | from 
petl.transform.basics import cut, cutout, movefield, cat, annex, \ 4 | addfield, addfieldusingcontext, addrownumbers, addcolumn, rowslice, head, \ 5 | tail, skipcomments, stack, addfields 6 | 7 | from petl.transform.headers import rename, setheader, extendheader, \ 8 | pushheader, skip, prefixheader, suffixheader, sortheader 9 | 10 | from petl.transform.conversions import convert, convertall, replace, \ 11 | replaceall, update, convertnumbers, format, formatall, interpolate, \ 12 | interpolateall 13 | 14 | from petl.transform.sorts import sort, mergesort, issorted 15 | 16 | from petl.transform.selects import select, selectop, selectcontains, \ 17 | selecteq, selectfalse, selectge, selectgt, selectin, selectis, \ 18 | selectisinstance, selectisnot, selectle, selectlt, selectne, selectnone, \ 19 | selectnotin, selectnotnone, selectrangeclosed, selectrangeopen, \ 20 | selectrangeopenleft, selectrangeopenright, selecttrue, \ 21 | selectusingcontext, rowlenselect, facet, biselect 22 | 23 | from petl.transform.joins import join, leftjoin, rightjoin, outerjoin, \ 24 | crossjoin, antijoin, lookupjoin, unjoin 25 | 26 | from petl.transform.hashjoins import hashjoin, hashleftjoin, hashrightjoin, \ 27 | hashantijoin, hashlookupjoin 28 | 29 | from petl.transform.reductions import rowreduce, mergeduplicates,\ 30 | aggregate, groupcountdistinctvalues, groupselectfirst, groupselectmax, \ 31 | groupselectmin, merge, fold, Conflict, groupselectlast 32 | 33 | from petl.transform.fills import filldown, fillright, fillleft 34 | 35 | from petl.transform.regex import capture, split, search, searchcomplement, \ 36 | sub, splitdown 37 | 38 | from petl.transform.reshape import melt, recast, transpose, pivot, flatten, \ 39 | unflatten 40 | 41 | from petl.transform.maps import fieldmap, rowmap, rowmapmany, rowgroupmap 42 | 43 | from petl.transform.unpacks import unpack, unpackdict 44 | 45 | from petl.transform.dedup import duplicates, unique, distinct, conflicts, \ 46 | isunique 47 | 48 | 
from petl.transform.setops import complement, intersection, \ 49 | recordcomplement, diff, recorddiff, hashintersection, hashcomplement 50 | 51 | from petl.transform.intervals import intervaljoin, intervalleftjoin, \ 52 | intervaljoinvalues, intervalantijoin, intervallookup, intervallookupone, \ 53 | intervalrecordlookup, intervalrecordlookupone, intervalsubtract, \ 54 | facetintervallookup, facetintervallookupone, facetintervalrecordlookup, \ 55 | facetintervalrecordlookupone, collapsedintervals 56 | 57 | from petl.transform.validation import validate 58 | -------------------------------------------------------------------------------- /petl/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.util.base import Table, Record, values, header, data, \ 5 | fieldnames, records, dicts, namedtuples, expr, rowgroupby, empty, wrap 6 | 7 | from petl.util.lookups import lookup, lookupone, dictlookup, dictlookupone, \ 8 | recordlookup, recordlookupone 9 | 10 | from petl.util.parsers import dateparser, timeparser, datetimeparser, \ 11 | numparser, boolparser 12 | 13 | from petl.util.vis import look, lookall, lookstr, lookallstr, see 14 | 15 | from petl.util.random import randomtable, dummytable 16 | 17 | from petl.util.counting import parsecounter, parsecounts, typecounter, \ 18 | typecounts, valuecount, valuecounter, valuecounts, stringpatterncounter, \ 19 | stringpatterns, rowlengths, nrows 20 | 21 | from petl.util.materialise import listoflists, listoftuples, tupleoflists, \ 22 | tupleoftuples, columns, facetcolumns 23 | 24 | from petl.util.timing import progress, log_progress, clock 25 | 26 | from petl.util.statistics import limits, stats 27 | 28 | from petl.util.misc import typeset, diffheaders, diffvalues, nthword, strjoin, \ 29 | coalesce 30 | -------------------------------------------------------------------------------- 
/petl/util/misc.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from petl.util.base import values, header, Table 5 | 6 | 7 | def typeset(table, field): 8 | """ 9 | Return a set containing all Python types found for values in the given 10 | field. E.g.:: 11 | 12 | >>> import petl as etl 13 | >>> table = [['foo', 'bar', 'baz'], 14 | ... ['A', 1, '2'], 15 | ... ['B', u'2', '3.4'], 16 | ... [u'B', u'3', '7.8', True], 17 | ... ['D', u'xyz', 9.0], 18 | ... ['E', 42]] 19 | >>> sorted(etl.typeset(table, 'foo')) 20 | ['str'] 21 | >>> sorted(etl.typeset(table, 'bar')) 22 | ['int', 'str'] 23 | >>> sorted(etl.typeset(table, 'baz')) 24 | ['NoneType', 'float', 'str'] 25 | 26 | The `field` argument can be a field name or index (starting from zero). 27 | 28 | """ 29 | 30 | s = set() 31 | for v in values(table, field): 32 | try: 33 | s.add(type(v).__name__) 34 | except IndexError: 35 | pass # ignore short rows 36 | return s 37 | 38 | 39 | Table.typeset = typeset 40 | 41 | 42 | def diffheaders(t1, t2): 43 | """ 44 | Return the difference between the headers of the two tables as a pair of 45 | sets. E.g.:: 46 | 47 | >>> import petl as etl 48 | >>> table1 = [['foo', 'bar', 'baz'], 49 | ... ['a', 1, .3]] 50 | >>> table2 = [['baz', 'bar', 'quux'], 51 | ... ['a', 1, .3]] 52 | >>> add, sub = etl.diffheaders(table1, table2) 53 | >>> add 54 | {'quux'} 55 | >>> sub 56 | {'foo'} 57 | 58 | """ 59 | 60 | t1h = set(header(t1)) 61 | t2h = set(header(t2)) 62 | return t2h - t1h, t1h - t2h 63 | 64 | 65 | Table.diffheaders = diffheaders 66 | 67 | 68 | def diffvalues(t1, t2, f): 69 | """ 70 | Return the difference between the values under the given field in the two 71 | tables, e.g.:: 72 | 73 | >>> import petl as etl 74 | >>> table1 = [['foo', 'bar'], 75 | ... ['a', 1], 76 | ... ['b', 3]] 77 | >>> table2 = [['bar', 'foo'], 78 | ... [1, 'a'], 79 | ... 
[3, 'c']] 80 | >>> add, sub = etl.diffvalues(table1, table2, 'foo') 81 | >>> add 82 | {'c'} 83 | >>> sub 84 | {'b'} 85 | 86 | """ 87 | 88 | t1v = set(values(t1, f)) 89 | t2v = set(values(t2, f)) 90 | return t2v - t1v, t1v - t2v 91 | 92 | 93 | Table.diffvalues = diffvalues 94 | 95 | 96 | def strjoin(s): 97 | """ 98 | Return a function to join sequences using `s` as the separator. Intended 99 | for use with :func:`petl.transform.conversions.convert`. 100 | 101 | """ 102 | 103 | return lambda l: s.join(map(str, l)) 104 | 105 | 106 | def nthword(n, sep=None): 107 | """ 108 | Construct a function to return the nth word in a string. E.g.:: 109 | 110 | >>> import petl as etl 111 | >>> s = 'foo bar' 112 | >>> f = etl.nthword(0) 113 | >>> f(s) 114 | 'foo' 115 | >>> g = etl.nthword(1) 116 | >>> g(s) 117 | 'bar' 118 | 119 | Intended for use with :func:`petl.transform.conversions.convert`. 120 | 121 | """ 122 | 123 | return lambda s: s.split(sep)[n] 124 | 125 | 126 | def coalesce(*fields, **kwargs): 127 | """ 128 | Return a function which accepts a row and returns the first non-missing 129 | value from the specified fields. Intended for use with 130 | :func:`petl.transform.basics.addfield`. 131 | 132 | """ 133 | missing = kwargs.get('missing', None) 134 | default = kwargs.get('default', None) 135 | 136 | def _coalesce(row): 137 | for f in fields: 138 | v = row[f] 139 | if v is not missing: 140 | return v 141 | return default 142 | 143 | return _coalesce 144 | -------------------------------------------------------------------------------- /petl/util/statistics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | 4 | from collections import namedtuple 5 | 6 | 7 | from petl.util.base import values, Table 8 | 9 | 10 | def limits(table, field): 11 | """ 12 | Find minimum and maximum values under the given field. 
E.g.:: 13 | 14 | >>> import petl as etl 15 | >>> table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]] 16 | >>> minv, maxv = etl.limits(table, 'bar') 17 | >>> minv 18 | 1 19 | >>> maxv 20 | 3 21 | 22 | The `field` argument can be a field name or index (starting from zero). 23 | 24 | """ 25 | 26 | vals = iter(values(table, field)) 27 | try: 28 | minv = maxv = next(vals) 29 | except StopIteration: 30 | return None, None 31 | else: 32 | for v in vals: 33 | if v < minv: 34 | minv = v 35 | if v > maxv: 36 | maxv = v 37 | return minv, maxv 38 | 39 | 40 | Table.limits = limits 41 | 42 | 43 | _stats = namedtuple('stats', ('count', 'errors', 'sum', 'min', 'max', 'mean', 44 | 'pvariance', 'pstdev')) 45 | 46 | 47 | def stats(table, field): 48 | """ 49 | Calculate basic descriptive statistics on a given field. E.g.:: 50 | 51 | >>> import petl as etl 52 | >>> table = [['foo', 'bar', 'baz'], 53 | ... ['A', 1, 2], 54 | ... ['B', '2', '3.4'], 55 | ... [u'B', u'3', u'7.8', True], 56 | ... ['D', 'xyz', 9.0], 57 | ... ['E', None]] 58 | >>> etl.stats(table, 'bar') 59 | stats(count=3, errors=2, sum=6.0, min=1.0, max=3.0, mean=2.0, pvariance=0.6666666666666666, pstdev=0.816496580927726) 60 | 61 | The `field` argument can be a field name or index (starting from zero). 
62 | 63 | """ 64 | 65 | _min = None 66 | _max = None 67 | _sum = 0 68 | _mean = 0 69 | _var = 0 70 | _count = 0 71 | _errors = 0 72 | for v in values(table, field): 73 | try: 74 | v = float(v) 75 | except (ValueError, TypeError): 76 | _errors += 1 77 | else: 78 | _count += 1 79 | if _min is None or v < _min: 80 | _min = v 81 | if _max is None or v > _max: 82 | _max = v 83 | _sum += v 84 | _mean, _var = onlinestats(v, _count, mean=_mean, variance=_var) 85 | _std = _var**.5 86 | return _stats(_count, _errors, _sum, _min, _max, _mean, _var, _std) 87 | 88 | 89 | Table.stats = stats 90 | 91 | 92 | def onlinestats(xi, n, mean=0, variance=0): 93 | # function to calculate online mean and variance 94 | meanprv = mean 95 | varianceprv = variance 96 | mean = (((n - 1)*meanprv) + xi)/n 97 | variance = (((n - 1)*varianceprv) + ((xi - meanprv)*(xi - mean)))/n 98 | return mean, variance 99 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm", "wheel"] 3 | 4 | [tool.bandit] 5 | exclude_dirs = ["bin", "docs"] 6 | 7 | [tool.bandit.assert_used] 8 | skips = ["*/*_test.py", "*/test_*.py"] 9 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_level=DEBUG 3 | doctest_optionflags = NORMALIZE_WHITESPACE ALLOW_UNICODE 4 | addopts = --ignore-glob=*_py2.py --ignore-glob=petl/io/db.py 5 | -------------------------------------------------------------------------------- /requirements-database.txt: -------------------------------------------------------------------------------- 1 | # packages required for testing petl with databases 2 | 3 | cryptography 4 | pymysql 5 | SQLAlchemy>=1.3.6,<2.0 6 | psycopg2-binary 7 | # PyMySQL==0.9.3 8 | 
--------------------------------------------------------------------------------
/requirements-docs.txt:
--------------------------------------------------------------------------------
# Used in tox.ini @ py3x-docs

sphinx
sphinx-issues
rinohtype

# Used for generating docs version from Git

setuptools
setuptools-scm
--------------------------------------------------------------------------------
/requirements-formats.txt:
--------------------------------------------------------------------------------
Cython
numpy
numexpr
intervaltree>=3.0.2
lxml>=4.6.5
openpyxl>=2.6.2
pandas
Whoosh>=2.7.4
xlrd>=2.0.1
xlwt>=1.3.0
fastavro>=0.24.2 ; python_version >= '3'
fastavro==0.24.2 ; python_version < '3'
gspread>=3.4.0 ; python_version >= '3'

# version 3.9.2 fails with python3.12 on macos-latest: PyTables/PyTables#1093
tables ; sys_platform != 'darwin'

--------------------------------------------------------------------------------
/requirements-linting.txt:
--------------------------------------------------------------------------------
## Used as main formatter/linter:

ruff >= 0.3

# Used in Github:

pylint >= 3.0.0
flake8 >= 7.0.0
black >= 24.0.0
bandit[toml,sarif] >= 1.7.0

## Suggestions:

# pre-commit

#? Obs: Should work with python >= 3.8

--------------------------------------------------------------------------------
/requirements-optional.txt:
--------------------------------------------------------------------------------
# Packages below need complex local setup #
# Also check: .github/workflows/test-changes.yml

# Troubleshooting:
# 1. $ export DISABLE_BLOSC_AVX2=1

# 2.1 $ brew install c-blosc # On macOS
# 2.2 $ sudo apt-get install python3-dev # On debian distros
# 2.3 $ sudo dnf install python3-devel # On Fedora/RHEL distros

# 3.1 $ sudo find / -iname "Python.h"
# 3.2 $ export C_INCLUDE_PATH=/usr/include/python3.11/Python.h


blosc ; python_version >= '3.7' and python_version != '3.13'

# Troubleshooting:
# 1. $ pip install --prefer-binary -r requirements-optional.txt
# 2. $ pip install --prefer-binary bcolz

bcolz ; python_version >= '3.7' and python_version < '3.9.9'
--------------------------------------------------------------------------------
/requirements-remote.txt:
--------------------------------------------------------------------------------
# packages for testing remote sources

fastavro>=0.24.2 ; python_version >= '3'
smbprotocol>=1.0.1
paramiko>=2.7.1
requests; python_version >= '3'
fsspec>=0.7.4 ; python_version >= '3'
# NOTE(review): dropped the clause
#   "or ( python_version > '3.10' and python_version < '3.2' )"
# from the marker below: it can never be true (contradictory bounds), so
# removing it does not change which environments install aiohttp.
aiohttp>=3.6.2 ; python_version >= '3.5.3'
s3fs>=0.2.2 ; python_version >= '3'
--------------------------------------------------------------------------------
/requirements-tests.txt:
--------------------------------------------------------------------------------
wheel
setuptools
setuptools-scm
pytest-cov>=2.12.0
pytest>=4.6.6,<7.0.0
tox
coverage
coveralls
mock; python_version < '3.0'
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function

from setuptools import find_packages, setup

# Read the long description up front so the file handle is closed promptly.
# (The original passed open('README.txt').read() inline, which leaks the
# handle until garbage collection.)
with open('README.txt') as readme:
    long_description = readme.read()

setup(
    name='petl',
    author='Alistair Miles',
    author_email='alimanfoo@googlemail.com',
    maintainer="Juarez Rudsatz",
    maintainer_email="juarezr@gmail.com",
    package_dir={'': '.'},
    packages=find_packages('.'),
    scripts=['bin/petl'],
    url='https://github.com/petl-developers/petl',
    license='MIT License',
    description='A Python package for extracting, transforming and loading '
                'tables of data.',
    long_description=long_description,
    python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*',
    setup_requires=["setuptools>18.0", "setuptools-scm>1.5.4"],
    extras_require={
        'avro': ['fastavro>=0.24.0'],
        'bcolz': ['bcolz>=1.2.1'],
        'db': ['SQLAlchemy>=1.3.6,<2.0'],
        'hdf5': ['cython>=0.29.13', 'numpy>=1.16.4', 'numexpr>=2.6.9',
                 'tables>=3.5.2'],
        'http': ['aiohttp>=3.6.2', 'requests'],
        'interval': ['intervaltree>=3.0.2'],
        'numpy': ['numpy>=1.16.4'],
        'pandas': ['pandas>=0.24.2'],
        'remote': ['fsspec>=0.7.4'],
        'smb': ['smbprotocol>=1.0.1'],
        'xls': ['xlrd>=2.0.1', 'xlwt>=1.3.0'],
        'xlsx': ['openpyxl>=2.6.2'],
        'xpath': ['lxml>=4.4.0'],
        'whoosh': ['whoosh'],
    },
    # version is derived from Git tags via setuptools-scm
    use_scm_version={
        "version_scheme": "guess-next-dev",
        "local_scheme": "dirty-tag",
        "write_to": "petl/version.py",
    },
    classifiers=['Intended Audience :: Developers',
                 'License :: OSI Approved :: MIT License',
                 'Programming Language :: Python :: 2',
                 'Programming Language :: Python :: 2.7',
                 'Programming Language :: Python :: 3',
                 'Programming Language :: Python :: 3.6',
                 'Programming Language :: Python :: 3.7',
                 'Programming Language :: Python :: 3.8',
                 'Programming Language :: Python :: 3.9',
                 'Programming Language :: Python :: 3.10',
                 'Programming Language :: Python :: 3.11',
                 'Programming Language :: Python :: 3.12',
                 'Programming Language :: Python :: 3.13',
                 'Topic :: Software Development :: Libraries :: Python Modules'
                 ]
)
--------------------------------------------------------------------------------