├── .codesandbox ├── Dockerfile └── setup.sh ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── feature_request.yml │ └── submit_question.yml └── workflows │ ├── ci.yml │ └── docs.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── datar ├── __init__.py ├── all.py ├── apis │ ├── __init__.py │ ├── base.py │ ├── dplyr.py │ ├── forcats.py │ ├── misc.py │ ├── tibble.py │ └── tidyr.py ├── base.py ├── core │ ├── __init__.py │ ├── defaults.py │ ├── load_plugins.py │ ├── names.py │ ├── operator.py │ ├── options.py │ ├── plugin.py │ └── utils.py ├── data │ ├── __init__.py │ ├── airlines.csv.gz │ ├── airports.csv.gz │ ├── airquality.csv.gz │ ├── anscombe.csv.gz │ ├── band_instruments.csv.gz │ ├── band_instruments2.csv.gz │ ├── band_members.csv.gz │ ├── billboard.csv.gz │ ├── chickweight.csv.gz │ ├── cms_patient_care.csv.gz │ ├── cms_patient_experience.csv.gz │ ├── construction.csv.gz │ ├── diamonds.csv.gz │ ├── economics.csv.gz │ ├── economics_long.csv.gz │ ├── faithful.csv.gz │ ├── faithfuld.csv.gz │ ├── fish_encounters.csv.gz │ ├── flights.csv.gz │ ├── gss_cat.csv.gz │ ├── household.csv.gz │ ├── iris.csv.gz │ ├── luv_colours.csv.gz │ ├── metadata.py │ ├── midwest.csv.gz │ ├── mpg.csv.gz │ ├── msleep.csv.gz │ ├── mtcars.csv.gz │ ├── planes.csv.gz │ ├── population.csv.gz │ ├── presidential.csv.gz │ ├── relig_income.csv.gz │ ├── seals.csv.gz │ ├── smiths.csv.gz │ ├── starwars.csv.gz │ ├── state_abb.csv.gz │ ├── state_division.csv.gz │ ├── state_region.csv.gz │ ├── storms.csv.gz │ ├── table1.csv.gz │ ├── table2.csv.gz │ ├── table3.csv.gz │ ├── table4a.csv.gz │ ├── table4b.csv.gz │ ├── table5.csv.gz │ ├── toothgrowth.csv.gz │ ├── txhousing.csv.gz │ ├── us_rent_income.csv.gz │ ├── warpbreaks.csv.gz │ ├── weather.csv.gz │ ├── who.csv.gz │ ├── who2.csv.gz │ └── world_bank_pop.csv.gz ├── datasets.py ├── dplyr.py ├── forcats.py ├── misc.py ├── tibble.py └── tidyr.py ├── docs ├── CHANGELOG.md ├── backends.md ├── data.md ├── f.md ├── favicon.png ├── func_factory.png ├── import.md ├── notebooks │ ├── across.ipynb │ ├── add_column.ipynb │ ├── add_row.ipynb │ ├── arrange.ipynb │ ├── base-arithmetic.ipynb │ ├── base-funs.ipynb │ ├── base.ipynb │ ├── between.ipynb │ ├── bind.ipynb │ ├── case_when.ipynb │ ├── chop.ipynb │ ├── coalesce.ipynb │ ├── complete.ipynb │ ├── context.ipynb │ ├── count.ipynb │ ├── cumall.ipynb │ ├── desc.ipynb │ ├── distinct.ipynb │ ├── drop_na.ipynb │ ├── enframe.ipynb │ ├── expand.ipynb │ ├── expand_grid.ipynb │ ├── extract.ipynb │ ├── fill.ipynb │ ├── filter-joins.ipynb │ ├── filter.ipynb │ ├── forcats_fct_multi.ipynb │ ├── forcats_lvl_addrm.ipynb │ ├── forcats_lvl_order.ipynb │ ├── forcats_lvl_value.ipynb │ ├── forcats_misc.ipynb │ ├── full_seq.ipynb │ ├── group_by.ipynb │ ├── group_map.ipynb │ ├── group_split.ipynb │ ├── group_trim.ipynb │ ├── lead-lag.ipynb │ ├── mutate-joins.ipynb │ ├── mutate.ipynb │ ├── n_distinct.ipynb │ ├── na_if.ipynb │ ├── nb_helpers.py │ ├── near.ipynb │ ├── nest-join.ipynb │ ├── nest.ipynb │ ├── nth.ipynb │ ├── other.ipynb │ ├── pack.ipynb │ ├── pivot_longer.ipynb │ ├── pivot_wider.ipynb │ ├── pull.ipynb │ ├── ranking.ipynb │ ├── readme.ipynb │ ├── recode.ipynb │ ├── relocate.ipynb │ ├── rename.ipynb │ ├── replace_na.ipynb │ ├── rownames.ipynb │ ├── rows.ipynb │ ├── rowwise.ipynb │ ├── select.ipynb │ ├── separate.ipynb │ ├── setops.ipynb │ ├── slice.ipynb │ ├── summarise.ipynb │ ├── tibble.ipynb │ ├── uncount.ipynb │ ├── unite.ipynb │ └── with_groups.ipynb ├── options.md ├── reference-maps │ ├── ALL.md │ ├── 
base.md │ ├── datasets.md │ ├── dplyr.md │ ├── forcats.md │ ├── other.md │ ├── stats.md │ ├── tibble.md │ ├── tidyr.md │ └── utils.md └── style.css ├── example.png ├── example2.png ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml ├── setup.py ├── tests ├── __init__.py ├── conflict_names.py ├── conftest.py ├── test_array_ufunc.py ├── test_base.py ├── test_conflict_names.py ├── test_data.py ├── test_dplyr.py ├── test_forcats.py ├── test_names.py ├── test_options.py ├── test_plugin.py ├── test_tibble.py ├── test_tidyr.py └── test_utils.py └── tox.ini /.codesandbox/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.12 2 | 3 | RUN apt-get update && apt-get install -y npm fish && \ 4 | pip install -U pip && \ 5 | pip install poetry && \ 6 | poetry config virtualenvs.create false && \ 7 | chsh -s /usr/bin/fish -------------------------------------------------------------------------------- /.codesandbox/setup.sh: -------------------------------------------------------------------------------- 1 | WORKSPACE="/workspace" 2 | 3 | # Install python dependencies 4 | poetry update && poetry install 5 | 6 | cd $WORKSPACE 7 | 8 | # Install whichpy 9 | WHICHPY="https://gist.githubusercontent.com/pwwang/879966128b0408c2459eb0a0b413fa69/raw/2f2573d191edec1937a2bf0873aa33a646b5ef29/whichpy.fish" 10 | curl -sS $WHICHPY -o ~/.config/fish/functions/whichpy.fish 11 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | if TYPE_CHECKING: 5 | omit = 6 | datar/datasets.py 7 | */site-packages/* 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Report incorrect behavior in the datar library 3 | title: "[BUG] " 4 | labels: [bug] 5 | 6 | body: 7 | - type: checkboxes 8 | id: checks 9 | attributes: 10 | label: datar version checks 11 | options: 12 | - label: > 13 | I have checked that this issue has not already been reported. 14 | required: true 15 | - label: > 16 | I have confirmed this bug exists on the 17 | **latest version** of datar and its backends. 18 | required: true 19 | - type: textarea 20 | id: problem 21 | attributes: 22 | label: Issue Description 23 | description: > 24 | Please provide a description of the issue shown in the reproducible example. 25 | validations: 26 | required: true 27 | - type: textarea 28 | id: expected-behavior 29 | attributes: 30 | label: Expected Behavior 31 | description: > 32 | Please describe or show a code example of the expected behavior. 33 | validations: 34 | required: true 35 | - type: textarea 36 | id: version 37 | attributes: 38 | label: Installed Versions 39 | description: > 40 | Please paste the output of ``datar.get_versions()`` 41 | value: > 42 |
<details> 43 | 44 | 45 |         Replace this line with the output of datar.get_versions() 46 | 47 | 48 | </details>
49 |     validations:
 50 |       required: true
 51 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: --------------------------------------------------------------------------------
 1 | name: Feature Request
 2 | description: Suggest an idea for datar
 3 | title: "[ENH] "
 4 | labels: [enhancement]
 5 | 
 6 | body:
 7 |   - type: checkboxes
 8 |     id: checks
 9 |     attributes:
 10 |       label: Feature Type
 11 |       description: Please check what type of feature request you would like to propose.
 12 |       options:
 13 |         - label: >
 14 |             Adding new functionality to datar
 15 |         - label: >
 16 |             Changing existing functionality in datar
 17 |         - label: >
 18 |             Removing existing functionality in datar
 19 |   - type: textarea
 20 |     id: description
 21 |     attributes:
 22 |       label: Problem Description
 23 |       description: >
 24 |         Please describe what problem the feature would solve, e.g. "I wish I could use datar to ..."
 25 |       placeholder: >
 26 |         I wish I could use datar to port the purrr package from R.
 27 |     validations:
 28 |       required: true
 29 |   - type: textarea
 30 |     id: feature
 31 |     attributes:
 32 |       label: Feature Description
 33 |       description: >
 34 |         Please describe how the new feature would be implemented, using pseudocode if relevant.
 35 |       placeholder: >
 36 |         Add a new module `datar.purrr` with functions `map`, `map2`, `map_df`, etc.
 37 |     validations:
 38 |       required: true
 39 |   - type: textarea
 40 |     id: context
 41 |     attributes:
 42 |       label: Additional Context
 43 |       description: >
 44 |         Please provide any relevant GitHub issues, code examples or references that help describe and support
 45 |         the feature request.
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/submit_question.yml: --------------------------------------------------------------------------------
 1 | name: Submit Question
 2 | description: Ask a general question about datar
 3 | title: "[QST] "
 4 | labels: [question]
 5 | 
 6 | body:
 7 |   - type: textarea
 8 |     id: question
 9 |     attributes:
 10 |       label: Question about datar
 11 |       description: >
 12 |         Try to provide a clear and concise description of your question.
 13 |       placeholder: |
 14 |         ```python
 15 |         # Your code here, if applicable
 16 | 
 17 |         ```
 18 | 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: --------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |   release:
 7 |     types: [published]
 8 | 
 9 | jobs:
 10 |   build:
 11 |     runs-on: ubuntu-latest
 12 |     strategy:
 13 |       matrix:
 14 |         python-version: [3.9, "3.10", "3.11", "3.12"]
 15 | 
 16 |     steps:
 17 |       - uses: actions/checkout@v4
 18 |       - name: Setup Python # Set Python version
 19 |         uses: actions/setup-python@v5
 20 |         with:
 21 |           python-version: ${{ matrix.python-version }}
 22 |       - name: Install dependencies
 23 |         run: |
 24 |           python -m pip install --upgrade pip
 25 |           python -m pip install poetry
 26 |           poetry config virtualenvs.create false
 27 |           # poetry install -v
 28 |           python -m pip install . 
29 | python -m pip install flake8 pytest pytest-cov six numpy python-slugify 30 | - name: Run flake8 31 | run: flake8 datar 32 | - name: Test with pytest 33 | run: poetry run pytest tests/ --junitxml=junit/test-results-${{ matrix.python-version }}.xml 34 | - name: Upload pytest test results 35 | uses: actions/upload-artifact@v4 36 | with: 37 | name: pytest-results-${{ matrix.python-version }} 38 | path: junit/test-results-${{ matrix.python-version }}.xml 39 | # Use always() to always run this step to publish test results when there are test failures 40 | if: ${{ always() }} 41 | - name: Run codacy-coverage-reporter 42 | uses: codacy/codacy-coverage-reporter-action@master 43 | if: matrix.python-version == 3.12 44 | with: 45 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} 46 | coverage-reports: cov.xml 47 | 48 | deploy: 49 | needs: build 50 | runs-on: ubuntu-latest 51 | if: github.event_name == 'release' 52 | strategy: 53 | matrix: 54 | python-version: ["3.12"] 55 | steps: 56 | - uses: actions/checkout@v4 57 | - name: Setup Python # Set Python version 58 | uses: actions/setup-python@v5 59 | with: 60 | python-version: ${{ matrix.python-version }} 61 | - name: Install dependencies 62 | run: | 63 | python -m pip install --upgrade pip 64 | python -m pip install poetry 65 | - name: Publish to PyPI 66 | run: poetry publish --build -u ${{ secrets.PYPI_USER }} -p ${{ secrets.PYPI_PASSWORD }} 67 | if: success() 68 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | 3 | on: [push] 4 | 5 | jobs: 6 | docs: 7 | runs-on: ubuntu-latest 8 | # if: github.ref == 'refs/heads/master' 9 | strategy: 10 | matrix: 11 | python-version: ["3.12"] 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Setup Python # Set Python version 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install poetry 21 | poetry config virtualenvs.create false 22 | poetry install -v --with docs 23 | python -m pip install --upgrade pip 24 | # Can't skip optional deps with poetry install -v 25 | # poetry install -v 26 | python -m pip install . 27 | - name: Build docs 28 | run: | 29 | # python -m pip install -r docs/requirements.txt 30 | python -m ipykernel install --user --name python --display-name python 31 | python -m ipykernel install --user --name python3 --display-name python3 32 | cd docs 33 | cp ../README.md index.md 34 | cp ../example.png example.png 35 | cp ../example2.png example2.png 36 | # cp ../logo.png logo.png 37 | cd .. 
38 | mkdocs build 39 | if : success() 40 | - name: Deploy docs 41 | run: | 42 | mkdocs gh-deploy --clean --force 43 | # if: success() && github.ref == 'refs/heads/master' 44 | 45 | fix-index: 46 | needs: docs 47 | runs-on: ubuntu-latest 48 | # if: github.ref == 'refs/heads/master' 49 | strategy: 50 | matrix: 51 | python-version: ["3.12"] 52 | steps: 53 | - uses: actions/checkout@v4 54 | with: 55 | ref: gh-pages 56 | - name: Fix index.html 57 | run: | 58 | echo ':: head of index.html - before ::' 59 | head index.html 60 | sed -i '1,5{/^$/d}' index.html 61 | echo ':: head of index.html - after ::' 62 | head index.html 63 | if: success() 64 | - name: Commit changes 65 | run: | 66 | git config --local user.email "action@github.com" 67 | git config --local user.name "GitHub Action" 68 | git commit -m "Add changes" -a 69 | if: success() 70 | - name: Push changes 71 | uses: ad-m/github-push-action@master 72 | with: 73 | github_token: ${{ secrets.GITHUB_TOKEN }} 74 | branch: gh-pages 75 | if: success() 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | .coverage.xml 46 | cov.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | workdir/ 94 | node_modules/ 95 | _book/ 96 | .vscode 97 | export/ 98 | *.svg 99 | *.dot 100 | *.queue.txt 101 | site/ 102 | 103 | # poetry 104 | # poetry.lock 105 | 106 | # backup files 107 | *.bak 108 | 109 | docs/index.md 110 | docs/logo.png 111 | docs/example.png 112 | docs/example2.png 113 | docs/api/ 114 | docs/*.nbconvert.ipynb 115 | docs/*/*.nbconvert.ipynb 116 | 117 | # vscode's local history extension 118 | .history/ 119 | 120 | # For quick test 121 | /_t.py 122 | /_t.ipynb 123 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: 5df1a4bf6f04a1ed3a643167b38d502575e29aef 5 | hooks: 6 | - id: 
trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | exclude: 'mkdocs.yml' 10 | - repo: local 11 | hooks: 12 | - id: flake8 13 | name: Run flake8 14 | files: ^datar/.+$ 15 | pass_filenames: false 16 | entry: flake8 17 | args: [datar] 18 | types: [python] 19 | language: system 20 | - id: versionchecker 21 | name: Check version agreement in pyproject and __version__ 22 | entry: bash -c 23 | language: system 24 | args: 25 | - get_ver() { echo $(egrep "^__version|^version" $1 | cut -d= -f2 | sed 's/\"\| //g'); }; 26 | v1=`get_ver pyproject.toml`; 27 | v2=`get_ver datar/__init__.py`; 28 | if [[ $v1 == $v2 ]]; then exit 0; else exit 1; fi 29 | pass_filenames: false 30 | files: ^pyproject\.toml|datar/__init__\.py$ 31 | - id: pytest 32 | name: Run pytest 33 | entry: pytest 34 | language: system 35 | args: [tests/] 36 | pass_filenames: false 37 | files: ^tests/.+$|^datar/.+$ 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 pwwang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datar 2 | 3 | A Grammar of Data Manipulation in python 4 | 5 | 6 | [![Pypi][6]][7] [![Github][8]][9] ![Building][10] [![Docs and API][11]][5] [![Codacy][12]][13] [![Codacy coverage][14]][13] [![Downloads][20]][7] 7 | 8 | [Documentation][5] | [Reference Maps][15] | [Notebook Examples][16] | [API][17] 9 | 10 | `datar` is a re-imagining of APIs for data manipulation in python with multiple backends supported. Those APIs are aligned with tidyverse packages in R as much as possible. 
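Backend discovery is plugin-based (see `datar/core/load_plugins.py` and the `backends` option in `datar/core/options.py` later in this dump). A minimal sketch of pinning one backend, assuming `datar-pandas` is installed; the exact plugin name passed to `backends` is an assumption here:

```python
# Sketch: restrict plugin loading to one backend. The `backends` option is
# defined in datar/core/options.py and must be set before the first import
# of datar.all / datar.base, which is what triggers plugin loading.
from datar import options

options(backends=["pandas"])  # plugin name assumed; check your installed backend
from datar.all import f, tibble  # only the pinned backend is loaded
```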
11 | 
 12 | ## Installation
 13 | 
 14 | ```shell
 15 | pip install -U datar
 16 | 
 17 | # install with a backend
 18 | pip install -U datar[pandas]
 19 | 
 20 | # More backends support coming soon
 21 | ```
 22 | 
 23 | 
 28 | 
 29 | ## Backends
 30 | 
 31 | |Repo|Badges|
 32 | |-|-|
 33 | |[datar-numpy][1]|![3] ![18]|
 34 | |[datar-pandas][2]|![4] ![19]|
 35 | |[datar-arrow][22]|![23] ![24]|
 36 | 
 37 | ## Example usage
 38 | 
 39 | ```python
 40 | # with pandas backend
 41 | from datar import f
 42 | from datar.dplyr import mutate, filter_, if_else
 43 | from datar.tibble import tibble
 44 | # or
 45 | # from datar.all import f, mutate, filter_, if_else, tibble
 46 | 
 47 | df = tibble(
 48 |     x=range(4),  # or c[:4] (from datar.base import c)
 49 |     y=['zero', 'one', 'two', 'three']
 50 | )
 51 | df >> mutate(z=f.x)
 52 | """# output
 53 |         x        y      z
 54 |   <int64> <object> <int64>
 55 | 0       0     zero      0
 56 | 1       1      one      1
 57 | 2       2      two      2
 58 | 3       3    three      3
 59 | """
 60 | 
 61 | df >> mutate(z=if_else(f.x>1, 1, 0))
 62 | """# output:
 63 |         x        y      z
 64 |   <int64> <object> <int64>
 65 | 0       0     zero      0
 66 | 1       1      one      0
 67 | 2       2      two      1
 68 | 3       3    three      1
 69 | """
 70 | 
 71 | df >> filter_(f.x>1)
 72 | """# output:
 73 |        x      y
 74 |   <int64> <object>
 75 | 0       2      two
 76 | 1       3    three
 77 | """
 78 | 
 79 | df >> mutate(z=if_else(f.x>1, 1, 0)) >> filter_(f.z==1)
 80 | """# output:
 81 |         x        y      z
 82 |   <int64> <object> <int64>
 83 | 0       2      two      1
 84 | 1       3    three      1
 85 | """
 86 | ```
 87 | 
 88 | ```python
 89 | # works with plotnine
 90 | # example grabbed from https://github.com/has2k1/plydata
 91 | import numpy
 92 | from datar import f
 93 | from datar.base import sin, pi
 94 | from datar.tibble import tibble
 95 | from datar.dplyr import mutate, if_else
 96 | from plotnine import ggplot, aes, geom_line, theme_classic
 97 | 
 98 | df = tibble(x=numpy.linspace(0, 2 * pi, 500))
 99 | (
 100 |     df
 101 |     >> mutate(y=sin(f.x), sign=if_else(f.y >= 0, "positive", "negative"))
 102 |     >> ggplot(aes(x="x", y="y"))
 103 |     + theme_classic()
 104 |     + geom_line(aes(color="sign"), size=1.2)
 105 | )
 106 | ```
 107 | 
 108 | ![example](./example.png)
 109 | 
 110 | ```python
 111 | # very easy to integrate with other libraries
 112 | # for example: klib
 113 | import klib
 114 | from pipda import register_verb
 115 | from datar import f
 116 | from datar.data import iris
 117 | from datar.dplyr import pull
 118 | 
 119 | dist_plot = register_verb(func=klib.dist_plot)
 120 | iris >> pull(f.Sepal_Length) >> dist_plot()
 121 | ```
 122 | 
 123 | ![example](./example2.png)
 124 | 
 125 | ## Testimonials
 126 | 
 127 | [@coforfe](https://github.com/coforfe):
 128 | > Thanks for your excellent package to port R (`dplyr`) flow of processing to Python. I have been using other alternatives, and yours is the one that offers the most extensive and equivalent to what is possible now with `dplyr`. 
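The `register_verb` trick in the klib example above generalizes to any plain function. A sketch with an illustrative helper (`describe` and `describe_verb` are not datar or pipda names):

```python
# Sketch: any unary function becomes a pipeable verb via pipda.register_verb.
from pipda import register_verb
from datar import f
from datar.data import iris
from datar.dplyr import pull

def describe(x):
    """Report basic stats of a numeric sequence (illustrative helper)."""
    return {"n": len(x), "min": min(x), "max": max(x)}

describe_verb = register_verb(func=describe)
iris >> pull(f.Sepal_Length) >> describe_verb()
```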
129 | 130 | [1]: https://github.com/pwwang/datar-numpy 131 | [2]: https://github.com/pwwang/datar-pandas 132 | [3]: https://img.shields.io/codacy/coverage/0a7519dad44246b6bab30576895f6766?style=flat-square 133 | [4]: https://img.shields.io/codacy/coverage/45f4ea84ae024f1a8cf84be54dd144f7?style=flat-square 134 | [5]: https://pwwang.github.io/datar/ 135 | [6]: https://img.shields.io/pypi/v/datar?style=flat-square 136 | [7]: https://pypi.org/project/datar/ 137 | [8]: https://img.shields.io/github/v/tag/pwwang/datar?style=flat-square 138 | [9]: https://github.com/pwwang/datar 139 | [10]: https://img.shields.io/github/actions/workflow/status/pwwang/datar/ci.yml?branch=master&style=flat-square 140 | [11]: https://img.shields.io/github/actions/workflow/status/pwwang/datar/docs.yml?branch=master&style=flat-square 141 | [12]: https://img.shields.io/codacy/grade/3d9bdff4d7a34bdfb9cd9e254184cb35?style=flat-square 142 | [13]: https://app.codacy.com/gh/pwwang/datar 143 | [14]: https://img.shields.io/codacy/coverage/3d9bdff4d7a34bdfb9cd9e254184cb35?style=flat-square 144 | [15]: https://pwwang.github.io/datar/reference-maps/ALL/ 145 | [16]: https://pwwang.github.io/datar/notebooks/across/ 146 | [17]: https://pwwang.github.io/datar/api/datar/ 147 | [18]: https://img.shields.io/pypi/v/datar-numpy?style=flat-square 148 | [19]: https://img.shields.io/pypi/v/datar-pandas?style=flat-square 149 | [20]: https://img.shields.io/pypi/dm/datar?style=flat-square 150 | [21]: https://github.com/tidyverse/dplyr 151 | [22]: https://github.com/pwwang/datar-arrow 152 | [23]: https://img.shields.io/codacy/coverage/5f4ef9dd2503437db18786ff9e841d8b?style=flat-square 153 | [24]: https://img.shields.io/pypi/v/datar-arrow?style=flat-square 154 | -------------------------------------------------------------------------------- /datar/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Mapping as _Mapping 2 | 3 | from .core import operator as _ 4 | from .core.defaults import f 5 | from .core.options import options, get_option, options_context 6 | 7 | __version__ = "0.15.9" 8 | 9 | 10 | def get_versions(prnt: bool = True) -> _Mapping[str, str]: 11 | """Return/Print the versions of the dependencies. 12 | 13 | Args: 14 | prnt: If True, print the versions, otherwise return them. 15 | 16 | Returns: 17 | A dict of the versions of the dependencies if `prnt` is False. 
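A usage sketch for `get_versions` as defined here:

```python
# Sketch: print the version table, or collect it as a dict instead.
import datar

datar.get_versions()                   # prints one "name: version" line per dependency
vers = datar.get_versions(prnt=False)  # e.g. {"python": ..., "datar": "0.15.9", ...}
```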
18 | """ 19 | import sys 20 | import executing 21 | import pipda 22 | import simplug 23 | from .core.load_plugins import plugin 24 | 25 | versions = { 26 | "python": sys.version, 27 | "datar": __version__, 28 | "simplug": simplug.__version__, 29 | "executing": executing.__version__, 30 | "pipda": pipda.__version__, 31 | } 32 | 33 | versions_plg = plugin.hooks.get_versions() 34 | versions.update(versions_plg) 35 | 36 | if not prnt: 37 | return versions 38 | 39 | keylen = max(map(len, versions)) 40 | for key in versions: 41 | ver = versions[key] 42 | verlines = ver.splitlines() 43 | print(f"{key.ljust(keylen)}: {verlines.pop(0)}") 44 | for verline in verlines: # pragma: no cover 45 | print(f"{' ' * keylen} {verline}") 46 | 47 | return None 48 | -------------------------------------------------------------------------------- /datar/all.py: -------------------------------------------------------------------------------- 1 | """Import all constants, verbs and functions""" 2 | 3 | from .core import load_plugins as _ 4 | from .core.defaults import f 5 | 6 | from .base import _conflict_names as _base_conflict_names 7 | from .dplyr import _conflict_names as _dplyr_conflict_names 8 | 9 | from .base import * 10 | from .dplyr import * 11 | from .forcats import * 12 | from .tibble import * 13 | from .tidyr import * 14 | from .misc import * 15 | 16 | __all__ = [key for key in locals() if not key.startswith("_")] 17 | 18 | if get_option("allow_conflict_names"): # noqa: F405 19 | __all__.extend(_base_conflict_names | _dplyr_conflict_names) 20 | for name in _base_conflict_names | _dplyr_conflict_names: 21 | locals()[name] = locals()[name + "_"] 22 | 23 | 24 | def __getattr__(name): 25 | """Even when allow_conflict_names is False, datar.base.sum should be fine 26 | """ 27 | if name in _base_conflict_names | _dplyr_conflict_names: 28 | import sys 29 | import ast 30 | from executing import Source 31 | node = Source.executing(sys._getframe(1)).node 32 | if isinstance(node, (ast.Call, ast.Attribute)): 33 | # import datar.all as d 34 | # d.sum(...) or getattr(d, "sum")(...) 35 | return globals()[name + "_"] 36 | 37 | raise AttributeError 38 | -------------------------------------------------------------------------------- /datar/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/apis/__init__.py -------------------------------------------------------------------------------- /datar/apis/misc.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 3 | from pipda import register_func 4 | 5 | 6 | @contextmanager 7 | def _array_ufunc_with_backend(backend: str): 8 | """Use a backend for the operator""" 9 | old_backend = array_ufunc.backend 10 | array_ufunc.backend = backend 11 | yield 12 | array_ufunc.backend = old_backend 13 | 14 | 15 | @register_func(cls=object, dispatchable="first") 16 | def array_ufunc(x, ufunc, *args, kind, **kwargs): 17 | """Implement the array ufunc 18 | 19 | Allow other backends to override the behavior of the ufunc on 20 | different types of data. 
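A sketch of what this dispatch enables, assuming the pandas backend is installed:

```python
# Sketch: a NumPy ufunc applied to an f-expression is routed through
# array_ufunc (registered in datar/core/load_plugins.py), so it is
# evaluated lazily inside a verb rather than eagerly on the symbol.
import numpy as np
from datar import f
from datar.tibble import tibble
from datar.dplyr import mutate

df = tibble(x=[1.0, 4.0, 9.0])
df >> mutate(root=np.sqrt(f.x))  # root column: 1.0, 2.0, 3.0
```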
21 |     """
 22 |     return ufunc(x, *args, **kwargs)
 23 | 
 24 | 
 25 | array_ufunc.backend = None
 26 | array_ufunc.with_backend = _array_ufunc_with_backend
 27 | 
-------------------------------------------------------------------------------- /datar/apis/tibble.py: --------------------------------------------------------------------------------
 1 | from __future__ import annotations as _
 2 | from typing import Any, Callable as _Callable
 3 | 
 4 | from pipda import (
 5 |     register_verb as _register_verb,
 6 |     register_func as _register_func,
 7 | )
 8 | 
 9 | from ..core.utils import (
 10 |     NotImplementedByCurrentBackendError as _NotImplementedByCurrentBackendError,
 11 | )
 12 | 
 13 | 
 14 | @_register_func(plain=True)
 15 | def tibble(
 16 |     *args,
 17 |     _name_repair: str | _Callable = "check_unique",
 18 |     _rows: int = None,
 19 |     _dtypes=None,
 20 |     _drop_index: bool = False,
 21 |     _index=None,
 22 |     **kwargs,
 23 | ) -> Any:
 24 |     """Constructs a data frame
 25 | 
 26 |     Args:
 27 |         *args: and
 28 |         **kwargs: A set of name-value pairs.
 29 |         _name_repair: treatment of problematic column names:
 30 |             - "minimal": No name repair or checks, beyond basic existence,
 31 |             - "unique": Make sure names are unique and not empty,
 32 |             - "check_unique": (default value), no name repair,
 33 |                 but check that they are unique,
 34 |             - "universal": Make the names unique and syntactic
 35 |             - a function: apply custom name repair
 36 |         _rows: Number of rows of a 0-col dataframe when args and kwargs are
 37 |             not provided. When args or kwargs are provided, this is ignored.
 38 |         _dtypes: The dtypes for each column to convert to.
 39 |         _drop_index: Whether to drop the index for the final data frame
 40 |         _index: The new index of the output frame
 41 | 
 42 |     Returns:
 43 |         A constructed tibble
 44 |     """
 45 |     raise _NotImplementedByCurrentBackendError("tibble")
 46 | 
 47 | 
 48 | @_register_func(pipeable=True, dispatchable=True)
 49 | def tibble_(
 50 |     *args,
 51 |     _name_repair: str | _Callable = "check_unique",
 52 |     _rows: int = None,
 53 |     _dtypes=None,
 54 |     _drop_index: bool = False,
 55 |     _index=None,
 56 |     **kwargs,
 57 | ) -> Any:
 58 |     raise _NotImplementedByCurrentBackendError("tibble_")
 59 | 
 60 | 
 61 | @_register_func(plain=True)
 62 | def tribble(
 63 |     *dummies,
 64 |     _name_repair: str | _Callable = "minimal",
 65 |     _dtypes=None,
 66 | ) -> Any:
 67 |     """Create dataframe using an easier to read row-by-row layout
 68 |     Unlike the original API, which uses formulas (`~col`) to indicate the
 69 |     column names, here we use `f.col`.
 70 | 
 71 |     Args:
 72 |         *dummies: Arguments specifying the structure of a dataframe
 73 |             Variable names should be specified with `f.name`
 74 |         _dtypes: The dtypes for each column to convert to.
 75 | 
 76 |     Examples:
 77 |         >>> tribble(
 78 |         >>>     f.colA, f.colB,
 79 |         >>>     "a",    1,
 80 |         >>>     "b",    2,
 81 |         >>>     "c",    3,
 82 |         >>> )
 83 | 
 84 |     Returns:
 85 |         A dataframe
 86 |     """
 87 |     raise _NotImplementedByCurrentBackendError("tribble")
 88 | 
 89 | 
 90 | @_register_func(plain=True)
 91 | def tibble_row(
 92 |     *args,
 93 |     _name_repair: str | _Callable = "check_unique",
 94 |     _dtypes=None,
 95 |     **kwargs,
 96 | ) -> Any:
 97 |     """Constructs a data frame that is guaranteed to occupy one row.
 98 |     Scalar values will be wrapped with `[]`
 99 |     Args:
 100 |         *args: and
 101 |         **kwargs: A set of name-value pairs. 
102 |         _name_repair: treatment of problematic column names:
 103 |             - "minimal": No name repair or checks, beyond basic existence,
 104 |             - "unique": Make sure names are unique and not empty,
 105 |             - "check_unique": (default value), no name repair,
 106 |                 but check that they are unique,
 107 |             - "universal": Make the names unique and syntactic
 108 |             - a function: apply custom name repair
 109 |     Returns:
 110 |         A constructed dataframe
 111 |     """
 112 |     raise _NotImplementedByCurrentBackendError("tibble_row")
 113 | 
 114 | 
 115 | @_register_verb()
 116 | def as_tibble(df) -> Any:
 117 |     """Convert a DataFrame object to Tibble object"""
 118 |     raise _NotImplementedByCurrentBackendError("as_tibble", df)
 119 | 
 120 | 
 121 | @_register_verb()
 122 | def enframe(x, name="name", value="value") -> Any:
 123 |     """Converts mappings or lists to one- or two-column data frames.
 124 | 
 125 |     Args:
 126 |         x: a list, a dictionary or a dataframe with one or two columns
 127 |         name: and
 128 |         value: Names of the columns that store the names and values.
 129 |             If `None`, a one-column dataframe is returned.
 130 |             `value` cannot be `None`
 131 | 
 132 |     Returns:
 133 |         A data frame with two columns if `name` is not None (default) or
 134 |         one-column otherwise.
 135 |     """
 136 |     raise _NotImplementedByCurrentBackendError("enframe", x)
 137 | 
 138 | 
 139 | @_register_verb()
 140 | def deframe(x) -> Any:
 141 |     """Converts two-column data frames to a dictionary
 142 |     using the first column as name and the second column as value.
 143 |     If the input has only one column, a list is returned.
 144 | 
 145 |     Args:
 146 |         x: A data frame.
 147 | 
 148 |     Returns:
 149 |         A dictionary, or a list if there is only one column in the data frame.
 150 |     """
 151 |     raise _NotImplementedByCurrentBackendError("deframe", x)
 152 | 
 153 | 
 154 | @_register_verb()
 155 | def add_row(
 156 |     _data,
 157 |     *args,
 158 |     _before=None,
 159 |     _after=None,
 160 |     **kwargs,
 161 | ) -> Any:
 162 |     """Add one or more rows of data to an existing data frame.
 163 | 
 164 |     Aliases `add_case`
 165 | 
 166 |     Args:
 167 |         _data: Data frame to append to.
 168 |         *args: and
 169 |         **kwargs: Name-value pairs to add to the data frame.
 170 |         _before: and
 171 |         _after: row index where to add the new rows
 172 |             (defaults to adding after the last row)
 173 | 
 174 |     Returns:
 175 |         The dataframe with the added rows
 176 | 
 177 |     """
 178 |     raise _NotImplementedByCurrentBackendError("add_row", _data)
 179 | 
 180 | 
 181 | @_register_verb()
 182 | def add_column(
 183 |     _data,
 184 |     *args,
 185 |     _before=None,
 186 |     _after=None,
 187 |     _name_repair="check_unique",
 188 |     _dtypes=None,
 189 |     **kwargs,
 190 | ) -> Any:
 191 |     """Add one or more columns to an existing data frame.
 192 | 
 193 |     Args:
 194 |         _data: Data frame to append to
 195 |         *args: and
 196 |         **kwargs: Name-value pairs to add to the data frame
 197 |         _before: and
 198 |         _after: Column index or name where to add the new columns
 199 |             (defaults to adding after the last column)
 200 |         _dtypes: The dtypes for the new columns, either a uniform dtype or a
 201 |             dict of dtypes keyed by column name
 202 | 
 203 |     Returns:
 204 |         The dataframe with the added columns
 205 |     """
 206 |     raise _NotImplementedByCurrentBackendError("add_column", _data)
 207 | 
 208 | 
 209 | @_register_verb()
 210 | def has_rownames(_data) -> bool:
 211 |     """Detect if a data frame has row names
 212 | 
 213 |     Aliases `has_index`
 214 | 
 215 |     Args:
 216 |         _data: The data frame to check
 217 | 
 218 |     Returns:
 219 |         True if the data frame has an index, otherwise False. 
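A short sketch of the verbs specified above, assuming the pandas backend supplies the implementations:

```python
# Sketch: row/column addition and dict <-> frame conversion.
from datar.tibble import tibble, add_row, add_column, enframe, deframe

df = tibble(x=[1, 2], y=["a", "b"])
df = df >> add_row(x=3, y="c")            # one more row at the end
df = df >> add_column(z=[1.0, 2.0, 3.0])  # one more column at the end
pairs = enframe({"a": 1, "b": 2})         # two-column frame: name, value
deframe(pairs)                            # back to {"a": 1, "b": 2}
```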
220 | 
 221 |     """
 222 |     raise _NotImplementedByCurrentBackendError("has_rownames", _data)
 223 | 
 224 | 
 225 | @_register_verb()
 226 | def remove_rownames(_data) -> Any:
 227 |     """Remove the index/rownames of a data frame
 228 | 
 229 |     Aliases `remove_index`, `drop_index`
 230 | 
 231 |     Args:
 232 |         _data: The data frame
 233 | 
 234 |     Returns:
 235 |         The data frame with index removed
 236 | 
 237 |     """
 238 |     raise _NotImplementedByCurrentBackendError("remove_rownames", _data)
 239 | 
 240 | 
 241 | @_register_verb()
 242 | def rownames_to_column(_data, var="rowname") -> Any:
 243 |     """Add rownames as a column
 244 | 
 245 |     Aliases `index_to_column`
 246 | 
 247 |     Args:
 248 |         _data: The data frame
 249 |         var: The name of the column
 250 | 
 251 |     Returns:
 252 |         The data frame with rownames added as one column. Note that the
 253 |         original index is removed.
 254 |     """
 255 |     raise _NotImplementedByCurrentBackendError("rownames_to_column", _data)
 256 | 
 257 | 
 258 | @_register_verb()
 259 | def rowid_to_column(_data, var="rowid") -> Any:
 260 |     """Add row ids as a column
 261 | 
 262 |     Args:
 263 |         _data: The data frame
 264 |         var: The name of the column
 265 | 
 266 |     Returns:
 267 |         The data frame with row ids added as one column.
 268 | 
 269 |     """
 270 |     raise _NotImplementedByCurrentBackendError("rowid_to_column", _data)
 271 | 
 272 | 
 273 | @_register_verb()
 274 | def column_to_rownames(_data, var="rowname") -> Any:
 275 |     """Set rownames/index with one column, and remove it
 276 | 
 277 |     Aliases `column_to_index`
 278 | 
 279 |     Args:
 280 |         _data: The data frame
 281 |         var: The column to convert to the rownames
 282 | 
 283 |     Returns:
 284 |         The data frame with the column converted to rownames
 285 |     """
 286 |     raise _NotImplementedByCurrentBackendError("column_to_rownames", _data)
 287 | 
 288 | 
 289 | # aliases
 290 | add_case = add_row
 291 | has_index = has_rownames
 292 | remove_index = drop_index = remove_rownames
 293 | index_to_column = rownames_to_column
 294 | column_to_index = column_to_rownames
 295 | 
-------------------------------------------------------------------------------- /datar/base.py: --------------------------------------------------------------------------------
 1 | 
 2 | from .core.load_plugins import plugin as _plugin
 3 | from .apis.base import *
 4 | 
 5 | locals().update(_plugin.hooks.base_api())
 6 | __all__ = [key for key in locals() if not key.startswith("_")]
 7 | _conflict_names = {"min", "max", "sum", "abs", "round", "all", "any", "re"}
 8 | 
 9 | if get_option("allow_conflict_names"):  # noqa: F405
 10 |     __all__.extend(_conflict_names)
 11 |     for name in _conflict_names:
 12 |         locals()[name] = locals()[name + "_"]
 13 | 
 14 | 
 15 | def __getattr__(name):
 16 |     """Even when allow_conflict_names is False, datar.base.sum should be fine
 17 |     """
 18 |     if name in _conflict_names:
 19 |         import sys
 20 |         import ast
 21 |         from executing import Source
 22 |         node = Source.executing(sys._getframe(1)).node
 23 |         if isinstance(node, (ast.Call, ast.Attribute)):
 24 |             # import datar.base as d
 25 |             # d.sum(...) 
26 | return globals()[name + "_"] 27 | 28 | raise AttributeError 29 | -------------------------------------------------------------------------------- /datar/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/core/__init__.py -------------------------------------------------------------------------------- /datar/core/defaults.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pipda import Symbolic 4 | 5 | f = Symbolic() 6 | 7 | OPTION_FILE_HOME = Path("~/.datar.toml").expanduser() 8 | OPTION_FILE_CWD = Path("./.datar.toml").resolve() 9 | -------------------------------------------------------------------------------- /datar/core/load_plugins.py: -------------------------------------------------------------------------------- 1 | from pipda import register_array_ufunc 2 | 3 | from .options import get_option 4 | from .plugin import plugin 5 | 6 | 7 | def _array_ufunc_to_register(ufunc, x, *args, kind, **kwargs): 8 | """Register the array ufunc to pipda""" 9 | from ..apis.misc import array_ufunc 10 | 11 | return array_ufunc( 12 | x, 13 | ufunc, 14 | *args, 15 | kind=kind, 16 | **kwargs, 17 | __backend=array_ufunc.backend, 18 | ) 19 | 20 | 21 | plugin.load_entrypoints(only=get_option("backends")) 22 | 23 | plugin.hooks.setup() 24 | register_array_ufunc(_array_ufunc_to_register) 25 | -------------------------------------------------------------------------------- /datar/core/names.py: -------------------------------------------------------------------------------- 1 | """Name repairing""" 2 | import inspect 3 | import re 4 | import keyword 5 | import math 6 | from numbers import Number 7 | from typing import Any, Callable, List, Union, Iterable, Tuple 8 | 9 | from .utils import logger 10 | 11 | 12 | class NameNonUniqueError(ValueError): 13 | """Error for non-unique names""" 14 | 15 | 16 | def _isnan(x: Any) -> bool: 17 | """Check if x is nan""" 18 | return isinstance(x, Number) and math.isnan(x) 19 | 20 | 21 | def _is_scalar(x: Any) -> bool: 22 | """Check if x is scalar""" 23 | if isinstance(x, str): # pragma: no cover 24 | return True 25 | try: 26 | iter(x) 27 | except TypeError: 28 | return True 29 | return False 30 | 31 | 32 | def _log_changed_names(changed_names: List[Tuple[str, str]]) -> None: 33 | """Log the changed names""" 34 | if not changed_names: 35 | return 36 | 37 | logger.warning("New names:") 38 | for orig_name, new_name in changed_names: 39 | logger.warning("* %r -> %r", orig_name, new_name) 40 | 41 | 42 | def _repair_names_minimal(names: Iterable[str]) -> List[str]: 43 | """Minimal repairing""" 44 | return ["" if name is None or _isnan(name) else str(name) for name in names] 45 | 46 | 47 | def _repair_names_unique( 48 | names: Iterable[str], 49 | quiet: bool = False, 50 | sanitizer: Callable = None, 51 | ) -> List[str]: 52 | """Make sure names are unique""" 53 | min_names = _repair_names_minimal(names) 54 | neat_names = [ 55 | re.sub(r"(?:(? 
<!_)_{1,2}\d+|(?<!_)__)+$", "", name)
 56 |         for name in min_names
 57 |     ]
 58 |     if sanitizer is not None:
 59 |         neat_names = [sanitizer(name) for name in neat_names]
 60 | 
 61 |     new_names = []
 62 |     changed_names = []
 63 |     for i, name in enumerate(names, 1):
 64 |         neat_name = neat_names[i - 1]
 65 |         if neat_names.count(neat_name) > 1 or neat_name == "":
 66 |             neat_name = f"{neat_name}__{i}"
 67 |         if neat_name != name:
 68 |             changed_names.append((name, neat_name))
 69 |         new_names.append(neat_name)
 70 |     if not quiet:
 71 |         _log_changed_names(changed_names)
 72 |     return new_names
 73 | 
 74 | 
 75 | def _repair_names_universal(
 76 |     names: Iterable[str],
 77 |     quiet: bool = False,
 78 | ) -> List[str]:
 79 |     """Make sure names are safe to be used as variables or attributes"""
 80 |     min_names = _repair_names_minimal(names)
 81 |     neat_names = [re.sub(r"[^\w]", "_", name) for name in min_names]
 82 |     new_names = _repair_names_unique(
 83 |         neat_names,
 84 |         quiet=True,
 85 |         sanitizer=lambda name: (
 86 |             f"_{name}"
 87 |             if keyword.iskeyword(name) or (name and name[0].isdigit())
 88 |             else name
 89 |         ),
 90 |     )
 91 |     if not quiet:
 92 |         changed_names = [
 93 |             (orig_name, new_name)
 94 |             for orig_name, new_name in zip(names, new_names)
 95 |             if orig_name != new_name
 96 |         ]
 97 |         _log_changed_names(changed_names)
 98 |     return new_names
 99 | 
 100 | 
 101 | def _repair_names_check_unique(names: Iterable[str]) -> Iterable[str]:
 102 |     """Just check the uniqueness"""
 103 |     for name in names:
 104 |         if names.count(name) > 1:
 105 |             raise NameNonUniqueError(f"Names must be unique: {name}")
 106 |         if name == "" or _isnan(name):
 107 |             raise NameNonUniqueError(f"Names can't be empty: {name}")
 108 |         if re.search(r"(?:(?<!_)_{1,2}\d+|(?<!_)__)+$", str(name)):
 109 |             raise NameNonUniqueError(
 110 |                 f"Names can't be of the form `__` or `_j`: {name}"
 111 |             )
 112 |     return names
 113 | 
 114 | 
 115 | BUILTIN_REPAIR_METHODS = dict(
 116 |     minimal=_repair_names_minimal,
 117 |     unique=_repair_names_unique,
 118 |     universal=_repair_names_universal,
 119 |     check_unique=_repair_names_check_unique,
 120 | )
 121 | 
 122 | 
 123 | def repair_names(
 124 |     names: Iterable[str],
 125 |     repair: Union[str, Callable],
 126 | ) -> List[str]:
 127 |     """Repair names based on the method
 128 | 
 129 |     Args:
 130 |         names: The names to be repaired
 131 |         repair: The method to repair
 132 |             - `minimal`: Minimal names are never None or NA.
 133 |                 When an element doesn't have a name, its minimal name
 134 |                 is an empty string.
 135 |             - `unique`: Unique names are unique. A suffix is appended to
 136 |                 duplicate names to make them unique.
 137 |             - `universal`: Universal names are unique and syntactic,
 138 |                 meaning that you can safely use the names as variables without
 139 |                 causing a syntax error (like `f.<name>`).
 140 |             - A function that accepts either a list of names or a single name.
 141 |                 A function that accepts a list of names must annotate its first
 142 |                 argument with `typing.Iterable` or `typing.Sequence`. 
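These repair strategies also back the `_name_repair` argument of `tibble()`. A sketch of calling `repair_names` directly; the expected outputs follow the implementation above (1-based suffixes):

```python
# Sketch: repairing names directly with the built-in methods.
from datar.core.names import repair_names

repair_names(["x", "x", ""], repair="unique")     # ['x__1', 'x__2', '__3']
repair_names(["for", "1st"], repair="universal")  # ['_for', '_1st']
```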
143 | 
 144 |     Examples:
 145 |         >>> repair_names([None]*3, repair="minimal")
 146 |         >>> # ["", "", ""]
 147 |         >>> repair_names(["x", NA], repair="minimal")
 148 |         >>> # ["x", ""]
 149 |         >>> repair_names(["", "x", "", "y", "x", "_2", "__"], repair="unique")
 150 |         >>> # ["__1", "x__2", "__3", "y", "x__5", "__6", "__7"]
 151 |         >>> repair_names(["", "x", NA, "x"], repair="universal")
 152 |         >>> # ["__1", "x__2", "__3", "x__4"]
 153 |         >>> repair_names(["(y)", "_z", ".2fa", "False"], repair="universal")
 154 |         >>> # ["_y_", "_z", "_2fa", "_False"]
 155 | 
 156 |     Returns:
 157 |         The repaired names
 158 | 
 159 |     Raises:
 160 |         ValueError: when repair is not a string or callable
 161 |         NameNonUniqueError: when check_unique fails
 162 |     """
 163 |     if isinstance(repair, str):
 164 |         repair = BUILTIN_REPAIR_METHODS[repair]  # type: ignore
 165 |     elif (
 166 |         not _is_scalar(repair)
 167 |         and all(isinstance(elem, str) for elem in repair)
 168 |     ):
 169 |         return repair  # type: ignore
 170 |     elif not callable(repair):
 171 |         raise ValueError("Expect a function for name repairing.")
 172 | 
 173 |     parameters = inspect.signature(repair).parameters  # type: ignore
 174 |     annotation = list(parameters.values())[0].annotation
 175 |     if annotation is inspect._empty or annotation._name not in (
 176 |         "Iterable",
 177 |         "Sequence",
 178 |     ):  # scalar input
 179 |         return [repair(name) for name in names]
 180 | 
 181 |     return repair(names)
-------------------------------------------------------------------------------- /datar/core/operator.py: --------------------------------------------------------------------------------
 1 | """Operators for datar"""
 2 | from typing import Callable
 3 | from contextlib import contextmanager
 4 | 
 5 | from pipda import register_operator, Operator
 6 | 
 7 | 
 8 | @register_operator
 9 | class DatarOperator(Operator):
 10 |     """Operator class for datar"""
 11 | 
 12 |     backend = None
 13 | 
 14 |     @classmethod
 15 |     @contextmanager
 16 |     def with_backend(cls, backend: str):
 17 |         """Use a backend for the operator"""
 18 |         old_backend = cls.backend
 19 |         cls.backend = backend
 20 |         yield
 21 |         cls.backend = old_backend
 22 | 
 23 |     def __getattr__(self, name: str) -> Callable:
 24 |         from .plugin import plugin
 25 |         return lambda x, y=None: plugin.hooks.operate(
 26 |             name,
 27 |             x,
 28 |             y,
 29 |             __plugin=self.__class__.backend,
 30 |         )
 31 | 
-------------------------------------------------------------------------------- /datar/core/options.py: --------------------------------------------------------------------------------
 1 | """Provide options"""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Any, Generator, Mapping
 5 | from contextlib import contextmanager
 6 | 
 7 | from diot import Diot
 8 | from simpleconf import Config
 9 | 
 10 | from .defaults import OPTION_FILE_CWD, OPTION_FILE_HOME
 11 | 
 12 | _key_transform = lambda key: key.replace("_", ".")
 13 | _dict_transform_back = lambda dic: {
 14 |     key.replace(".", "_"): val for key, val in dic.items()
 15 | }
 16 | 
 17 | OPTIONS = Diot(
 18 |     Config.load(
 19 |         {
 20 |             # Do we allow using conflicting names directly? 
21 |             "allow_conflict_names": False,
 22 |             # Disable some installed backends
 23 |             "backends": [],
 24 |         },
 25 |         OPTION_FILE_HOME,
 26 |         OPTION_FILE_CWD,
 27 |         ignore_nonexist=True,
 28 |     ),
 29 |     diot_transform=_key_transform,
 30 | )
 31 | 
 32 | 
 33 | def options(
 34 |     *args: str | Mapping[str, Any],
 35 |     _return: bool = None,
 36 |     **kwargs: Any,
 37 | ) -> Mapping[str, Any]:
 38 |     """Allow the user to set and examine a variety of global options
 39 | 
 40 |     Args:
 41 |         *args: Names of options to return
 42 |         **kwargs: name-value pairs to create/set options
 43 |         _return: Whether to return the options.
 44 |             If `None`, it is treated as `True` when option names are provided in `args`.
 45 | 
 46 |     Returns:
 47 |         The options before updating if `_return` is `True`.
 48 |     """
 49 |     if not args and not kwargs and (_return is None or _return is True):
 50 |         # Make sure the options won't be changed
 51 |         return OPTIONS.copy()
 52 | 
 53 |     names = [arg.replace(".", "_") for arg in args if isinstance(arg, str)]
 54 |     pairs = {}
 55 |     for arg in args:
 56 |         if isinstance(arg, dict):
 57 |             pairs.update(_dict_transform_back(arg))
 58 |     pairs.update(_dict_transform_back(kwargs))
 59 | 
 60 |     out = None
 61 |     if _return is None:
 62 |         _return = names
 63 | 
 64 |     if _return:
 65 |         out = Diot(
 66 |             {
 67 |                 name: value
 68 |                 for name, value in OPTIONS.items()
 69 |                 if name in names or name in pairs
 70 |             },
 71 |             diot_transform=_key_transform,
 72 |         )
 73 | 
 74 |     for key, val in pairs.items():
 75 |         oldval = OPTIONS[key]
 76 |         if oldval == val:
 77 |             continue
 78 |         OPTIONS[key] = val
 79 | 
 80 |     return out
 81 | 
 82 | 
 83 | @contextmanager
 84 | def options_context(**kwargs: Any) -> Generator:
 85 |     """A context manager to execute code with temporary options
 86 | 
 87 |     Note that this is not thread-safe.
 88 |     """
 89 |     opts = options()  # type: Mapping[str, Any]
 90 |     options(**kwargs)
 91 |     yield
 92 |     options(opts)
 93 | 
 94 | 
 95 | def get_option(x: str, default: Any = None) -> Any:
 96 |     """Get the current value set for option `x`,
 97 |     or `default` (which defaults to `None`) if the option is unset. 
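A sketch of the options API defined in this module:

```python
# Sketch: set options globally, or override them temporarily.
from datar.core.options import options, options_context, get_option

options(allow_conflict_names=True)
with options_context(allow_conflict_names=False):
    assert get_option("allow_conflict_names") is False
assert get_option("allow_conflict_names") is True
```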
98 | 
 99 |     Args:
 100 |         x: The name of the option
 101 |         default: The default value if `x` is unset
 102 |     """
 103 |     return OPTIONS.get(x, default)
 104 | 
 105 | 
 106 | def add_option(x: str, default: Any = None) -> None:
 107 |     """Add an option
 108 | 
 109 |     Args:
 110 |         x: The name of the option
 111 |         default: The default value if `x` is unset
 112 |     """
 113 |     OPTIONS.setdefault(x, default)
 114 | 
-------------------------------------------------------------------------------- /datar/core/plugin.py: --------------------------------------------------------------------------------
 1 | """Plugin system to support different backends"""
 2 | from typing import Any, List, Mapping, Tuple, Callable
 3 | 
 4 | from simplug import Simplug, SimplugResult, makecall
 5 | 
 6 | plugin = Simplug("datar")
 7 | 
 8 | 
 9 | def _collect(calls: List[Tuple[Callable, Tuple, Mapping]]) -> Mapping[str, Any]:
 10 |     """Collect the results from plugins"""
 11 |     collected = {}
 12 |     for call in calls:
 13 |         out = makecall(call)
 14 |         if out is not None:
 15 |             collected.update(out)
 16 |     return collected
 17 | 
 18 | 
 19 | @plugin.spec
 20 | def setup():
 21 |     """Initialize the backend"""
 22 | 
 23 | 
 24 | @plugin.spec(result=_collect)
 25 | def get_versions():
 26 |     """Return the versions of the dependencies of the plugin."""
 27 | 
 28 | 
 29 | @plugin.spec(result=SimplugResult.TRY_SINGLE)
 30 | def load_dataset(name: str, metadata: Mapping):
 31 |     """Implementations for load_dataset()"""
 32 | 
 33 | 
 34 | @plugin.spec(result=_collect)
 35 | def base_api():
 36 |     """What is implemented in the base APIs."""
 37 | 
 38 | 
 39 | @plugin.spec(result=_collect)
 40 | def dplyr_api():
 41 |     """What is implemented in the dplyr APIs."""
 42 | 
 43 | 
 44 | @plugin.spec(result=_collect)
 45 | def tibble_api():
 46 |     """What is implemented in the tibble APIs."""
 47 | 
 48 | 
 49 | @plugin.spec(result=_collect)
 50 | def forcats_api():
 51 |     """What is implemented in the forcats APIs."""
 52 | 
 53 | 
 54 | @plugin.spec(result=_collect)
 55 | def tidyr_api():
 56 |     """What is implemented in the tidyr APIs."""
 57 | 
 58 | 
 59 | @plugin.spec(result=_collect)
 60 | def misc_api():
 61 |     """What is implemented in the misc APIs."""
 62 | 
 63 | 
 64 | @plugin.spec(result=SimplugResult.SINGLE)
 65 | def c_getitem(item):
 66 |     """Get item for c"""
 67 | 
 68 | 
 69 | @plugin.spec(result=SimplugResult.SINGLE)
 70 | def operate(op: str, x: Any, y: Any = None):
 71 |     """Operate on x and y"""
-------------------------------------------------------------------------------- /datar/core/utils.py: --------------------------------------------------------------------------------
 1 | """Utilities for datar"""
 2 | import sys
 3 | import logging
 4 | from typing import Any, Callable
 5 | from contextlib import contextmanager
 6 | 
 7 | from .plugin import plugin
 8 | 
 9 | # logger
 10 | logger = logging.getLogger("datar")
 11 | logger.setLevel(logging.INFO)
 12 | stream_handler = logging.StreamHandler(sys.stderr)
 13 | stream_handler.setFormatter(
 14 |     logging.Formatter(
 15 |         "[%(asctime)s][%(name)s][%(levelname)7s] %(message)s",
 16 |         datefmt="%Y-%m-%d %H:%M:%S",
 17 |     )
 18 | )
 19 | logger.addHandler(stream_handler)
 20 | 
 21 | 
 22 | class NotImplementedByCurrentBackendError(NotImplementedError):
 23 |     """Raised when a function is not implemented by the current backend"""
 24 | 
 25 |     def __init__(self, func: str, data: Any = None) -> None:
 26 |         data_msg = ""
 27 |         if data is not None:
 28 |             data_msg = f"data type: {type(data).__name__}, "
 29 |         msg = (
 30 |             f"'{func}' "
 31 |             f"({data_msg}backends: "
 32 |             f"{', '.join(plugin.get_enabled_plugin_names())})"
 33 |         )
 34 |         
super().__init__(msg) 35 | 36 | 37 | class CollectionFunction: 38 | """Enables c[1:3] to be interpreted as 1:3""" 39 | 40 | def __init__(self, c_func: Callable) -> None: 41 | self.c = c_func 42 | self.backend = None 43 | 44 | def __call__(self, *args, **kwargs): 45 | kwargs["__ast_fallback"] = "normal" 46 | return self.c(*args, **kwargs) 47 | 48 | @contextmanager 49 | def with_backend(self, backend: str): 50 | """Set the backend for c[]""" 51 | _backend = self.backend 52 | self.backend = backend 53 | yield 54 | self.backend = _backend 55 | 56 | def __getitem__(self, item): 57 | """Allow c[1:3] to be interpreted as 1:3""" 58 | return plugin.hooks.c_getitem(item, __plugin=self.backend) 59 | 60 | 61 | def arg_match(arg, argname, values, errmsg=None): 62 | """Make sure arg is in one of the values. 63 | 64 | Mimics `rlang::arg_match`. 65 | """ 66 | if not errmsg: 67 | values = list(values) 68 | errmsg = f"`{argname}` must be one of {values}." 69 | if arg not in values: 70 | raise ValueError(errmsg) 71 | return arg 72 | -------------------------------------------------------------------------------- /datar/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Collects datasets from R-datasets, dplyr and tidyr packages""" 2 | import functools 3 | from typing import Any, List 4 | 5 | from ..core.load_plugins import plugin 6 | from .metadata import Metadata, metadata 7 | 8 | 9 | # Should never do `from datar.data import *` 10 | __all__ = [] # type: List[str] 11 | 12 | 13 | def descr_datasets(*names: str): 14 | """Get the information of the given datasets 15 | 16 | Args: 17 | *names: Names of the datasets to get the information of. 18 | """ 19 | return { 20 | key: val 21 | for key, val in metadata.items() 22 | if key in names or not names 23 | } 24 | 25 | 26 | def add_dataset(name: str, meta: Metadata): 27 | """Add a dataset to the registry 28 | 29 | Args: 30 | name: The name of the dataset 31 | metadata: The metadata of the dataset 32 | """ 33 | metadata[name] = meta 34 | 35 | 36 | @functools.lru_cache() 37 | def load_dataset(name: str, __backend: str = None) -> Any: 38 | """Load the specific dataset""" 39 | loaded = plugin.hooks.load_dataset(name, metadata, __plugin=__backend) 40 | if loaded is None: 41 | from ..core.utils import NotImplementedByCurrentBackendError 42 | raise NotImplementedByCurrentBackendError(f"loading dataset '{name}'") 43 | 44 | return loaded 45 | 46 | 47 | def __getattr__(name: str): 48 | # mkapi accesses quite a lot of attributes starting with _ 49 | if not name.isidentifier() or name.startswith("__"): # pragma: no cover 50 | raise AttributeError(name) 51 | 52 | return load_dataset(name.lower()) 53 | -------------------------------------------------------------------------------- /datar/data/airlines.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/airlines.csv.gz -------------------------------------------------------------------------------- /datar/data/airports.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/airports.csv.gz -------------------------------------------------------------------------------- /datar/data/airquality.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/airquality.csv.gz -------------------------------------------------------------------------------- /datar/data/anscombe.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/anscombe.csv.gz -------------------------------------------------------------------------------- /datar/data/band_instruments.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/band_instruments.csv.gz -------------------------------------------------------------------------------- /datar/data/band_instruments2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/band_instruments2.csv.gz -------------------------------------------------------------------------------- /datar/data/band_members.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/band_members.csv.gz -------------------------------------------------------------------------------- /datar/data/billboard.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/billboard.csv.gz -------------------------------------------------------------------------------- /datar/data/chickweight.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/chickweight.csv.gz -------------------------------------------------------------------------------- /datar/data/cms_patient_care.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/cms_patient_care.csv.gz -------------------------------------------------------------------------------- /datar/data/cms_patient_experience.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/cms_patient_experience.csv.gz -------------------------------------------------------------------------------- /datar/data/construction.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/construction.csv.gz -------------------------------------------------------------------------------- /datar/data/diamonds.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/diamonds.csv.gz -------------------------------------------------------------------------------- /datar/data/economics.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/economics.csv.gz -------------------------------------------------------------------------------- /datar/data/economics_long.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/economics_long.csv.gz -------------------------------------------------------------------------------- /datar/data/faithful.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/faithful.csv.gz -------------------------------------------------------------------------------- /datar/data/faithfuld.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/faithfuld.csv.gz -------------------------------------------------------------------------------- /datar/data/fish_encounters.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/fish_encounters.csv.gz -------------------------------------------------------------------------------- /datar/data/flights.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/flights.csv.gz -------------------------------------------------------------------------------- /datar/data/gss_cat.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/gss_cat.csv.gz -------------------------------------------------------------------------------- /datar/data/household.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/household.csv.gz -------------------------------------------------------------------------------- /datar/data/iris.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/iris.csv.gz -------------------------------------------------------------------------------- /datar/data/luv_colours.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/luv_colours.csv.gz -------------------------------------------------------------------------------- /datar/data/midwest.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/midwest.csv.gz -------------------------------------------------------------------------------- /datar/data/mpg.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/mpg.csv.gz 
-------------------------------------------------------------------------------- /datar/data/msleep.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/msleep.csv.gz -------------------------------------------------------------------------------- /datar/data/mtcars.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/mtcars.csv.gz -------------------------------------------------------------------------------- /datar/data/planes.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/planes.csv.gz -------------------------------------------------------------------------------- /datar/data/population.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/population.csv.gz -------------------------------------------------------------------------------- /datar/data/presidential.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/presidential.csv.gz -------------------------------------------------------------------------------- /datar/data/relig_income.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/relig_income.csv.gz -------------------------------------------------------------------------------- /datar/data/seals.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/seals.csv.gz -------------------------------------------------------------------------------- /datar/data/smiths.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/smiths.csv.gz -------------------------------------------------------------------------------- /datar/data/starwars.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/starwars.csv.gz -------------------------------------------------------------------------------- /datar/data/state_abb.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/state_abb.csv.gz -------------------------------------------------------------------------------- /datar/data/state_division.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/state_division.csv.gz -------------------------------------------------------------------------------- /datar/data/state_region.csv.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/state_region.csv.gz -------------------------------------------------------------------------------- /datar/data/storms.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/storms.csv.gz -------------------------------------------------------------------------------- /datar/data/table1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table1.csv.gz -------------------------------------------------------------------------------- /datar/data/table2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table2.csv.gz -------------------------------------------------------------------------------- /datar/data/table3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table3.csv.gz -------------------------------------------------------------------------------- /datar/data/table4a.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table4a.csv.gz -------------------------------------------------------------------------------- /datar/data/table4b.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table4b.csv.gz -------------------------------------------------------------------------------- /datar/data/table5.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table5.csv.gz -------------------------------------------------------------------------------- /datar/data/toothgrowth.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/toothgrowth.csv.gz -------------------------------------------------------------------------------- /datar/data/txhousing.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/txhousing.csv.gz -------------------------------------------------------------------------------- /datar/data/us_rent_income.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/us_rent_income.csv.gz -------------------------------------------------------------------------------- /datar/data/warpbreaks.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/warpbreaks.csv.gz -------------------------------------------------------------------------------- /datar/data/weather.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/weather.csv.gz -------------------------------------------------------------------------------- /datar/data/who.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/who.csv.gz -------------------------------------------------------------------------------- /datar/data/who2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/who2.csv.gz -------------------------------------------------------------------------------- /datar/data/world_bank_pop.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/world_bank_pop.csv.gz -------------------------------------------------------------------------------- /datar/datasets.py: -------------------------------------------------------------------------------- 1 | # pragma: no cover 2 | import warnings 3 | 4 | 5 | class DatasetsDeprecatedWarning(DeprecationWarning): 6 | ... 7 | 8 | 9 | warnings.simplefilter("always", DatasetsDeprecatedWarning) 10 | 11 | warnings.warn( 12 | "Importing data from `datar.datasets` is deprecated and " 13 | "will be removed in the future. Try `datar.data` instead.", 14 | DatasetsDeprecatedWarning, 15 | ) 16 | 17 | 18 | def __getattr__(name: str): 19 | from . import data 20 | return getattr(data, name) 21 | -------------------------------------------------------------------------------- /datar/dplyr.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .core.options import get_option as _get_option 4 | from .apis.dplyr import * 5 | 6 | locals().update(_plugin.hooks.dplyr_api()) 7 | __all__ = [key for key in locals() if not key.startswith("_")] 8 | _conflict_names = {"filter", "slice"} 9 | 10 | if _get_option("allow_conflict_names"): 11 | __all__.extend(_conflict_names) 12 | for name in _conflict_names: 13 | locals()[name] = locals()[name + "_"] 14 | 15 | 16 | def __getattr__(name): 17 | """Even when allow_conflict_names is False, attribute access like datar.dplyr.filter should be fine 18 | """ 19 | if name in _conflict_names: 20 | import sys 21 | import ast 22 | from executing import Source 23 | # Inspect the calling frame to see how the name is being accessed 24 | node = Source.executing(sys._getframe(1)).node 25 | if isinstance(node, (ast.Call, ast.Attribute)): 26 | # import datar.dplyr as d 27 | # d.filter(...)
28 | return globals()[name + "_"] 29 | 30 | raise AttributeError(name) 31 | -------------------------------------------------------------------------------- /datar/forcats.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .apis.forcats import * 4 | 5 | locals().update(_plugin.hooks.forcats_api()) 6 | -------------------------------------------------------------------------------- /datar/misc.py: -------------------------------------------------------------------------------- 1 | from .core.load_plugins import plugin as _plugin 2 | 3 | locals().update(_plugin.hooks.misc_api()) 4 | -------------------------------------------------------------------------------- /datar/tibble.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .apis.tibble import * 4 | 5 | locals().update(_plugin.hooks.tibble_api()) 6 | -------------------------------------------------------------------------------- /datar/tidyr.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .apis.tidyr import * 4 | 5 | locals().update(_plugin.hooks.tidyr_api()) 6 | -------------------------------------------------------------------------------- /docs/backends.md: -------------------------------------------------------------------------------- 1 | # Backends 2 | 3 | The `datar` package is a collection of APIs that are ported from a set of R packages. The APIs are implemented in a backend-agnostic way, so that they can be used with different backends. Currently, `datar` supports the following backends: 4 | 5 | - [`numpy`](https://github.com/pwwang/datar-numpy): Mostly the implementations of functions from `datar.base`. 6 | - [`pandas`](https://github.com/pwwang/datar-pandas): Implementations using `pandas` as the backend. 7 | 8 | ## Installation of a backend 9 | 10 | ```bash 11 | pip install -U datar[<backend>]  # e.g. datar[pandas] 12 | ``` 13 | 14 | ## Using desired backends 15 | 16 | You can install multiple backends, but use only a subset of them. 17 | 18 | ```python 19 | from datar import options 20 | 21 | options(backends=['pandas']) 22 | 23 | # Then import the API functions 24 | ``` 25 | 26 | ## Writing a backend 27 | 28 | A backend is supposed to be implemented as a `Simplug` plugin. There are a number of hooks to be implemented. 29 | 30 | ### Hooks 31 | 32 | - `setup()`: called before any API is imported. You can do some setup here. 33 | - `get_versions()`: return a dict of versions of the dependencies of the backend. The keys are the names of the packages, and the values are the versions. 34 | - `load_dataset(name: str, metadata: Mapping)`: load a dataset, so that it can be loaded using `from datar.data import <name>`. 35 | - `base_api()`: load the implementation of `datar.apis.base`. 36 | - `dplyr_api()`: load the implementation of `datar.apis.dplyr`. 37 | - `tibble_api()`: load the implementation of `datar.apis.tibble`. 38 | - `forcats_api()`: load the implementation of `datar.apis.forcats`. 39 | - `tidyr_api()`: load the implementation of `datar.apis.tidyr`. 40 | - `other_api()`: load other backend-specific APIs. 41 | - `c_getitem(item)`: load the implementation of `datar.base.c.__getitem__` (`c[...]`). 42 | - `operate(op: str, x: Any, y: Any = None)`: load the implementation of the operators.
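A minimal, hypothetical sketch of a backend module is shown below. The hook names come from the list above, and the `*_api()` hooks return plain dicts because `datar`'s submodules consume them via `locals().update(_plugin.hooks.<module>_api())`; everything else (the module name, the toy `mutate`, and how the module gets registered as a `datar` plugin, e.g. through an entry point) is an assumption for illustration, not the API of a real backend.

```python
# my_backend.py -- hypothetical, minimal backend sketch (not a real package)

def setup():
    # One-time initialization, called before any API is imported
    pass


def get_versions():
    # Report the versions of this backend and its dependencies
    import pandas
    return {"my-backend": "0.0.1", "pandas": pandas.__version__}


def dplyr_api():
    # Return a mapping of API names to implementations;
    # datar.dplyr updates its namespace with this dict.
    def mutate(_data, **kwargs):
        # Toy stand-in: assign each keyword argument as a new column
        for key, val in kwargs.items():
            _data[key] = val
        return _data

    return {"mutate": mutate}
```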
43 | 44 | ## Selecting a backend at runtime 45 | 46 | You can use `__backend` to select a backend at runtime. 47 | 48 | ```python 49 | from datar.tibble import tibble 50 | 51 | tibble(..., __backend="pandas") 52 | ``` 53 | 54 | ## Selecting a backend for operators 55 | 56 | If you have multiple backends installed, you can select a backend for operators. 57 | 58 | ```python 59 | from datar.core.operator import DatarOperator 60 | 61 | DatarOperator.backend = "pandas" 62 | 63 | # Or use the context manager 64 | with DatarOperator.with_backend("pandas"): 65 | data >> mutate(z=f.x + f.y) 66 | ``` 67 | 68 | ## Selecting a backend for `c[]` 69 | 70 | ```python 71 | from datar.base import c 72 | 73 | c.backend = "pandas" 74 | 75 | # Or use the context manager 76 | with c.with_backend("pandas"): 77 | data >> mutate(z=c[1:3]) 78 | ``` 79 | 80 | ## Selecting a backend for numpy ufuncs 81 | 82 | ```python 83 | from datar.apis.other import array_ufunc 84 | 85 | array_ufunc.backend = "pandas" 86 | 87 | # Or use the context manager 88 | with array_ufunc.with_backend("pandas"): 89 | data >> mutate(z=np.sin(f.x)) 90 | ``` 91 | -------------------------------------------------------------------------------- /docs/data.md: -------------------------------------------------------------------------------- 1 | 2 | See the full reference of datasets at: [reference-maps/datasets][1] 3 | 4 | Datasets have to be imported individually: 5 | 6 | ```python 7 | from datar.data import iris 8 | 9 | # or 10 | from datar import data 11 | 12 | iris = data.iris 13 | ``` 14 | 15 | To list all available datasets: 16 | 17 | ```python 18 | from datar import data 19 | print(data.descr_datasets()) 20 | ``` 21 | 22 | `file` shows the path to the csv file of the dataset, and `index` shows whether it has an index (rownames). 23 | 24 | !!! Note 25 | 26 | The column names are altered by replacing `.` with `_`. For example, `Sepal.Width` becomes `Sepal_Width`. 27 | 28 | !!! Note 29 | 30 | Dataset names are case-insensitive, so you can do: 31 | 32 | ```python 33 | from datar.data import ToothGrowth 34 | # or 35 | from datar.data import toothgrowth 36 | ``` 37 | 38 | See also [Backends][2] for the backend implementations that load datasets. 39 | 40 | [1]: ./reference-maps/datasets 41 | [2]: ./backends 42 | -------------------------------------------------------------------------------- /docs/f.md: -------------------------------------------------------------------------------- 1 | ## Why `f`? 2 | 3 | It is simply fast to type: usually a `.` comes right after `f`, so your left hand and right hand work in turn. 4 | 5 | ## The `Symbolic` object `f` 6 | 7 | You can import it by `from datar import f`, or `from datar.all import *`. 8 | 9 | `f` is a universal `Symbolic` object, which does the magic of connecting the expressions in verb arguments so that their execution can be delayed. 10 | 11 | There are different uses for `f`: 12 | 13 | - Use as a proxy to refer to dataframe columns (e.g. `f.x`, `f['x']`) 14 | - Use as the column name marker for `tribble`: 15 | 16 | ```python 17 | tribble( 18 | f.x, f.y, 19 | 1, 2, 20 | 3, 4, 21 | ) 22 | ``` 23 | 24 | !!! note 25 | 26 | If you want a sequence literal, other than using `base.seq()`, you can 27 | also use `base.c[]`. 28 | 29 | For example, 30 | ```python 31 | from datar.base import c 32 | from datar.tibble import tibble 33 | df = tibble(x=c[1:5]) # 1, 2, 3, 4 34 | ``` 35 | 36 | 37 | ## If you don't like `f` ...
38 | 39 | Sometimes, when you have nested verbs with piping, you may want to distinguish the proxies for different verbs: 40 | 41 | ```python 42 | # you can just replicate f with a different name 43 | g = f 44 | 45 | df = tibble(x=1, y=2) 46 | df >> left_join(df >> group_by(f.x), by=g.y) 47 | ``` 48 | 49 | Or you can instantiate a new `Symbolic` object: 50 | ```python 51 | from pipda.symbolic import Symbolic 52 | 53 | g = Symbolic() 54 | # assert f is g 55 | 56 | # f and g make no difference in execution technically 57 | ``` 58 | 59 | You can also alias `f` by: 60 | ```python 61 | from datar import f as g 62 | ``` 63 | -------------------------------------------------------------------------------- /docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/docs/favicon.png -------------------------------------------------------------------------------- /docs/func_factory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/docs/func_factory.png -------------------------------------------------------------------------------- /docs/import.md: -------------------------------------------------------------------------------- 1 | ## Import submodules, verbs and functions from datar 2 | 3 | You can import everything (all verbs and functions) from datar by: 4 | ```python 5 | from datar.all import * 6 | ``` 7 | 8 | which is not recommended. Instead, you can import individual verbs or functions by: 9 | ```python 10 | from datar.all import mutate 11 | ``` 12 | 13 | !!! Attention 14 | 15 | When you use `from datar.all import *`, you need to pay attention to the python builtin names that are masked by `datar` (a warning is shown by default). For example, `slice` will be `datar.dplyr.slice` instead of `builtins.slice`. To refer to the builtin one, you need to: 16 | ```python 17 | import builtins 18 | 19 | s = builtins.slice(None, 3, None) # [:3] 20 | ``` 21 | 22 | Or if you know the origin of the verb, you can also do: 23 | ```python 24 | from datar.dplyr import mutate 25 | ``` 26 | 27 | You can also keep the namespace: 28 | ```python 29 | from datar import dplyr 30 | 31 | # df = tibble(x=1) 32 | # then use it with the dplyr namespace: 33 | df >> dplyr.mutate(y=2) 34 | ``` 35 | 36 | If you feel those namespaces are annoying, you can always use `datar.all`: 37 | ```python 38 | from datar.all import mutate 39 | ``` 40 | 41 | ## Import datasets from datar 42 | 43 | !!! note 44 | 45 | Datasets have to be imported individually. This means `from datar.data import *` won't work (you don't want all datasets to exhaust your memory). 46 | 47 | You don't have to worry about other datasets being imported and taking up memory when you import one. A dataset is only loaded into memory when you explicitly import it. 48 | 49 | See also [datasets](../datasets) for details about available datasets. 50 | 51 | ## About python builtin names masked by `datar` 52 | 53 | Sometimes it can be confusing, especially when python builtin functions are overridden by `datar`. There are a couple of datar (`r-base`, `dplyr`) functions with the same name as python builtin functions. For example, `filter` is a python builtin function, but also a `dplyr` verb; you should use `filter_` instead. By default, `datar` will raise an error when you try to import `filter`. You can set the option `allow_conflict_names` to `True` to allow importing and using these names directly.
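For example (a hypothetical session; note that `allow_conflict_names` must be set before the submodule is imported, since the conflicting names are wired up at import time):

```python
from datar import options

# Opt in to the conflicting names *before* importing the submodule
options(allow_conflict_names=True)

from datar.dplyr import filter  # now allowed; shadows builtins.filter

# Without the option, use the underscore-suffixed alias instead:
# from datar.dplyr import filter_
```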
54 | -------------------------------------------------------------------------------- /docs/notebooks/add_column.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": { 8 | "iopub.execute_input": "2021-07-16T22:28:27.609283Z", 9 | "iopub.status.busy": "2021-07-16T22:28:27.607781Z", 10 | "iopub.status.idle": "2021-07-16T22:28:28.439771Z", 11 | "shell.execute_reply": "2021-07-16T22:28:28.440308Z" 12 | } 13 | }, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/html": [ 18 | "
Try this notebook on binder.
" 19 | ], 20 | "text/plain": [ 21 | "" 22 | ] 23 | }, 24 | "metadata": {}, 25 | "output_type": "display_data" 26 | }, 27 | { 28 | "data": { 29 | "text/markdown": [ 30 | "###
★ add_column
" 31 | ], 32 | "text/plain": [ 33 | "" 34 | ] 35 | }, 36 | "metadata": {}, 37 | "output_type": "display_data" 38 | }, 39 | { 40 | "data": { 41 | "text/markdown": [ 42 | "##### Add one or more columns to an existing data frame.\n", 43 | "\n", 44 | "##### Args:\n", 45 | "  `_data`: Data frame to append to \n", 46 | "  `*args`: and \n", 47 | "  `**kwargs`: Name-value pairs to add to the data frame \n", 48 | "  `_before`: and \n", 49 | "  `_after`: Column index or name where to add the new columns \n", 50 | "    (default to add after the last column) \n", 51 | "\n", 52 | "  `_dtypes`: The dtypes for the new columns, either a uniform dtype or a \n", 53 | "    dict of dtypes with keys the column names \n", 54 | "\n", 55 | "##### Returns:\n", 56 | "  The dataframe with the added columns \n" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "metadata": {}, 63 | "output_type": "display_data" 64 | } 65 | ], 66 | "source": [ 67 | "# https://tibble.tidyverse.org/reference/add_column.html\n", 68 | "\n", 69 | "from datar import f\n", 70 | "from datar.tibble import *\n", 71 | "from datar.base import seq\n", 72 | "from datar.core.names import NameNonUniqueError\n", 73 | "\n", 74 | "%run nb_helpers.py\n", 75 | "nb_header(add_column)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": { 82 | "execution": { 83 | "iopub.execute_input": "2021-07-16T22:28:28.449685Z", 84 | "iopub.status.busy": "2021-07-16T22:28:28.449088Z", 85 | "iopub.status.idle": "2021-07-16T22:28:28.691135Z", 86 | "shell.execute_reply": "2021-07-16T22:28:28.691675Z" 87 | } 88 | }, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/html": [ 93 | "
\n", 94 | "\n", 107 | "\n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | "
xyzw
<int64><int64><int64><int64>
013-10
12200
23110
\n", 148 | "
\n" 149 | ], 150 | "text/plain": [ 151 | " x y z w\n", 152 | " \n", 153 | "0 1 3 -1 0\n", 154 | "1 2 2 0 0\n", 155 | "2 3 1 1 0" 156 | ] 157 | }, 158 | "execution_count": 2, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "df = tibble(x=seq(1,3), y=seq(3,1))\n", 165 | "df >> add_column(z=seq(-1,1), w=0)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 3, 171 | "metadata": { 172 | "execution": { 173 | "iopub.execute_input": "2021-07-16T22:28:28.701403Z", 174 | "iopub.status.busy": "2021-07-16T22:28:28.700765Z", 175 | "iopub.status.idle": "2021-07-16T22:28:28.726181Z", 176 | "shell.execute_reply": "2021-07-16T22:28:28.725600Z" 177 | } 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "
\n", 184 | "\n", 197 | "\n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | "
xzy
<int64><int64><int64>
01-13
1202
2311
\n", 233 | "
\n" 234 | ], 235 | "text/plain": [ 236 | " x z y\n", 237 | " \n", 238 | "0 1 -1 3\n", 239 | "1 2 0 2\n", 240 | "2 3 1 1" 241 | ] 242 | }, 243 | "execution_count": 3, 244 | "metadata": {}, 245 | "output_type": "execute_result" 246 | } 247 | ], 248 | "source": [ 249 | "df >> add_column(z=seq(-1,1), _before=f.y)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 4, 255 | "metadata": { 256 | "execution": { 257 | "iopub.execute_input": "2021-07-16T22:28:28.734549Z", 258 | "iopub.status.busy": "2021-07-16T22:28:28.733777Z", 259 | "iopub.status.idle": "2021-07-16T22:28:28.751592Z", 260 | "shell.execute_reply": "2021-07-16T22:28:28.751990Z" 261 | } 262 | }, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "Names must be unique: x\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "# You can't overwrite existing columns\n", 274 | "try:\n", 275 | " df >> add_column(x = seq(4,6))\n", 276 | "except NameNonUniqueError as err:\n", 277 | " print(err)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 5, 283 | "metadata": { 284 | "execution": { 285 | "iopub.execute_input": "2021-07-16T22:28:28.760324Z", 286 | "iopub.status.busy": "2021-07-16T22:28:28.759646Z", 287 | "iopub.status.idle": "2021-07-16T22:28:28.776413Z", 288 | "shell.execute_reply": "2021-07-16T22:28:28.776819Z" 289 | } 290 | }, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "[ValueError] Value has incompatible index.\n" 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "# You can't create new observations\n", 302 | "with try_catch():\n", 303 | " df >> add_column(z = seq(1,5))" 304 | ] 305 | } 306 | ], 307 | "metadata": { 308 | "kernelspec": { 309 | "display_name": "Python 3.9.5 ('base')", 310 | "language": "python", 311 | "name": "python3" 312 | }, 313 | "language_info": { 314 | "codemirror_mode": { 315 | "name": "ipython", 316 | "version": 3 317 | }, 318 | "file_extension": ".py", 319 | "mimetype": "text/x-python", 320 | "name": "python", 321 | "nbconvert_exporter": "python", 322 | "pygments_lexer": "ipython3", 323 | "version": "3.9.5" 324 | }, 325 | "vscode": { 326 | "interpreter": { 327 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 328 | } 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 2 333 | } 334 | -------------------------------------------------------------------------------- /docs/notebooks/coalesce.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "applicable-fault", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:27:49.903448Z", 10 | "iopub.status.busy": "2021-07-16T22:27:49.902768Z", 11 | "iopub.status.idle": "2021-07-16T22:27:50.871433Z", 12 | "shell.execute_reply": "2021-07-16T22:27:50.871879Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ coalesce
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Replace missing values with the first non-missing value\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/coalesce.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `x`: A vector \n", 50 | "  `*replace`: Values to replace missing values with. \n", 51 | "\n", 52 | "##### Returns:\n", 53 | "  An array of values \n" 54 | ], 55 | "text/plain": [ 56 | "" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | } 62 | ], 63 | "source": [ 64 | "# https://dplyr.tidyverse.org/reference/coalesce.html\n", 65 | "%run nb_helpers.py\n", 66 | "\n", 67 | "from datar.all import *\n", 68 | "\n", 69 | "nb_header(coalesce)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "smoking-gilbert", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:27:50.894245Z", 79 | "iopub.status.busy": "2021-07-16T22:27:50.893678Z", 80 | "iopub.status.idle": "2021-07-16T22:27:51.105408Z", 81 | "shell.execute_reply": "2021-07-16T22:27:51.103096Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "0 5.0\n", 89 | "1 4.0\n", 90 | "2 3.0\n", 91 | "3 0.0\n", 92 | "4 2.0\n", 93 | "5 0.0\n", 94 | "6 1.0\n", 95 | "7 0.0\n", 96 | "Name: y, dtype: float64" 97 | ] 98 | }, 99 | "execution_count": 2, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "df = tibble(x=[5,4,3,NA,2,NA,1,NA])\n", 106 | "df >> mutate(y=coalesce(f.x, 0)) >> pull(f.y)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "id": "intense-liver", 113 | "metadata": { 114 | "execution": { 115 | "iopub.execute_input": "2021-07-16T22:27:51.132360Z", 116 | "iopub.status.busy": "2021-07-16T22:27:51.131696Z", 117 | "iopub.status.idle": "2021-07-16T22:27:51.159635Z", 118 | "shell.execute_reply": "2021-07-16T22:27:51.158647Z" 119 | } 120 | }, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "0 1.0\n", 126 | "1 2.0\n", 127 | "2 3.0\n", 128 | "3 4.0\n", 129 | "4 5.0\n", 130 | "Name: m, dtype: float64" 131 | ] 132 | }, 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "df = tibble(\n", 140 | " y=[1,2,NA,NA,5],\n", 141 | " z=[NA,NA,3,4,5]\n", 142 | ")\n", 143 | "df >> mutate(m=coalesce(f.y, f.z)) >> pull(f.m)" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3.9.5 ('base')", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.9.5" 164 | }, 165 | "vscode": { 166 | "interpreter": { 167 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 168 | } 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 5 173 | } 174 | -------------------------------------------------------------------------------- /docs/notebooks/desc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "important-empty", 7 | 
"metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:27:38.744602Z", 10 | "iopub.status.busy": "2021-07-16T22:27:38.743026Z", 11 | "iopub.status.idle": "2021-07-16T22:27:39.602512Z", 12 | "shell.execute_reply": "2021-07-16T22:27:39.602933Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ desc
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Transform a vector into a format that will be sorted in descending order\n", 44 | "\n", 45 | "This is useful within arrange(). \n", 46 | "\n", 47 | "The original API: \n", 48 | "https://dplyr.tidyverse.org/reference/desc.html \n", 49 | "\n", 50 | "##### Args:\n", 51 | "  `x`: vector to transform \n", 52 | "\n", 53 | "##### Returns:\n", 54 | "  The descending order of x \n" 55 | ], 56 | "text/plain": [ 57 | "" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "%run nb_helpers.py\n", 66 | "from datar.base import factor, letters\n", 67 | "from datar.dplyr import desc\n", 68 | "\n", 69 | "nb_header(desc)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "equal-software", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:27:39.613373Z", 79 | "iopub.status.busy": "2021-07-16T22:27:39.612755Z", 80 | "iopub.status.idle": "2021-07-16T22:27:39.620797Z", 81 | "shell.execute_reply": "2021-07-16T22:27:39.621622Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "array([ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10])" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "desc(range(1,11))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 3, 103 | "id": "delayed-lincoln", 104 | "metadata": { 105 | "execution": { 106 | "iopub.execute_input": "2021-07-16T22:27:39.642189Z", 107 | "iopub.status.busy": "2021-07-16T22:27:39.641582Z", 108 | "iopub.status.idle": "2021-07-16T22:27:39.651348Z", 109 | "shell.execute_reply": "2021-07-16T22:27:39.651772Z" 110 | } 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "array([ -0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10.,\n", 117 | " -11., -12., -13., -14., -15., -16., -17., -18., -19., -20., -21.,\n", 118 | " -22., -23., -24., -25.])" 119 | ] 120 | }, 121 | "execution_count": 3, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "desc(factor(letters))" 128 | ] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3.9.5 ('base')", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.9.5" 148 | }, 149 | "vscode": { 150 | "interpreter": { 151 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 152 | } 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 5 157 | } 158 | -------------------------------------------------------------------------------- /docs/notebooks/forcats_fct_multi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/html": [ 11 | "
Try this notebook on binder.
" 12 | ], 13 | "text/plain": [ 14 | "" 15 | ] 16 | }, 17 | "metadata": {}, 18 | "output_type": "display_data" 19 | }, 20 | { 21 | "data": { 22 | "text/markdown": [ 23 | "###
★ fct_c
" 24 | ], 25 | "text/plain": [ 26 | "" 27 | ] 28 | }, 29 | "metadata": {}, 30 | "output_type": "display_data" 31 | }, 32 | { 33 | "data": { 34 | "text/markdown": [ 35 | "##### Concatenate factors, combining levels\n", 36 | "\n", 37 | "This is a useful ways of patching together factors from multiple sources \n", 38 | "that really should have the same levels but don't. \n", 39 | "\n", 40 | "##### Args:\n", 41 | "  `*fs`: factors to concatenate \n", 42 | "\n", 43 | "##### Returns:\n", 44 | "  The concatenated factor \n" 45 | ], 46 | "text/plain": [ 47 | "" 48 | ] 49 | }, 50 | "metadata": {}, 51 | "output_type": "display_data" 52 | }, 53 | { 54 | "data": { 55 | "text/markdown": [ 56 | "###
★ fct_cross
" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "metadata": {}, 63 | "output_type": "display_data" 64 | }, 65 | { 66 | "data": { 67 | "text/markdown": [ 68 | "##### Combine levels from two or more factors to create a new factor\n", 69 | "\n", 70 | "Computes a factor whose levels are all the combinations of \n", 71 | "the levels of the input factors. \n", 72 | "\n", 73 | "##### Args:\n", 74 | "  `*fs`: factors to cross \n", 75 | "  `sep`: A string to separate levels \n", 76 | "  `keep_empty`: If True, keep combinations with no observations as levels \n", 77 | "\n", 78 | "##### Returns:\n", 79 | "  The new factor \n" 80 | ], 81 | "text/plain": [ 82 | "" 83 | ] 84 | }, 85 | "metadata": {}, 86 | "output_type": "display_data" 87 | } 88 | ], 89 | "source": [ 90 | "%run nb_helpers.py\n", 91 | "from datar.all import *\n", 92 | "\n", 93 | "nb_header(\n", 94 | " fct_c,\n", 95 | " fct_cross,\n", 96 | " book=\"forcat_fct_multi\",\n", 97 | ")\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## fct_c" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "fa = factor(\"a\")\n", 114 | "fb = factor(\"b\")\n", 115 | "fab = factor(c(\"a\", \"b\"))\n", 116 | "\n", 117 | "# c(fa, fb, fab)\n", 118 | "# convert factor to integer for `c`?" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 3, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "['a', 'b', 'a', 'b']\n", 130 | "Categories (2, object): ['a', 'b']" 131 | ] 132 | }, 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "fct_c(fa, fb, fab)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "['a', 'b', 'a', 'b']\n", 151 | "Categories (2, object): ['a', 'b']" 152 | ] 153 | }, 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "fs = [fa, fb, fab]\n", 161 | "fct_c(*fs)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## fct_cross" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 5, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "['apple:green', 'kiwi:green', 'apple:red', 'apple:green']\n", 180 | "Categories (3, object): ['apple:green', 'apple:red', 'kiwi:green']" 181 | ] 182 | }, 183 | "execution_count": 5, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "fruit = factor(c(\"apple\", \"kiwi\", \"apple\", \"apple\"))\n", 190 | "colour = factor(c(\"green\", \"green\", \"red\", \"green\"))\n", 191 | "eaten = c(\"yes\", \"no\", \"yes\", \"no\")\n", 192 | "fct_cross(fruit, colour)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 6, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "['apple:green:yes', 'kiwi:green:no', 'apple:red:yes', 'apple:green:no']\n", 204 | "Categories (4, object): ['apple:green:no', 'apple:green:yes', 'apple:red:yes', 'kiwi:green:no']" 205 | ] 206 | }, 207 | "execution_count": 6, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "fct_cross(fruit, 
colour, eaten)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 7, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "['apple:green', 'kiwi:green', 'apple:red', 'apple:green']\n", 225 | "Categories (4, object): ['apple:green', 'apple:red', 'kiwi:green', 'kiwi:red']" 226 | ] 227 | }, 228 | "execution_count": 7, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "fct_cross(fruit, colour, keep_empty = TRUE)" 235 | ] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "Python 3.9.5 ('base')", 241 | "language": "python", 242 | "name": "python3" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.9.5" 255 | }, 256 | "orig_nbformat": 4, 257 | "vscode": { 258 | "interpreter": { 259 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 260 | } 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 2 265 | } 266 | -------------------------------------------------------------------------------- /docs/notebooks/full_seq.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "occasional-onion", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:27:58.337680Z", 10 | "iopub.status.busy": "2021-07-16T22:27:58.336855Z", 11 | "iopub.status.idle": "2021-07-16T22:27:59.226466Z", 12 | "shell.execute_reply": "2021-07-16T22:27:59.226860Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ full_seq
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Create the full sequence of values in a vector\n", 44 | "\n", 45 | "##### Args:\n", 46 | "  `x`: A numeric vector. \n", 47 | "  `period`: Gap between each observation. The existing data will be \n", 48 | "    checked to ensure that it is actually of this periodicity. \n", 49 | "\n", 50 | "  `tol`: Numerical tolerance for checking periodicity. \n", 51 | "\n", 52 | "##### Returns:\n", 53 | "  The full sequence \n" 54 | ], 55 | "text/plain": [ 56 | "" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | } 62 | ], 63 | "source": [ 64 | "# https://tidyr.tidyverse.org/reference/full_seq.html\n", 65 | "%run nb_helpers.py\n", 66 | "\n", 67 | "from datar.all import *\n", 68 | "\n", 69 | "nb_header(full_seq)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "convenient-professional", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:27:59.233189Z", 79 | "iopub.status.busy": "2021-07-16T22:27:59.232551Z", 80 | "iopub.status.idle": "2021-07-16T22:27:59.245528Z", 81 | "shell.execute_reply": "2021-07-16T22:27:59.246036Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "full_seq(c(1, 2, 4, 5, 10), 1)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "ad52e92c", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3.9.5 ('base')", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.9.5" 126 | }, 127 | "vscode": { 128 | "interpreter": { 129 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 130 | } 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 5 135 | } 136 | -------------------------------------------------------------------------------- /docs/notebooks/group_trim.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "9941c94b", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:00.630674Z", 10 | "iopub.status.busy": "2021-07-16T22:28:00.630102Z", 11 | "iopub.status.idle": "2021-07-16T22:28:01.530300Z", 12 | "shell.execute_reply": "2021-07-16T22:28:01.530718Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ group_trim
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Remove empty groups\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/group_trim.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `_data`: A grouped frame \n", 50 | "  `_drop`: See `group_by`. \n", 51 | "\n", 52 | "##### Returns:\n", 53 | "  A grouped frame \n" 54 | ], 55 | "text/plain": [ 56 | "" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | } 62 | ], 63 | "source": [ 64 | "# https://dplyr.tidyverse.org/reference/group_trim.html\n", 65 | "%run nb_helpers.py\n", 66 | "\n", 67 | "from datar.all import *\n", 68 | "\n", 69 | "nb_header(group_trim)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "id": "d4c86c45", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:28:01.592992Z", 79 | "iopub.status.busy": "2021-07-16T22:28:01.583766Z", 80 | "iopub.status.idle": "2021-07-16T22:28:01.685184Z", 81 | "shell.execute_reply": "2021-07-16T22:28:01.684381Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/html": [ 88 | "
\n", 89 | "\n", 102 | "\n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | "
x_rows
<category><object>
0a[0]
1b[1]
2c[]
\n", 133 | "
\n" 134 | ], 135 | "text/plain": [ 136 | " x _rows\n", 137 | " \n", 138 | "0 a [0]\n", 139 | "1 b [1]\n", 140 | "2 c []" 141 | ] 142 | }, 143 | "execution_count": 3, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "df = tibble(x=factor([\"a\", \"b\"], levels=list(\"abc\")))\n", 150 | "df >> group_by(f.x, _drop=False) >> group_data()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 4, 156 | "id": "a11f49fc", 157 | "metadata": { 158 | "execution": { 159 | "iopub.execute_input": "2021-07-16T22:28:01.749108Z", 160 | "iopub.status.busy": "2021-07-16T22:28:01.742401Z", 161 | "iopub.status.idle": "2021-07-16T22:28:01.861904Z", 162 | "shell.execute_reply": "2021-07-16T22:28:01.862322Z" 163 | } 164 | }, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/html": [ 169 | "
\n", 170 | "\n", 183 | "\n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
x_rows
<category><object>
0a[0]
1b[1]
\n", 209 | "
\n" 210 | ], 211 | "text/plain": [ 212 | " x _rows\n", 213 | " \n", 214 | "0 a [0]\n", 215 | "1 b [1]" 216 | ] 217 | }, 218 | "execution_count": 4, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "df >> group_by(f.x, _drop=False) >> group_trim() >> group_data()" 225 | ] 226 | } 227 | ], 228 | "metadata": { 229 | "kernelspec": { 230 | "display_name": "Python 3.9.5 ('base')", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.9.5" 245 | }, 246 | "vscode": { 247 | "interpreter": { 248 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 249 | } 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 5 254 | } 255 | -------------------------------------------------------------------------------- /docs/notebooks/n_distinct.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "several-cowboy", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:07.196543Z", 10 | "iopub.status.busy": "2021-07-16T22:28:07.195916Z", 11 | "iopub.status.idle": "2021-07-16T22:28:08.127610Z", 12 | "shell.execute_reply": "2021-07-16T22:28:08.128233Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ sample
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Sample a vector\n", 44 | "\n", 45 | "##### Args:\n", 46 | "  `x`: a vector or scaler \n", 47 | "  `size`: the size of the sample \n", 48 | "  `replace`: whether to sample with replacement \n", 49 | "  `prob`: the probabilities of sampling each element \n", 50 | "\n", 51 | "##### Returns:\n", 52 | "  The sampled vector \n" 53 | ], 54 | "text/plain": [ 55 | "" 56 | ] 57 | }, 58 | "metadata": {}, 59 | "output_type": "display_data" 60 | }, 61 | { 62 | "data": { 63 | "text/markdown": [ 64 | "###
★ n_distinct
" 65 | ], 66 | "text/plain": [ 67 | "" 68 | ] 69 | }, 70 | "metadata": {}, 71 | "output_type": "display_data" 72 | }, 73 | { 74 | "data": { 75 | "text/markdown": [ 76 | "##### Count the number of distinct values\n", 77 | "\n", 78 | "The original API: \n", 79 | "https://dplyr.tidyverse.org/reference/distinct.html \n", 80 | "\n", 81 | "##### Args:\n", 82 | "  `_data`: A data frame \n", 83 | "  `na_rm`: If `True`, remove missing values before counting. \n", 84 | "\n", 85 | "##### Returns:\n", 86 | "  The number of distinct values \n" 87 | ], 88 | "text/plain": [ 89 | "" 90 | ] 91 | }, 92 | "metadata": {}, 93 | "output_type": "display_data" 94 | } 95 | ], 96 | "source": [ 97 | "# https://dplyr.tidyverse.org/reference/n_distinct.html\n", 98 | "%run nb_helpers.py\n", 99 | "\n", 100 | "from datar.all import sample, n_distinct\n", 101 | "\n", 102 | "nb_header(sample, n_distinct, book='n_distinct')" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 2, 108 | "id": "sharing-michigan", 109 | "metadata": { 110 | "execution": { 111 | "iopub.execute_input": "2021-07-16T22:28:08.142741Z", 112 | "iopub.status.busy": "2021-07-16T22:28:08.141395Z", 113 | "iopub.status.idle": "2021-07-16T22:28:08.149693Z", 114 | "shell.execute_reply": "2021-07-16T22:28:08.150142Z" 115 | } 116 | }, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "100000" 122 | ] 123 | }, 124 | "execution_count": 2, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "x = sample(range(10), 1e5, replace=True)\n", 131 | "len(x)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 3, 137 | "id": "interested-store", 138 | "metadata": { 139 | "execution": { 140 | "iopub.execute_input": "2021-07-16T22:28:08.215359Z", 141 | "iopub.status.busy": "2021-07-16T22:28:08.214713Z", 142 | "iopub.status.idle": "2021-07-16T22:28:08.311083Z", 143 | "shell.execute_reply": "2021-07-16T22:28:08.310511Z" 144 | } 145 | }, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "10" 151 | ] 152 | }, 153 | "execution_count": 3, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "n_distinct(x)" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3.9.5 ('base')", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.9.5" 180 | }, 181 | "vscode": { 182 | "interpreter": { 183 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 184 | } 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 5 189 | } 190 | -------------------------------------------------------------------------------- /docs/notebooks/nb_helpers.py: -------------------------------------------------------------------------------- 1 | """helpers for notebooks""" 2 | from contextlib import contextmanager 3 | 4 | from IPython.display import display, Markdown, HTML 5 | from IPython.core.interactiveshell import InteractiveShell 6 | import pardoc 7 | from varname.helpers import debug # noqa 8 | from datar import options 9 | 10 | options(allow_conflict_names=True) 11 | 12 | InteractiveShell.ast_node_interactivity = "all" 13 | 14 | BINDER_URL = ( 15 | 
"https://mybinder.org/v2/gh/pwwang/datar/" 16 | "dev?filepath=docs%2Fnotebooks%2F" 17 | ) 18 | 19 | 20 | def nb_header(*funcs, book=None): 21 | """Print the header of a notebooks, mostly the docs""" 22 | if book is None: 23 | book = funcs[0].__name__ 24 | display( 25 | HTML( 26 | '
' 27 | 'Try this notebook on ' 28 | f'' 29 | "binder.
" 30 | ) 31 | ) 32 | 33 | for func in funcs: 34 | try: 35 | parsed = pardoc.google_parser.parse(func.__doc__) 36 | try: 37 | del parsed["Examples"] 38 | except KeyError: 39 | pass 40 | except Exception: 41 | formatted = func.__doc__ 42 | else: 43 | formatted = pardoc.google_parser.format( 44 | parsed, 45 | to="markdown", 46 | heading=5, 47 | indent_base="  ", 48 | ) 49 | 50 | display(Markdown( 51 | f'{"#"*3} ' 52 | '
' 53 | f'★ {func.__name__}' 54 | '
') 55 | ) 56 | display(Markdown(formatted)) 57 | 58 | 59 | @contextmanager 60 | def try_catch(): 61 | """Catch the error and print it out""" 62 | try: 63 | yield 64 | except Exception as exc: 65 | print(f"[{type(exc).__name__}] {exc}") 66 | -------------------------------------------------------------------------------- /docs/notebooks/near.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "permanent-waters", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:33.908144Z", 10 | "iopub.status.busy": "2021-07-16T22:28:33.907513Z", 11 | "iopub.status.idle": "2021-07-16T22:28:34.718530Z", 12 | "shell.execute_reply": "2021-07-16T22:28:34.718946Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ near
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Check if values are approximately equal\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/near.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `x`: A numeric vector \n", 50 | "  `y`: A numeric vector \n", 51 | "  `tol`: Tolerance \n", 52 | "\n", 53 | "##### Returns:\n", 54 | "  An array of boolean values \n" 55 | ], 56 | "text/plain": [ 57 | "" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "# https://dplyr.tidyverse.org/reference/near.html\n", 66 | "%run nb_helpers.py\n", 67 | "\n", 68 | "from datar.all import *\n", 69 | "\n", 70 | "nb_header(near)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 2, 76 | "id": "employed-supplier", 77 | "metadata": { 78 | "execution": { 79 | "iopub.execute_input": "2021-07-16T22:28:34.724636Z", 80 | "iopub.status.busy": "2021-07-16T22:28:34.723973Z", 81 | "iopub.status.idle": "2021-07-16T22:28:34.727483Z", 82 | "shell.execute_reply": "2021-07-16T22:28:34.727978Z" 83 | } 84 | }, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "False" 90 | ] 91 | }, 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "sqrt(2.0) ** 2.0 == 2.0" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 3, 104 | "id": "black-decimal", 105 | "metadata": { 106 | "execution": { 107 | "iopub.execute_input": "2021-07-16T22:28:34.734438Z", 108 | "iopub.status.busy": "2021-07-16T22:28:34.733793Z", 109 | "iopub.status.idle": "2021-07-16T22:28:34.736689Z", 110 | "shell.execute_reply": "2021-07-16T22:28:34.737085Z" 111 | } 112 | }, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/plain": [ 117 | "True" 118 | ] 119 | }, 120 | "execution_count": 3, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "near(sqrt(2.0) ** 2.0, 2.0)" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3.9.5 ('base')", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.9.5" 147 | }, 148 | "vscode": { 149 | "interpreter": { 150 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 151 | } 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 5 156 | } 157 | -------------------------------------------------------------------------------- /docs/notebooks/nest-join.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "adverse-thesis", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:21.040914Z", 10 | "iopub.status.busy": "2021-07-16T22:28:21.040207Z", 11 | "iopub.status.idle": "2021-07-16T22:28:22.128495Z", 12 | "shell.execute_reply": "2021-07-16T22:28:22.128914Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ nest_join
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Nest join two data frames by matching rows.\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/join.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `x`: A data frame \n", 50 | "  `y`: A data frame \n", 51 | "  `by`: A list of column names to join by. \n", 52 | "    If None, use the intersection of the columns of x and y. \n", 53 | "\n", 54 | "  `copy`: If True, always copy the data. \n", 55 | "  `keep`: If True, keep the grouping variables in the output. \n", 56 | "  `name`: The name of the column to store the nested data frame. \n", 57 | "\n", 58 | "##### Returns:\n", 59 | "  A data frame \n" 60 | ], 61 | "text/plain": [ 62 | "" 63 | ] 64 | }, 65 | "metadata": {}, 66 | "output_type": "display_data" 67 | } 68 | ], 69 | "source": [ 70 | "# https://dplyr.tidyverse.org/reference/nest_join.html\n", 71 | "%run nb_helpers.py\n", 72 | "\n", 73 | "from datar.data import band_members, band_instruments\n", 74 | "from datar.all import *\n", 75 | "\n", 76 | "nb_header(nest_join, book='nest-join')" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 2, 82 | "id": "green-continuity", 83 | "metadata": { 84 | "execution": { 85 | "iopub.execute_input": "2021-07-16T22:28:22.136012Z", 86 | "iopub.status.busy": "2021-07-16T22:28:22.135245Z", 87 | "iopub.status.idle": "2021-07-16T22:28:22.213886Z", 88 | "shell.execute_reply": "2021-07-16T22:28:22.214257Z" 89 | } 90 | }, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/html": [ 95 | "
\n", 96 | "\n", 109 | "\n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | "
nameband_y_joined
<object><object><object>
0MickStones<DF 0x1>
1JohnBeatles<DF 1x1>
2PaulBeatles<DF 1x1>
\n", 145 | "
\n" 146 | ], 147 | "text/plain": [ 148 | " name band _y_joined\n", 149 | " \n", 150 | "0 Mick Stones \n", 151 | "1 John Beatles \n", 152 | "2 Paul Beatles " 153 | ] 154 | }, 155 | "execution_count": 2, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "nested = band_members >> nest_join(band_instruments)\n", 162 | "nested" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 3, 168 | "id": "french-egyptian", 169 | "metadata": { 170 | "execution": { 171 | "iopub.execute_input": "2021-07-16T22:28:22.228931Z", 172 | "iopub.status.busy": "2021-07-16T22:28:22.228284Z", 173 | "iopub.status.idle": "2021-07-16T22:28:22.238218Z", 174 | "shell.execute_reply": "2021-07-16T22:28:22.237726Z" 175 | } 176 | }, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "[Empty Tibble\n", 182 | " Columns: [plays]\n", 183 | " Index: [],\n", 184 | " plays\n", 185 | " \n", 186 | " 0 guitar]" 187 | ] 188 | }, 189 | "execution_count": 3, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "nested >> head(2) >> pull(f._y_joined, to='list')" 196 | ] 197 | } 198 | ], 199 | "metadata": { 200 | "kernelspec": { 201 | "display_name": "Python 3.9.5 ('base')", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": "text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.9.5" 216 | }, 217 | "vscode": { 218 | "interpreter": { 219 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 220 | } 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 5 225 | } 226 | -------------------------------------------------------------------------------- /docs/options.md: -------------------------------------------------------------------------------- 1 | Options are used to change some behaviors in `datar`. 2 | 3 | ## Available options 4 | 5 | ### allow_conflict_names 6 | 7 | Whether to allow conflict names that reversed by python. For example, `filter` is a python builtin function, but also a `dplyr` function. You should use `filter_` instead. By default, `datar` will raise an error when you try to import `filter`. You can set this option to `True` to allow this behavior. 8 | 9 | ```python 10 | >>> from datar.all import filter 11 | >>> # or from datar.dplyr import filter 12 | Traceback (most recent call last): 13 | File "", line 1, in 14 | ImportError: cannot import name 'filter' from 'datar.all' 15 | ``` 16 | 17 | ```python 18 | >>> from datar import options 19 | >>> options(allow_conflict_names=True) 20 | >>> from datar.all import filter 21 | >>> filter 22 | 23 | ``` 24 | 25 | The conflict names under `datar.base` are: 26 | 27 | - `min` 28 | - `max` 29 | - `sum` 30 | - `abs` 31 | - `round` 32 | - `all` 33 | - `any` 34 | - `re` 35 | 36 | The conflict names under `datar.dplyr` are: 37 | 38 | - `filter` 39 | - `slice` 40 | 41 | ### backends 42 | 43 | If you have multiple backends installed, you can use this option to specify which backends to use. 44 | 45 | ## Configuration files 46 | 47 | You can change the default behavior of datar by configuring a `.toml.toml` file in your home directory. 
Likewise, to always allow the conflict names to be imported directly (so you do not need the underscore-suffixed alternatives like `filter_`), you can add the following to your `~/.datar.toml` file: 48 | 49 | ```toml 50 | allow_conflict_names = true 51 | ``` 52 | 53 | You can also have a project/directory-based configuration file (`./.datar.toml`) in your current working directory, which has higher priority than the home directory configuration file. 54 | -------------------------------------------------------------------------------- /docs/reference-maps/ALL.md: 1 | 2 | |Module|Description|Reference| 3 | |-|-|-| 4 | |`base`|APIs ported from `r-base/r-stats/r-utils`|[:octicons-cross-reference-16:][5]| 5 | |#|#|#| 6 | |`dplyr`|APIs ported from `tidyverse/dplyr`|[:octicons-cross-reference-16:][2]| 7 | |`tidyr`|APIs ported from `tidyverse/tidyr`|[:octicons-cross-reference-16:][4]| 8 | |`tibble`|APIs ported from `tidyverse/tibble`|[:octicons-cross-reference-16:][1]| 9 | |`forcats`|APIs ported from `tidyverse/forcats`|[:octicons-cross-reference-16:][9]| 10 | |#|#|#| 11 | |`datasets`|Datasets collected from `tidyverse` or other related packages|[:octicons-cross-reference-16:][3]| 12 | |#|#|#| 13 | |`datar`|Datar-specific verbs/functions|[:octicons-cross-reference-16:][6]| 14 | 15 | [1]: ../tibble 16 | [2]: ../dplyr 17 | [3]: ../datasets 18 | [4]: ../tidyr 19 | [5]: ../base 20 | [6]: ../datar 21 | [9]: ../forcats 22 | -------------------------------------------------------------------------------- /docs/reference-maps/datasets.md: 1 | 11 | 12 | ## Reference of `datar.data` 13 | 14 | |API|Description|Source| 15 | |---|---|---:| 16 | |`airlines`|translation between two letter carrier codes and names|[`r-nycflights13`][1]| 17 | |`airports`|airport names and locations|[`r-nycflights13`][1]| 18 | |`flights`|all flights that departed from NYC in 2013|[`r-nycflights13`][1]| 19 | |`weather`|hourly meteorological data for each airport|[`r-nycflights13`][1]| 20 | |`planes`|construction information about each plane|[`r-nycflights13`][1]| 21 | |#|#|#| 22 | |`state_abb`|character vector of 2-letter abbreviations for the state names.|[`r-datasets-state`][15]| 23 | |`state_division`|factor giving state divisions (New England, Middle Atlantic, South Atlantic, East South Central, West South Central, East North Central, West North Central, Mountain, and Pacific).|[`r-datasets-state`][15]| 24 | |`state_region`|factor giving the region (Northeast, South, North Central, West) that each state belongs to.|[`r-datasets-state`][15]| 25 | |#|#|#| 26 | |`airquality`|Daily air quality measurements in New York, May to September 1973.|[`r-datasets-airquality`][2]| 27 | |`anscombe`|Four x-y datasets which have the same traditional statistical properties|[`r-datasets-anscombe`][3]| 28 | |`faithful`|Waiting time between eruptions and the duration of the eruption for the Old Faithful geyser in Yellowstone National Park, Wyoming, USA|[`r-datasets-faithful`][31]| 29 | |`iris`|Edgar Anderson's Iris Data|[`r-datasets-iris`][9]| 30 | |`mtcars`|Motor Trend Car Road Tests|[`r-datasets-mtcars`][10]| 31 | |`warpbreaks`|The Number of Breaks in Yarn during Weaving|[`r-datasets-warpbreaks`][19]| 32 | |`ToothGrowth`|The Effect of Vitamin C on Tooth Growth in Guinea Pigs|[`r-datasets-ToothGrowth`][21]| 33 | |#|#|#| 34 | |`band_instruments`|Band members of the Beatles and Rolling Stones|[`r-dplyr-band_members`][4]| 35 | |`band_instruments2`|Band 
members of the Beatles and Rolling Stones|[`r-dplyr-band_members`][4]| 36 | |`band_members`|Band members of the Beatles and Rolling Stones|[`r-dplyr-band_members`][4]| 37 | |#|#|#| 38 | |`table1`|Example tabular representations|[`r-tidyr-table1`][17]| 39 | |`table2`|Example tabular representations|[`r-tidyr-table1`][17]| 40 | |`table3`|Example tabular representations|[`r-tidyr-table1`][17]| 41 | |`table4a`|Example tabular representations|[`r-tidyr-table1`][17]| 42 | |`table4b`|Example tabular representations|[`r-tidyr-table1`][17]| 43 | |`table5`|Example tabular representations|[`r-tidyr-table1`][17]| 44 | |#|#|#| 45 | |`starwars`|Starwars characters (columns `films`, `vehicles` and `starships` are not included)|[`r-dplyr-starwars`][14]| 46 | |`storms`|This data is a subset of the NOAA Atlantic hurricane database best track data|[`r-dplyr-storms`][16]| 47 | |`us_rent_income`|US rent and income data|[`r-tidyr-us_rent_income`][18]| 48 | |`world_bank_pop`|Population data from the world bank|[`r-tidyr-world_bank_pop`][20]| 49 | |#|#|#| 50 | |`billboard`|Song rankings for Billboard top 100 in the year 2000|[`r-tidyr-billboard`][5]| 51 | |`construction`|Completed construction in the US in 2018|[`r-tidyr-construction`][6]| 52 | |`fish_encounters`|Information about fish swimming down a river|[`r-tidyr-fish_encounters`][8]| 53 | |`population`|A subset of data from the World Health Organization Global Tuberculosis Report, and accompanying global populations.|[`r-tidyr-who`][11]| 54 | |`relig_income`|Pew religion and income survey|[`r-tidyr-relig_income`][12]| 55 | |`smiths`|A small demo dataset describing John and Mary Smith.|[`r-tidyr-smiths`][13]| 56 | |`who`|A subset of data from the World Health Organization Global Tuberculosis Report, and accompanying global populations.|[`r-tidyr-who`][11]| 57 | |#|#|#| 58 | |`diamonds`|A dataset containing the prices and other attributes of almost 54,000 diamonds|[`r-ggplot2-diamonds`][7]| 59 | |`economics` `economics_long`|US economic time series|[`r-ggplot2-economics`][22]| 60 | |`faithfuld`|2d density estimate of Old Faithful data|[`r-ggplot2-faithfuld`][23]| 61 | |`midwest`|Midwest demographics|[`r-ggplot2-midwest`][24]| 62 | |`mpg`|Fuel economy data from 1999 to 2008 for 38 popular models of cars|[`r-ggplot2-mpg`][25]| 63 | |`msleep`|An updated and expanded version of the mammals sleep dataset|[`r-ggplot2-msleep`][26]| 64 | |`presidential`|Terms of 11 presidents from Eisenhower to Obama|[`r-ggplot2-presidential`][27]| 65 | |`seals`|Vector field of seal movements|[`r-ggplot2-seals`][28]| 66 | |`txhousing`|Housing sales in TX|[`r-ggplot2-txhousing`][29]| 67 | |`luv_colours`|`colors()` in Luv space|[`r-ggplot2-luv_colours`][30]| 68 | |#|#|#| 69 | |`gss_cat`|A sample of categorical variables from the General Social survey|[`r-forcats-gss_cat`][32]| 70 | 71 | [1]: https://github.com/tidyverse/nycflights13 72 | [2]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/airquality 73 | [3]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/anscombe 74 | [4]: https://dplyr.tidyverse.org/reference/band_members.html 75 | [5]: https://tidyr.tidyverse.org/reference/billboard.html 76 | [6]: https://tidyr.tidyverse.org/reference/construction.html 77 | [7]: https://ggplot2.tidyverse.org/reference/diamonds.html 78 | [8]: https://tidyr.tidyverse.org/reference/fish_encounters.html 79 | [9]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/iris 80 | [10]: 
https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/mtcars 81 | [11]: https://tidyr.tidyverse.org/reference/who.html 82 | [12]: https://tidyr.tidyverse.org/reference/relig_income.html 83 | [13]: https://tidyr.tidyverse.org/reference/smiths.html 84 | [14]: https://dplyr.tidyverse.org/reference/starwars.html 85 | [15]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/state 86 | [16]: https://dplyr.tidyverse.org/reference/storms.html 87 | [17]: https://tidyr.tidyverse.org/reference/table1.html 88 | [18]: https://tidyr.tidyverse.org/reference/us_rent_income.html 89 | [19]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/warpbreaks 90 | [20]: https://tidyr.tidyverse.org/reference/world_bank_pop.html 91 | [21]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/ToothGrowth 92 | [22]: https://ggplot2.tidyverse.org/reference/economics.html 93 | [23]: https://ggplot2.tidyverse.org/reference/faithfuld.html 94 | [24]: https://ggplot2.tidyverse.org/reference/midwest.html 95 | [25]: https://ggplot2.tidyverse.org/reference/mpg.html 96 | [26]: https://ggplot2.tidyverse.org/reference/msleep.html 97 | [27]: https://ggplot2.tidyverse.org/reference/presidential.html 98 | [28]: https://ggplot2.tidyverse.org/reference/seals.html 99 | [29]: https://ggplot2.tidyverse.org/reference/txhousing.html 100 | [30]: https://ggplot2.tidyverse.org/reference/luv_colours.html 101 | [31]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/faithful 102 | [32]: https://forcats.tidyverse.org/reference/gss_cat.html 103 | -------------------------------------------------------------------------------- /docs/reference-maps/forcats.md: 1 | 11 | 12 | ## Reference of `datar.forcats` 13 | 14 | Reference map of `r-tidyverse-forcats` can be found [here][1]. 
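For orientation, a minimal sketch of calling these functions from Python (it assumes the `datar-pandas` backend is installed; the sample data is arbitrary):

```python
from datar.forcats import fct_count, fct_infreq

fct = fct_infreq(["b", "b", "c", "a", "b"])  # reorder levels by frequency
fct_count(fct)  # tabulate how often each level occurs
```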
15 | 16 | **Legend:** 17 | 18 | |Sample|Status| 19 | |---|---| 20 | |[normal]()|API that is regularly ported| 21 | |[strike-through]()|API that is not ported, or not an API originally| 22 | |[**bold**]()|API that is unique in `datar`| 23 | |[_italic_]()|Work in progress| 24 | 25 | ### Change order of levels 26 | 27 | |API|Description|Notebook example| 28 | |---|---|---:| 29 | |[fct_relevel()][2]|Reorder factor levels by hand|[:material-notebook:][3]| 30 | |[fct_inorder()][4] [fct_infreq()][5] [fct_inseq()][6]|Reorder factor levels by first appearance, frequency, or numeric order|[:material-notebook:][3]| 31 | |[fct_reorder()][7] [fct_reorder2()][8] [last2()][9] [first2()][10]|Reorder factor levels by sorting along another variable|[:material-notebook:][3]| 32 | |[fct_shuffle()][11]|Randomly permute factor levels|[:material-notebook:][3]| 33 | |[fct_rev()][12]|Reverse order of factor levels|[:material-notebook:][3]| 34 | |[fct_shift()][13]|Shift factor levels to left or right, wrapping around at end|[:material-notebook:][3]| 35 | 36 | ### Change value of levels 37 | 38 | |API|Description|Notebook example| 39 | |---|---|---:| 40 | |[fct_anon()][15]|Anonymise factor levels|[:material-notebook:][14]| 41 | |[fct_collapse()][16]|Collapse factor levels into manually defined groups|[:material-notebook:][14]| 42 | |[fct_lump()][17] [fct_lump_min()][18] [fct_lump_prop()][19] [fct_lump_n()][20] [fct_lump_lowfreq()][41]|Lump together factor levels into "other"|[:material-notebook:][14]| 43 | |[fct_other()][21]|Replace levels with "other"|[:material-notebook:][14]| 44 | |[fct_recode()][22]|Change factor levels by hand|[:material-notebook:][14]| 45 | |[fct_relabel()][23]|Automatically relabel factor levels, collapse as necessary|[:material-notebook:][14]| 46 | 47 | ### Add/remove levels 48 | 49 | |API|Description|Notebook example| 50 | |---|---|---:| 51 | |[fct_expand()][25]|Add additional levels to a factor|[:material-notebook:][24]| 52 | |[fct_explicit_na()][26]|Make missing values explicit|[:material-notebook:][24]| 53 | |[fct_drop()][27]|Drop unused levels|[:material-notebook:][24]| 54 | |[fct_unify()][28]|Unify the levels in a list of factors|[:material-notebook:][24]| 55 | 56 | ### Combine multiple factors 57 | 58 | |API|Description|Notebook example| 59 | |---|---|---:| 60 | |[fct_c()][29]|Concatenate factors, combining levels|[:material-notebook:][31]| 61 | |[fct_cross()][30]|Combine levels from two or more factors to create a new factor|[:material-notebook:][31]| 62 | 63 | ### Other helpers 64 | 65 | |API|Description|Notebook example| 66 | |---|---|---:| 67 | |[as_factor()][33]|Convert input to a factor|[:material-notebook:][32]| 68 | |[fct_count()][34]|Count entries in a factor|[:material-notebook:][32]| 69 | |[fct_match()][35]|Test for presence of levels in a factor|[:material-notebook:][32]| 70 | |[fct_unique()][36]|Unique values of a factor|[:material-notebook:][32]| 71 | |[lvls_reorder()][37] [lvls_revalue()][38] [lvls_expand()][39]|Low-level functions for manipulating levels|[:material-notebook:][32]| 72 | |[lvls_union()][40]|Find all levels in a list of factors|[:material-notebook:][32]| 73 | 74 | [1]: https://forcats.tidyverse.org/reference/index.html 75 | [2]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_relevel 76 | [3]: ../../notebooks/forcats_lvl_order 77 | [4]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_inorder 78 | [5]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_infreq 79 | [6]: 
../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_inseq 80 | [7]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_reorder 81 | [8]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_reorder2 82 | [9]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.last2 83 | [10]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.first2 84 | [11]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_shuffle 85 | [12]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_rev 86 | [13]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_shift 87 | [14]: ../../notebooks/forcats_lvl_value 88 | [15]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_anon 89 | [16]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_collapse 90 | [17]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump 91 | [18]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_min 92 | [19]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_prop 93 | [20]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_n 94 | [21]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_other 95 | [22]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_recode 96 | [23]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_relabel 97 | [24]: ../../notebooks/forcats_lvl_addrm 98 | [25]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_expand 99 | [26]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_explicit_na 100 | [27]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_drop 101 | [28]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_unify 102 | [29]: ../../api/datar.forcats.fct_multi/#datar.forcats.fct_multi.fct_c 103 | [30]: ../../api/datar.forcats.fct_multi/#datar.forcats.fct_multi.fct_cross 104 | [31]: ../../notebooks/forcats_fct_multi 105 | [32]: ../../notebooks/forcats_misc 106 | [33]: ../../api/datar.forcats.misc/#datar.forcats.misc.as_factor 107 | [34]: ../../api/datar.forcats.misc/#datar.forcats.misc.fct_count 108 | [35]: ../../api/datar.forcats.misc/#datar.forcats.misc.fct_match 109 | [36]: ../../api/datar.forcats.misc/#datar.forcats.misc.fct_unique 110 | [37]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_reorder 111 | [38]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_revalue 112 | [39]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_expand 113 | [40]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_union 114 | [41]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_lowfreq 115 | -------------------------------------------------------------------------------- /docs/reference-maps/other.md: 1 | 11 | 12 | ## Reference of `datar.datar` 13 | 14 | **Legend:** 15 | 16 | |Sample|Status| 17 | |---|---| 18 | |[normal]()|API that is regularly ported| 19 | |[strike-through]()|API that is not ported, or not an API originally| 20 | |[**bold**]()|API that is unique in `datar`| 21 | |[_italic_]()|Work in progress| 22 | 23 | ### Verbs 24 | 25 | |API|Description|Notebook example| 26 | |---|---|---:| 27 | |[**`get()`**][2]|Extract values from data frames|[:material-notebook:][1]| 28 | |[**`flatten()`**][3]|Flatten values of data frames|[:material-notebook:][1]| 29 | 30 | ### Functions 31 | 32 | |API|Description|Notebook example| |---|---|---:| |[**`itemgetter()`**][4]|Turn `a[f.x]` to a valid verb argument with `itemgetter(a, f.x)`|[:material-notebook:][1]| 33 | 
|[**`attrgetter()`**][5]|`f.x.<attr>` but works with `SeriesGroupBy` object|[:material-notebook:][1]| 34 | |[**`pd_str()`**][6]|`str` accessor but works with `SeriesGroupBy` object|[:material-notebook:][1]| 35 | |[**`pd_cat()`**][7]|`cat` accessor but works with `SeriesGroupBy` object|[:material-notebook:][1]| 36 | |[**`pd_dt()`**][8]|`dt` accessor but works with `SeriesGroupBy` object|[:material-notebook:][1]| 37 | 38 | 39 | [1]: ../../notebooks/datar 40 | [2]: ../../api/datar.datar.verbs/#datar.datar.verbs.get 41 | [3]: ../../api/datar.datar.verbs/#datar.datar.verbs.flatten 42 | [4]: ../../api/datar.datar.funcs/#datar.datar.funcs.itemgetter 43 | [5]: ../../api/datar.datar.funcs/#datar.datar.funcs.attrgetter 44 | [6]: ../../api/datar.datar.funcs/#datar.datar.funcs.pd_str 45 | [7]: ../../api/datar.datar.funcs/#datar.datar.funcs.pd_cat 46 | [8]: ../../api/datar.datar.funcs/#datar.datar.funcs.pd_dt 47 | -------------------------------------------------------------------------------- /docs/reference-maps/stats.md: 1 | 11 | 12 | ## Reference of `datar.base.stats` 13 | 14 | **Legend:** 15 | 16 | |Sample|Status| 17 | |---|---| 18 | |[normal]()|API that is regularly ported| 19 | |[strike-through]()|API that is not ported, or not an API originally| 20 | |[**bold**]()|API that is unique in `datar`| 21 | |[_italic_]()|Work in progress| 22 | 23 | ### Stats 24 | 25 | |API|Description|Notebook example| 26 | |---|---|---:| 27 | |[`rnorm()`][1]|Generates random deviates for the normal distribution|| 28 | |[`rpois()`][2]|Generates random deviates for the Poisson distribution|| 29 | |[`runif()`][3]|Generates random deviates for the uniform distribution|| 30 | 31 | 32 | [1]: ../../api/datar.base.stats/#datar.base.stats.rnorm 33 | [2]: ../../api/datar.base.stats/#datar.base.stats.rpois 34 | [3]: ../../api/datar.base.stats/#datar.base.stats.runif 35 | -------------------------------------------------------------------------------- /docs/reference-maps/tibble.md: 1 | 11 | 12 | ## Reference of `datar.tibble` 13 | 14 | Reference map of `r-tidyverse-tibble` can be found [here][1]. 15 | 16 | **Legend:** 17 | 18 | |Sample|Status| 19 | |---|---| 20 | |[normal]()|API that is regularly ported| 21 | |[strike-through]()|API that is not ported, or not an API originally| 22 | |[**bold**]()|API that is unique in `datar`| 23 | |[_italic_]()|Work in progress| 24 | 25 | 26 | ### Tibbles 27 | 28 | !!! Tip 29 | 30 | Tibbles in `datar` are just `pandas.DataFrame`s, so there is no difference between data frames created by `tibble()` and by the `pandas.DataFrame` constructor, unlike in R, where `tibble` and `data.frame` are distinct classes. 31 | 32 | Also note that tibbles in `datar` are not `rownames`/`index` aware for most APIs, just like most `tidyverse` APIs. 
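A minimal sketch of what this means in practice (it assumes the `datar-pandas` backend is installed; the data and column names are arbitrary):

```python
import pandas as pd
from datar.tibble import tibble

df = tibble(x=[1, 2], y=["a", "b"])
# A tibble here is just a pandas DataFrame ...
assert isinstance(df, pd.DataFrame)
df.loc[df["x"] > 1, "y"]  # ... so plain pandas subsetting keeps working
```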
33 | 34 | |API|Description|Notebook example| 35 | |---|---|---:| 36 | |`tibble-package`||| 37 | |[`tibble()`][12] [`tibble_row()`][18]|Build a data frame| [:material-notebook:][2] | 38 | |[`fibble()`][13]|Same as `tibble()`, but can be used as verb arguments| [:material-notebook:][2] | 39 | |`tbl_df-class`||| 40 | |`print()` `format()`||| 41 | |[`tribble()`][3]|Row-wise tibble creation|[:material-notebook:][2]| 42 | 43 | ### Coercion 44 | 45 | |API|Description|Notebook example| 46 | |---|---|---:| 47 | |`is_tibble()`||| 48 | |[`as_tibble()`][19]|Convert data frames into datar's tibbles|| 49 | |`new_tibble()` `validate_tibble()`||| 50 | |[`enframe()`][4] [`deframe()`][14]|Converting iterables to data frames, and vice versa| [:material-notebook:][5]| 51 | 52 | ### Manipulation 53 | 54 | |API|Description|Notebook example| 55 | |---|---|---:| 56 | |`$` `[[` `[`|Please subset data frames using `pandas` syntax (`df.col`, `df['col']`, `df.loc[...]` or `df.iloc[...]`)|| 57 | |[`add_row()`][6]| Add rows to a data frame | [:material-notebook:][7] | 58 | |[`add_column()`][8]| Add columns to a data frame | [:material-notebook:][9] | 59 | 60 | ### Helpers 61 | 62 | |API|Description|Notebook example| 63 | |---|---|---:| 64 | |`reexports`||| 65 | |[`has_rownames()`/`has_index()`][10] [`remove_rownames()`/`remove_index()`/`drop_index()`][15] [`rownames_to_column()`/`index_to_column()`][16] [`rowid_to_column()` `column_to_rownames()`/`column_to_index()`][17]|Tools for working with row names/DataFrame indexes|[:material-notebook:][11]| 66 | |`view()`||| 67 | 68 | ### Vectors, matrices, and lists 69 | 70 | 71 | [1]: https://tibble.tidyverse.org/reference/index.html 72 | [2]: ../../notebooks/tibble 73 | [3]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.tribble 74 | [4]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.enframe 75 | [5]: ../../notebooks/enframe 76 | [6]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.add_row 77 | [7]: ../../notebooks/add_row 78 | [8]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.add_column 79 | [9]: ../../notebooks/add_column 80 | [10]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.has_rownames 81 | [11]: ../../notebooks/rownames 82 | [12]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.tibble 83 | [13]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.fibble 84 | [14]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.deframe 85 | [15]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.remove_rownames 86 | [16]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.rownames_to_column 87 | [17]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.rowid_to_column 88 | [18]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.tibble_row 89 | [19]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.as_tibble 90 | -------------------------------------------------------------------------------- /docs/reference-maps/tidyr.md: 1 | 11 | 12 | ## Reference of `datar.tidyr` 13 | 14 | Reference map of `r-tidyverse-tidyr` can be found [here][1]. 
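For orientation, a minimal sketch of one of the verbs below (it assumes the `datar-pandas` backend is installed; the data and column names are arbitrary):

```python
from datar.all import tibble, pivot_longer

wide = tibble(id=[1, 2], a=[3, 4], b=[5, 6])
# Stack the two value columns into key/value pairs, one row per (id, key)
long = wide >> pivot_longer(["a", "b"], names_to="key", values_to="value")
```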
15 | 16 | **Legend:** 17 | 18 | |Sample|Status| 19 | |---|---| 20 | |[normal]()|API that is regularly ported| 21 | |[strike-through]()|API that is not ported, or not an API originally| 22 | |[**bold**]()|API that is unique in `datar`| 23 | |[_italic_]()|Work in progress| 24 | 25 | ### Pivoting 26 | 27 | |API|Description|Notebook example| 28 | |---|---|---:| 29 | |[pivot_longer()][26]|Pivot data from wide to long|[:material-notebook:][27]| 30 | |[pivot_wider()][28]|Pivot data from long to wide|[:material-notebook:][29]| 31 | 32 | ### Rectangling 33 | 34 | |API|Description|Notebook example| 35 | |---|---|---:| 36 | |_`hoist()`_ _`unnest_longer()`_ _`unnest_wider()`_ _`unnest_auto()`_|Rectangle a nested list into a tidy tibble|| 37 | 38 | ### Nesting 39 | 40 | |API|Description|Notebook example| 41 | |---|---|---:| 42 | |[`nest()`][9] [`unnest()`][10]|Nest and unnest|[:material-notebook:][11]| 43 | 44 | ### Character vectors 45 | 46 | |API|Description|Notebook example| 47 | |---|---|---:| 48 | |[`extract()`][22]|Extract a character column into multiple columns using regular expression groups|[:material-notebook:][23]| 49 | |[`separate()`][30]|Separate a character column into multiple columns with a regular expression or numeric locations|[:material-notebook:][31]| 50 | |[`separate_rows()`][34]|Separate a collapsed column into multiple rows|[:material-notebook:][35]| 51 | |[`unite()`][36]|Unite multiple columns into one by pasting strings together|[:material-notebook:][37]| 52 | 53 | ### Missing values 54 | 55 | |API|Description|Notebook example| 56 | |---|---|---:| 57 | |[`complete()`][18]|Complete a data frame with missing combinations of data|[:material-notebook:][19]| 58 | |[`drop_na()`][20]|Drop rows containing missing values|[:material-notebook:][21]| 59 | |[`expand()`][12] [`crossing()`][13] [`nesting()`][14]|Expand data frame to include all possible combinations of values|[:material-notebook:][15]| 60 | |[`expand_grid()`][16]|Create a tibble from all combinations of inputs|[:material-notebook:][17]| 61 | |[`fill()`][24]|Fill in missing values with previous or next value|[:material-notebook:][25]| 62 | |[`full_seq()`][40]|Create the full sequence of values in a vector|[:material-notebook:][41]| 63 | |[`replace_na()`][38]|Replace NAs with specified values|[:material-notebook:][39]| 64 | 65 | ### Miscellanea 66 | 67 | |API|Description|Notebook example| 68 | |---|---|---:| 69 | |[`chop()`][3] [`unchop()`][4]|Chop and unchop|[:material-notebook:][5]| 70 | |[`pack()`][6] [`unpack()`][7]|Pack and unpack|[:material-notebook:][8]| 71 | |[`uncount()`][32]|"Uncount" a data frame|[:material-notebook:][33]| 72 | 73 | ### Data 74 | 75 | See [datasets][2] 76 | 77 | [1]: https://tidyr.tidyverse.org/reference/index.html 78 | [2]: ../datasets 79 | [3]: ../../api/datar.tidyr.chop/#datar.tidyr.chop.chop 80 | [4]: ../../api/datar.tidyr.chop/#datar.tidyr.chop.unchop 81 | [5]: ../../notebooks/chop 82 | [6]: ../../api/datar.tidyr.pack/#datar.tidyr.pack.pack 83 | [7]: ../../api/datar.tidyr.pack/#datar.tidyr.pack.unpack 84 | [8]: ../../notebooks/pack 85 | [9]: ../../api/datar.tidyr.nest/#datar.tidyr.nest.nest 86 | [10]: ../../api/datar.tidyr.nest/#datar.tidyr.nest.unnest 87 | [11]: ../../notebooks/nest 88 | [12]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.expand 89 | [13]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.crossing 90 | [14]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.nesting 91 | [15]: ../../notebooks/expand 92 | [16]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.expand_grid 93 | [17]: ../../notebooks/expand_grid 94 | [18]: 
../../api/datar.tidyr.complete/#datar.tidyr.complete.complete 95 | [19]: ../../notebooks/complete 96 | [20]: ../../api/datar.tidyr.drop_na/#datar.tidyr.drop_na.drop_na 97 | [21]: ../../notebooks/drop_na 98 | [22]: ../../api/datar.tidyr.extract/#datar.tidyr.extract.extract 99 | [23]: ../../notebooks/extract 100 | [24]: ../../api/datar.tidyr.fill/#datar.tidyr.fill.fill 101 | [25]: ../../notebooks/fill 102 | [26]: ../../api/datar.tidyr.pivot_long/#datar.tidyr.pivot_long.pivot_longer 103 | [27]: ../../notebooks/pivot_longer 104 | [28]: ../../api/datar.tidyr.pivot_wide/#datar.tidyr.pivot_wide.pivot_wider 105 | [29]: ../../notebooks/pivot_wider 106 | [30]: ../../api/datar.tidyr.separate/#datar.tidyr.separate.separate 107 | [31]: ../../notebooks/separate 108 | [32]: ../../api/datar.tidyr.uncount/#datar.tidyr.uncount.uncount 109 | [33]: ../../notebooks/uncount 110 | [34]: ../../api/datar.tidyr.separate/#datar.tidyr.separate.separate_rows 111 | [35]: ../../notebooks/separate 112 | [36]: ../../api/datar.tidyr.unite/#datar.tidyr.unite.unite 113 | [37]: ../../notebooks/unite 114 | [38]: ../../api/datar.tidyr.replace_na/#datar.tidyr.replace_na.replace_na 115 | [39]: ../../notebooks/replace_na 116 | [40]: ../../api/datar.tidyr.funcs/#datar.tidyr.funcs.full_seq 117 | [41]: ../../notebooks/full_seq 118 | -------------------------------------------------------------------------------- /docs/reference-maps/utils.md: 1 | 11 | 12 | ## Reference of `datar.base.verbs` 13 | 14 | **Legend:** 15 | 16 | |Sample|Status| 17 | |---|---| 18 | |[normal]()|API that is regularly ported| 19 | |[strike-through]()|API that is not ported, or not an API originally| 20 | |[**bold**]()|API that is unique in `datar`| 21 | |[_italic_]()|Work in progress| 22 | 23 | ### Utils 24 | 25 | |API|Description|Notebook example| 26 | |---|---|---:| 27 | |[`head()`][1]|Get the head of the object|| 28 | |[`tail()`][2]|Get the tail of the object|| 29 | 30 | [1]: ../../api/datar.base.verbs/#datar.base.verbs.head 31 | [2]: ../../api/datar.base.verbs/#datar.base.verbs.tail 32 | -------------------------------------------------------------------------------- /docs/style.css: 1 | 2 | .md-main__inner.md-grid { 3 | max-width: 80%; 4 | margin-left: 32px; 5 | } 6 | 7 | .md-typeset .admonition, .md-typeset details { 8 | font-size: .7rem !important; 9 | } 10 | 11 | .md-typeset table:not([class]) td { 12 | padding: .55em 1.25em !important; 13 | } 14 | 15 | .md-typeset table:not([class]) th { 16 | padding: .75em 1.25em !important; 17 | } 18 | 19 | .md-grid { 20 | max-width: none; 21 | } 22 | 23 | .mkapi-docstring{ 24 | line-height: 1; 25 | } 26 | .mkapi-node { 27 | background-color: #f4faff; 28 | border-top: 3px solid #151922; 29 | } 30 | .mkapi-node .mkapi-object-container { 31 | background-color: #d1d4d6; 32 | padding: .12em .4em; 33 | } 34 | .mkapi-node .mkapi-object-container .mkapi-object.code { 35 | background: none; 36 | border: none; 37 | } 38 | .mkapi-node .mkapi-object-container .mkapi-object.code * { 39 | font-size: .65rem !important; 40 | } 41 | .mkapi-node pre { 42 | line-height: 1.5; 43 | } 44 | .md-typeset pre>code { 45 | overflow: visible; 46 | line-height: 1.2; 47 | } 48 | .mkapi-docstring .md-typeset pre>code { 49 | font-size: 0.1rem !important; 50 | } 51 | .mkapi-section-name.bases { 52 | margin-top: .2em; 53 | } 54 | .mkapi-section-body.bases { 55 | padding-bottom: .7em; 
line-height: 1.3; 57 | } 58 | .mkapi-section.bases { 59 | margin-bottom: .8em; 60 | } 61 | .mkapi-node * { 62 | font-size: .7rem; 63 | } 64 | .mkapi-node a.mkapi-src-link { 65 | word-break: keep-all; 66 | } 67 | .mkapi-docstring { 68 | padding: .4em .15em !important; 69 | } 70 | .mkapi-section-name-body { 71 | font-size: .72rem !important; 72 | } 73 | .mkapi-node ul.mkapi-items li { 74 | line-height: 1.4 !important; 75 | } 76 | .mkapi-node ul.mkapi-items li * { 77 | font-size: .65rem !important; 78 | } 79 | .mkapi-node code.mkapi-object-signature { 80 | padding-right: 2px; 81 | } 82 | .mkapi-node .mkapi-code * { 83 | font-size: .6rem; 84 | } 85 | .mkapi-node a.mkapi-docs-link { 86 | font-size: .6rem; 87 | } 88 | .mkapi-node h1.mkapi-object.mkapi-object-code { 89 | margin: .2em .3em; 90 | } 91 | .mkapi-node h1.mkapi-object.mkapi-object-code .mkapi-object-kind.mkapi-object-kind-code { 92 | font-style: normal; 93 | margin-right: 16px; 94 | } 95 | .mkapi-node .mkapi-item-name { 96 | font-size: .7rem !important; 97 | color: #555; 98 | padding-right: 4px; 99 | } 100 | .md-typeset { 101 | font-size: .75rem !important; 102 | line-height: 1.5 !important; 103 | } 104 | .mkapi-object-kind.package.top { 105 | font-size: .8rem !important; 106 | color: #111; 107 | 108 | } 109 | .mkapi-object.package.top > h2 { 110 | font-size: .8rem !important; 111 | } 112 | 113 | .mkapi-object-body.package.top * { 114 | font-size: .75rem !important; 115 | } 116 | .mkapi-object-kind.module.top { 117 | font-size: .75rem !important; 118 | color: #222; 119 | } 120 | 121 | .mkapi-object-body.module.top * { 122 | font-size: .75rem !important; 123 | } 124 | 125 | .mkapi-section-body.examples pre code { 126 | font-size: .65rem !important; 127 | overflow: auto; 128 | } 129 | -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/example.png -------------------------------------------------------------------------------- /example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/example2.png -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: datar 2 | repo_url: https://github.com/pwwang/datar 3 | repo_name: pwwang/datar 4 | theme: 5 | favicon: favicon.png 6 | logo: favicon.png 7 | icon: 8 | repo: fontawesome/brands/github 9 | palette: 10 | primary: black 11 | name: 'material' 12 | font: 13 | text: 14 | - FreightSans 15 | - "Helvetica Neue" 16 | - Helvetica 17 | - Arial 18 | - sans-serif 19 | code: 20 | - IBMPlexMono 21 | - SFMono-Regular 22 | - Menlo 23 | - Monaco 24 | - Consolas 25 | - "Liberation Mono" 26 | - "Courier New" 27 | - monospace 28 | features: 29 | - navigation.top 30 | markdown_extensions: 31 | - markdown.extensions.admonition 32 | - pymdownx.emoji: 33 | emoji_index: !!python/name:material.extensions.emoji.twemoji 34 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 35 | - pymdownx.superfences: 36 | preserve_tabs: true 37 | - toc: 38 | baselevel: 2 39 | plugins: 40 | - search # necessary for search to work 41 | - mkapi 42 | - mkdocs-jupyter: 43 | execute: false 44 | extra_css: 45 | - style.css 46 | nav: 47 | - 'Home': 'index.md' 48 | - 
'Reference maps': 49 | - 'reference-maps/ALL.md' 50 | - 'reference-maps/base.md' 51 | - 'reference-maps/dplyr.md' 52 | - 'reference-maps/tibble.md' 53 | - 'reference-maps/tidyr.md' 54 | - 'reference-maps/forcats.md' 55 | - 'reference-maps/datasets.md' 56 | - 'reference-maps/other.md' 57 | - 'Import datar': 'import.md' 58 | - 'Backends': 'backends.md' 59 | - 'Options': 'options.md' 60 | - 'The f-expression': 'f.md' 61 | - 'Data': 'data.md' 62 | - 'Examples': 63 | - 'across': 'notebooks/across.ipynb' 64 | - 'add_column': 'notebooks/add_column.ipynb' 65 | - 'add_row': 'notebooks/add_row.ipynb' 66 | - 'arrange': 'notebooks/arrange.ipynb' 67 | - 'base': 'notebooks/base.ipynb' 68 | - 'base-arithmetic': 'notebooks/base-arithmetic.ipynb' 69 | - 'base-funs': 'notebooks/base-funs.ipynb' 70 | - 'between': 'notebooks/between.ipynb' 71 | - 'bind': 'notebooks/bind.ipynb' 72 | - 'case_when': 'notebooks/case_when.ipynb' 73 | - 'chop': 'notebooks/chop.ipynb' 74 | - 'coalesce': 'notebooks/coalesce.ipynb' 75 | - 'complete': 'notebooks/complete.ipynb' 76 | - 'context': 'notebooks/context.ipynb' 77 | - 'count': 'notebooks/count.ipynb' 78 | - 'cumall': 'notebooks/cumall.ipynb' 79 | - 'desc': 'notebooks/desc.ipynb' 80 | - 'distinct': 'notebooks/distinct.ipynb' 81 | - 'drop_na': 'notebooks/drop_na.ipynb' 82 | - 'enframe': 'notebooks/enframe.ipynb' 83 | - 'expand': 'notebooks/expand.ipynb' 84 | - 'expand_grid': 'notebooks/expand_grid.ipynb' 85 | - 'extract': 'notebooks/extract.ipynb' 86 | - 'fill': 'notebooks/fill.ipynb' 87 | - 'filter': 'notebooks/filter.ipynb' 88 | - 'filter-joins': 'notebooks/filter-joins.ipynb' 89 | - 'forcats_fct_multi': 'notebooks/forcats_fct_multi.ipynb' 90 | - 'forcats_lvl_addrm': 'notebooks/forcats_lvl_addrm.ipynb' 91 | - 'forcats_lvl_order': 'notebooks/forcats_lvl_order.ipynb' 92 | - 'forcats_lvl_value': 'notebooks/forcats_lvl_value.ipynb' 93 | - 'forcats_misc': 'notebooks/forcats_misc.ipynb' 94 | - 'full_seq': 'notebooks/full_seq.ipynb' 95 | - 'other': 'notebooks/other.ipynb' 96 | - 'group_by': 'notebooks/group_by.ipynb' 97 | - 'group_map': 'notebooks/group_map.ipynb' 98 | - 'group_split': 'notebooks/group_split.ipynb' 99 | - 'group_trim': 'notebooks/group_trim.ipynb' 100 | - 'lead-lag': 'notebooks/lead-lag.ipynb' 101 | - 'mutate-joins': 'notebooks/mutate-joins.ipynb' 102 | - 'mutate': 'notebooks/mutate.ipynb' 103 | - 'n_distinct': 'notebooks/n_distinct.ipynb' 104 | - 'na_if': 'notebooks/na_if.ipynb' 105 | - 'near': 'notebooks/near.ipynb' 106 | - 'nest': 'notebooks/nest.ipynb' 107 | - 'nest-join': 'notebooks/nest-join.ipynb' 108 | - 'nth': 'notebooks/nth.ipynb' 109 | - 'pack': 'notebooks/pack.ipynb' 110 | - 'pivot_longer': 'notebooks/pivot_longer.ipynb' 111 | - 'pivot_wider': 'notebooks/pivot_wider.ipynb' 112 | - 'pull': 'notebooks/pull.ipynb' 113 | - 'ranking': 'notebooks/ranking.ipynb' 114 | - 'readme': 'notebooks/readme.ipynb' 115 | - 'recode': 'notebooks/recode.ipynb' 116 | - 'relocate': 'notebooks/relocate.ipynb' 117 | - 'rename': 'notebooks/rename.ipynb' 118 | - 'replace_na': 'notebooks/replace_na.ipynb' 119 | - 'rownames': 'notebooks/rownames.ipynb' 120 | - 'rows': 'notebooks/rows.ipynb' 121 | - 'rowwise': 'notebooks/rowwise.ipynb' 122 | - 'select': 'notebooks/select.ipynb' 123 | - 'separate': 'notebooks/separate.ipynb' 124 | - 'setops': 'notebooks/setops.ipynb' 125 | - 'slice': 'notebooks/slice.ipynb' 126 | - 'summarise': 'notebooks/summarise.ipynb' 127 | - 'tibble': 'notebooks/tibble.ipynb' 128 | - 'uncount': 'notebooks/uncount.ipynb' 129 | - 'unite': 'notebooks/unite.ipynb' 
130 | - 'with_groups': 'notebooks/with_groups.ipynb' 131 | - 'API': 'mkapi/api/datar' 132 | - 'Change Log': CHANGELOG.md 133 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datar" 3 | version = "0.15.9" 4 | description = "A Grammar of Data Manipulation in python" 5 | authors = ["pwwang "] 6 | license = "MIT" 7 | readme = "README.md" 8 | homepage = "https://github.com/pwwang/datar" 9 | repository = "https://github.com/pwwang/datar" 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.9" 13 | simplug = "^0.5" 14 | pipda = "^0.13.1" 15 | python-simpleconf = {version = "^0.7", extras = ["toml"]} 16 | datar-numpy = {version = "^0.3.4", optional = true} 17 | datar-pandas = {version = "^0.5.5", optional = true} 18 | # datar-polars = {version = "^0.0.0", optional = true} 19 | datar-arrow = {version = "^0.1", optional = true} 20 | 21 | [tool.poetry.build] 22 | generate-setup-file = true 23 | 24 | [tool.poetry.extras] 25 | numpy = ["datar-numpy"] 26 | pandas = ["datar-pandas"] 27 | arrow = ["datar-arrow"] 28 | # modin = ["datar-pandas"] 29 | # polars = ["datar-polars"] 30 | 31 | [tool.poetry.group.dev.dependencies] 32 | pytest = "^8.1" 33 | pytest-cov = "^6" 34 | six = "^1.16" 35 | numpy = "*" 36 | python-slugify = "^8" 37 | 38 | [tool.poetry.group.docs.dependencies] 39 | mkdocs = "^1.6" 40 | mkdocs-material = "^9.6" 41 | pymdown-extensions = "^10.14" 42 | mkapi-fix = "^0.1" 43 | mkdocs-jupyter = "^0.25" 44 | ipykernel = "^6.29" 45 | ipython-genutils = "^0.2" 46 | plotnine = "^0.13" 47 | klib = "^1.3" 48 | pardoc = "^0.2" 49 | 50 | [build-system] 51 | requires = ["poetry-core"] 52 | build-backend = "poetry.core.masonry.api" 53 | 54 | [tool.mypy] 55 | ignore_missing_imports = true 56 | allow_redefinition = true 57 | disable_error_code = ["attr-defined", "no-redef", "union-attr"] 58 | show_error_codes = true 59 | strict_optional = false 60 | 61 | [tool.pytest.ini_options] 62 | addopts = "-vv -p no:asyncio --tb=short --cov-config=.coveragerc --cov=datar --cov-report xml:cov.xml --cov-report term-missing" 63 | filterwarnings = [ 64 | # "error" 65 | ] 66 | console_output_style = "progress" 67 | junit_family = "xunit1" 68 | 69 | [tool.black] 70 | line-length = 80 71 | target-version = ['py37', 'py38', 'py39'] 72 | include = '\.pyi?$' 73 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | # This will not be included in the distribution. 3 | # The distribution is managed by poetry 4 | # This file is kept only for 5 | # 1. Github to index the dependents 6 | # 2. pip install -e . 
7 | """ 8 | 9 | from setuptools import setup 10 | 11 | setup(name="datar") 12 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/tests/__init__.py -------------------------------------------------------------------------------- /tests/conflict_names.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def test_getattr(module, allow_conflict_names, fun, error): 5 | from datar import options 6 | options(allow_conflict_names=allow_conflict_names) 7 | 8 | if module == "all": 9 | import datar.all as d 10 | elif module == "base": 11 | import datar.base as d 12 | elif module == "dplyr": 13 | import datar.dplyr as d 14 | 15 | if not error: 16 | return getattr(d, fun) 17 | 18 | try: 19 | getattr(d, fun) 20 | except Exception as e: 21 | raised = type(e).__name__ 22 | assert raised == error, f"Raised {raised}, expected {error}" 23 | else: 24 | raise AssertionError(f"{error} should have raised") 25 | 26 | 27 | def _import(module, fun): 28 | if module == "all" and fun == "sum": 29 | from datar.all import sum # noqa: F401 30 | elif module == "all" and fun == "slice": 31 | from datar.all import slice # noqa: F401 32 | elif module == "base" and fun == "sum": 33 | from datar.base import sum # noqa: F401 34 | elif module == "dplyr" and fun == "slice": 35 | from datar.dplyr import slice # noqa: F401 36 | 37 | 38 | def test_import(module, allow_conflict_names, fun, error): 39 | from datar import options 40 | options(allow_conflict_names=allow_conflict_names) 41 | 42 | if not error: 43 | return _import(module, fun) 44 | 45 | try: 46 | _import(module, fun) 47 | except Exception as e: 48 | raised = type(e).__name__ 49 | assert raised == error, f"Raised {raised}, expected {error}" 50 | else: 51 | raise AssertionError(f"{error} should have raised") 52 | 53 | 54 | def make_test(module, allow_conflict_names, getattr, fun, error): 55 | if fun == "_": 56 | fun = "sum" if module in ["all", "base"] else "slice" 57 | 58 | if getattr: 59 | return test_getattr(module, allow_conflict_names, fun, error) 60 | 61 | return test_import(module, allow_conflict_names, fun, error) 62 | 63 | 64 | def main(): 65 | parser = argparse.ArgumentParser() 66 | parser.add_argument( 67 | "--module", 68 | choices=["all", "base", "dplyr"], 69 | required=True, 70 | help="The module to test" 71 | ) 72 | parser.add_argument( 73 | "--allow-conflict-names", 74 | action="store_true", 75 | help="Whether to allow conflict names", 76 | default=False, 77 | ) 78 | parser.add_argument( 79 | "--getattr", 80 | action="store_true", 81 | help=( 82 | "Whether to test datar.all.sum, " 83 | "otherwise test from datar.all import sum." 84 | ), 85 | default=False, 86 | ) 87 | parser.add_argument( 88 | "--fun", 89 | help=( 90 | "The function to test. 
" 91 | "If _ then sum for all/base, slice for dplyr" 92 | ), 93 | choices=["sum", "filter", "_"], 94 | default="_", 95 | ) 96 | parser.add_argument( 97 | "--error", 98 | help="The error to expect", 99 | ) 100 | args = parser.parse_args() 101 | 102 | make_test( 103 | args.module, 104 | args.allow_conflict_names, 105 | args.getattr, 106 | args.fun, 107 | args.error, 108 | ) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from datar import options 2 | 3 | 4 | def pytest_sessionstart(session): 5 | # Load no plugins 6 | options(backends=[None]) 7 | -------------------------------------------------------------------------------- /tests/test_array_ufunc.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa: F401 2 | 3 | import numpy as np 4 | from pipda import Context 5 | from datar import f 6 | from datar.core import plugin as _ # noqa: F401 7 | from datar.apis.misc import array_ufunc 8 | 9 | 10 | def test_default(): 11 | out = np.sqrt(f)._pipda_eval([1, 4, 9], Context.EVAL) 12 | assert out.tolist() == [1, 2, 3] 13 | 14 | 15 | def test_misc_obj(): 16 | class Foo(list): 17 | pass 18 | 19 | @array_ufunc.register(Foo) 20 | def _array_ufunc(x, ufunc, *args, kind, **kwargs): 21 | return ufunc([i * 2 for i in x], *args, **kwargs) 22 | 23 | out = np.sqrt(f)._pipda_eval(Foo([2, 8, 18]), Context.EVAL) 24 | assert out.tolist() == [2, 4, 6] 25 | -------------------------------------------------------------------------------- /tests/test_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datar.base import ( 4 | ceiling, 5 | cov, 6 | floor, 7 | mean, 8 | median, 9 | pmax, 10 | pmin, 11 | sqrt, 12 | var, 13 | scale, 14 | col_sums, 15 | col_means, 16 | col_sds, 17 | col_medians, 18 | row_sums, 19 | row_means, 20 | row_sds, 21 | row_medians, 22 | min_, 23 | max_, 24 | round_, 25 | sum_, 26 | abs_, 27 | prod, 28 | sign, 29 | signif, 30 | trunc, 31 | exp, 32 | log, 33 | log2, 34 | log10, 35 | log1p, 36 | sd, 37 | weighted_mean, 38 | quantile, 39 | bessel_i, 40 | bessel_j, 41 | bessel_k, 42 | bessel_y, 43 | as_double, 44 | as_integer, 45 | as_logical, 46 | as_character, 47 | as_factor, 48 | as_ordered, 49 | as_date, 50 | as_numeric, 51 | arg, 52 | conj, 53 | mod, 54 | re_, 55 | im, 56 | as_complex, 57 | is_complex, 58 | cummax, 59 | cummin, 60 | cumprod, 61 | cumsum, 62 | droplevels, 63 | levels, 64 | set_levels, 65 | is_factor, 66 | is_ordered, 67 | nlevels, 68 | factor, 69 | ordered, 70 | cut, 71 | diff, 72 | expand_grid, 73 | outer, 74 | make_names, 75 | make_unique, 76 | rank, 77 | identity, 78 | is_logical, 79 | is_true, 80 | is_false, 81 | is_na, 82 | is_finite, 83 | is_infinite, 84 | any_na, 85 | as_null, 86 | is_null, 87 | set_seed, 88 | rep, 89 | c_, 90 | c, 91 | length, 92 | lengths, 93 | order, 94 | sort, 95 | rev, 96 | sample, 97 | seq, 98 | seq_along, 99 | seq_len, 100 | match, 101 | beta, 102 | lgamma, 103 | digamma, 104 | trigamma, 105 | choose, 106 | factorial, 107 | gamma, 108 | lfactorial, 109 | lchoose, 110 | lbeta, 111 | psigamma, 112 | rnorm, 113 | runif, 114 | rpois, 115 | rbinom, 116 | rcauchy, 117 | rchisq, 118 | rexp, 119 | is_character, 120 | grep, 121 | grepl, 122 | sub, 123 | gsub, 124 | strsplit, 125 | paste, 126 | paste0, 127 | sprintf, 128 | substr, 129 | 
substring, 130 | startswith, 131 | endswith, 132 | strtoi, 133 | trimws, 134 | toupper, 135 | tolower, 136 | chartr, 137 | nchar, 138 | nzchar, 139 | table, 140 | tabulate, 141 | is_atomic, 142 | is_double, 143 | is_element, 144 | is_integer, 145 | is_numeric, 146 | any_, 147 | all_, 148 | acos, 149 | acosh, 150 | asin, 151 | asinh, 152 | atan, 153 | atanh, 154 | cos, 155 | cosh, 156 | cospi, 157 | sin, 158 | sinh, 159 | sinpi, 160 | tan, 161 | tanh, 162 | tanpi, 163 | atan2, 164 | append, 165 | colnames, 166 | set_colnames, 167 | rownames, 168 | set_rownames, 169 | dim, 170 | diag, 171 | duplicated, 172 | intersect, 173 | ncol, 174 | nrow, 175 | proportions, 176 | setdiff, 177 | setequal, 178 | unique, 179 | t, 180 | union, 181 | max_col, 182 | complete_cases, 183 | head, 184 | tail, 185 | which, 186 | which_min, 187 | which_max, 188 | ) 189 | 190 | from datar.core.utils import NotImplementedByCurrentBackendError 191 | 192 | 193 | @pytest.mark.parametrize("fun,args", [ 194 | (ceiling, [1]), 195 | (cov, [[1, 2], [3, 4]]), 196 | (floor, [1]), 197 | (mean, [1]), 198 | (median, [1]), 199 | (pmax, [1]), 200 | (pmin, [1]), 201 | (sqrt, [1]), 202 | (var, [1]), 203 | (scale, [1]), 204 | (col_sums, [1]), 205 | (col_means, [1]), 206 | (col_sds, [1]), 207 | (col_medians, [1]), 208 | (row_sums, [1]), 209 | (row_means, [1]), 210 | (row_sds, [1]), 211 | (row_medians, [1]), 212 | (min_, [1]), 213 | (max_, [1]), 214 | (round_, [1]), 215 | (sum_, [1]), 216 | (abs_, [1]), 217 | (prod, [1]), 218 | (sign, [1]), 219 | (signif, [1]), 220 | (trunc, [1]), 221 | (exp, [1]), 222 | (log, [1]), 223 | (log2, [1]), 224 | (log10, [1]), 225 | (log1p, [1]), 226 | (sd, [1]), 227 | (weighted_mean, [1]), 228 | (quantile, [1]), 229 | (bessel_i, [1, 2]), 230 | (bessel_j, [1, 2]), 231 | (bessel_k, [1, 2]), 232 | (bessel_y, [1, 2]), 233 | (as_double, [1]), 234 | (as_integer, [1]), 235 | (as_logical, [1]), 236 | (as_character, [1]), 237 | (as_factor, [1]), 238 | (as_ordered, [1]), 239 | (as_date, [1]), 240 | (as_numeric, [1]), 241 | (arg, [1]), 242 | (conj, [1]), 243 | (mod, [1]), 244 | (re_, [1]), 245 | (im, [1]), 246 | (as_complex, [1]), 247 | (is_complex, [1]), 248 | (cummax, [1]), 249 | (cummin, [1]), 250 | (cumprod, [1]), 251 | (cumsum, [1]), 252 | (droplevels, [1]), 253 | (levels, [1]), 254 | (set_levels, [1, 1]), 255 | (is_factor, [1]), 256 | (is_ordered, [1]), 257 | (nlevels, [1]), 258 | (factor, [1]), 259 | (ordered, [1]), 260 | (cut, [1, 1]), 261 | (diff, [1]), 262 | (expand_grid, [1]), 263 | (outer, [1, 1]), 264 | (rank, [1]), 265 | (is_logical, [1]), 266 | (is_true, [1]), 267 | (is_false, [1]), 268 | (is_na, [1]), 269 | (is_finite, [1]), 270 | (is_infinite, [1]), 271 | (any_na, [1]), 272 | (as_null, [1]), 273 | (is_null, [1]), 274 | (set_seed, [1]), 275 | (rep, [1]), 276 | (c_, [1]), 277 | (c, [1]), 278 | (length, [1]), 279 | (lengths, [1]), 280 | (order, [1]), 281 | (rev, [1]), 282 | (seq, [1]), 283 | (seq_along, [1]), 284 | (seq_len, [1]), 285 | (sort, [1]), 286 | (sample, [1]), 287 | (match, [1, 1]), 288 | (is_element, [1, 1]), 289 | (is_atomic, [1]), 290 | (is_double, [1]), 291 | (is_integer, [1]), 292 | (is_numeric, [1]), 293 | (any_, [1]), 294 | (all_, [1]), 295 | (acos, [1]), 296 | (acosh, [1]), 297 | (asin, [1]), 298 | (asinh, [1]), 299 | (atan, [1]), 300 | (atanh, [1]), 301 | (cos, [1]), 302 | (cosh, [1]), 303 | (cospi, [1]), 304 | (sin, [1]), 305 | (sinh, [1]), 306 | (sinpi, [1]), 307 | (tan, [1]), 308 | (tanh, [1]), 309 | (tanpi, [1]), 310 | (atan2, [1, 1]), 311 | (beta, [1, 1]), 312 | (choose, [1, 1]), 
--------------------------------------------------------------------------------
/tests/test_conflict_names.py:
--------------------------------------------------------------------------------
import sys
import subprocess
from pathlib import Path

import pytest


def _run_conflict_names(module, allow_conflict_names, getat, error):
    here = Path(__file__).parent
    conflict_names = here / "conflict_names.py"
    cmd = [
        sys.executable,
        str(conflict_names),
        "--module",
        module,
    ]
    if error:
        cmd += ["--error", error]
    if allow_conflict_names:
        cmd.append("--allow-conflict-names")
    if getat:
        cmd.append("--getattr")

    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return p.wait(), " ".join(cmd)


def test_from_all_import_allow_conflict_names_true():
    r, cmd = _run_conflict_names("all", True, False, None)
    assert r == 0, cmd


def test_from_all_import_allow_conflict_names_false():
    r, cmd = _run_conflict_names("all", False, False, "ImportError")
    assert r == 0, cmd


def test_all_getattr_allow_conflict_names_true():
    r, cmd = _run_conflict_names("all", True, True, None)
    assert r == 0, cmd


def test_all_getattr_allow_conflict_names_false():
    r, cmd = _run_conflict_names("all", False, True, None)
    assert r == 0, cmd


def test_from_base_import_allow_conflict_names_true():
    r, cmd = _run_conflict_names("base", True, False, None)
    assert r == 0, cmd


def test_from_base_import_allow_conflict_names_false():
    r, cmd = _run_conflict_names("base", False, False, "ImportError")
    assert r == 0, cmd


def test_base_getattr_allow_conflict_names_true():
    r, cmd = _run_conflict_names("base", True, True, None)
    assert r == 0, cmd


def test_base_getattr_allow_conflict_names_false():
    r, cmd = _run_conflict_names("base", False, True, None)
    assert r == 0, cmd


def test_from_dplyr_import_allow_conflict_names_true():
    r, cmd = _run_conflict_names("dplyr", True, False, None)
    assert r == 0, cmd


def test_from_dplyr_import_allow_conflict_names_false():
    r, cmd = _run_conflict_names("dplyr", False, False, "ImportError")
    assert r == 0, cmd


def test_dplyr_getattr_allow_conflict_names_true():
    r, cmd = _run_conflict_names("dplyr", True, True, None)
    assert r == 0, cmd


def test_dplyr_getattr_allow_conflict_names_false():
    r, cmd = _run_conflict_names("dplyr", False, True, None)
    assert r == 0, cmd
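
# Each case above shells out to tests/conflict_names.py and asserts only on
# the subprocess exit code; the constructed command looks roughly like:
#
#   python tests/conflict_names.py --module all --allow-conflict-names
#
# and "--error ImportError" marks runs expected to fail inside the
# subprocess rather than here.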
--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
import pytest
from datar.data import descr_datasets, add_dataset
from datar.core.utils import NotImplementedByCurrentBackendError


def test_descr_datasets():
    x = descr_datasets()
    assert "iris" in x

    x = descr_datasets("iris")
    assert "iris" in x and len(x) == 1


def test_add_dataset():
    add_dataset("test", {"url": ""})
    assert "test" in descr_datasets()


def test_load_dataset():
    with pytest.raises(NotImplementedByCurrentBackendError):
        from datar.data import iris  # noqa: F401


def test_no_such():
    with pytest.raises(NotImplementedByCurrentBackendError):
        from datar.data import nosuch  # noqa: F401
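
# Datasets appear to be resolved lazily (via a module-level __getattr__), so
# the bare `from datar.data import iris` above is enough to trigger the
# backend's load_dataset hook -- and to fail when no backend is loaded.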
--------------------------------------------------------------------------------
/tests/test_dplyr.py:
--------------------------------------------------------------------------------
import pytest

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.dplyr import (
    across, add_count, add_tally, all_of, anti_join, any_of, arrange,
    between, bind_cols, bind_rows, c_across, case_when, coalesce,
    contains, count, cumall, cumany, cume_dist, cummean,
    cur_column, cur_data, cur_data_all, cur_group, cur_group_id,
    cur_group_rows, dense_rank, desc, distinct, ends_with, everything,
    filter_, first, full_join, glimpse, group_by,
    group_by_drop_default, group_cols, group_data, group_indices,
    group_keys, group_map, group_modify, group_rows, group_size,
    group_split, group_trim, group_vars, group_walk,
    if_all, if_any, if_else, inner_join, lag, last, last_col, lead,
    left_join, matches, min_rank, mutate, n, n_distinct, n_groups,
    na_if, near, nest_join, nth, ntile, num_range, order_by,
    percent_rank, pull, recode, recode_factor, relocate, rename,
    rename_with, right_join, row_number, rows_append, rows_delete,
    rows_insert, rows_patch, rows_update, rows_upsert, rowwise,
    select, semi_join, slice_, slice_head, slice_min, slice_sample,
    slice_tail, slice_max, starts_with, summarise, tally, transmute,
    ungroup, union_all, where, with_groups, with_order, pick,
    symdiff, consecutive_id, case_match, cross_join,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (add_count, None, [], None), (add_tally, None, [], None),
    (anti_join, None, [None], None), (arrange, None, [], None),
    (between, None, [1, 2], None), (bind_cols, None, [], None),
    (bind_rows, None, [], None), (case_when, None, [1], None),
    (coalesce, None, [], None), (count, None, [], None),
    (cumall, None, [], None), (cumany, None, [], None),
    (cume_dist, None, [], None), (cummean, None, [], None),
    (cur_column, None, [1], None), (dense_rank, None, [], None),
    (desc, None, [], None), (distinct, None, [], None),
    (filter_, None, [], None), (first, None, [], None),
    (full_join, None, [1], None), (glimpse, None, [], None),
    (group_by, None, [], None), (group_by_drop_default, None, [], None),
    (group_cols, None, [], None), (group_data, None, [], None),
    (group_indices, None, [], None), (group_keys, None, [], None),
    (group_map, None, [1], None), (group_modify, None, [1], None),
    (group_rows, None, [], None), (group_size, None, [], None),
    (group_split, None, [], None), (group_trim, None, [], None),
    (group_vars, None, [], None), (group_walk, None, [1], None),
    (if_else, None, [1, 2], None), (inner_join, None, [1], None),
    (lag, None, [], None), (last, None, [], None),
    (lead, None, [], None), (left_join, None, [1], None),
    (min_rank, None, [], None), (mutate, None, [], None),
    (n_distinct, None, [], None), (n_groups, None, [], None),
    (na_if, None, [1], None), (near, None, [1], None),
    (nest_join, None, [1], None), (nth, None, [1], None),
    (ntile, None, [], None), (num_range, None, [1], None),
    (order_by, None, [1], None), (percent_rank, None, [], None),
    (pull, None, [], None), (recode, None, [], None),
    (recode_factor, None, [], None), (relocate, None, [], None),
    (rename, None, [], None), (rename_with, None, [1], None),
    (right_join, None, [1], None), (row_number, None, [], None),
    (rows_append, None, [None], None), (rows_delete, None, [None], None),
    (rows_insert, None, [None], None), (rows_patch, None, [None], None),
    (rows_update, None, [None], None), (rows_upsert, None, [None], None),
    (rowwise, None, [], None), (select, None, [], None),
    (semi_join, None, [1], None), (slice_, None, [], None),
    (slice_head, None, [], None), (slice_min, None, [1], None),
    (slice_sample, None, [], None), (slice_tail, None, [], None),
    (slice_max, None, [1], None), (summarise, None, [], None),
    (tally, None, [], None), (transmute, None, [], None),
    (ungroup, None, [], None), (union_all, None, [1], None),
    (with_groups, None, [1, 2], None), (with_order, None, [1, 2], None),
    (symdiff, None, [None], None), (consecutive_id, None, [], None),
    (case_match, None, [], None), (cross_join, None, [1], None),
])
def test_verb_not_implemented(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (pick, None, [], None), (across, None, [], None),
    (if_any, None, [], None), (if_all, None, [], None),
    (c_across, None, [], None), (cur_data, None, [], None),
    (n, None, [], None), (cur_data_all, None, [], None),
    (cur_group, None, [], None), (cur_group_id, None, [], None),
    (cur_group_rows, None, [], None), (where, None, [1], None),
    (everything, None, [], None), (last_col, None, [], None),
    (starts_with, None, [1], None), (ends_with, None, [1], None),
    (contains, None, [1], None), (matches, None, [1], None),
    (all_of, None, [1], None), (any_of, None, [1], None),
])
def test_dep_verbs(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        data >> verb(*args, **kwargs)
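
# Two calling conventions are covered: test_verb_not_implemented invokes
# each verb directly as verb(data, ...), while test_dep_verbs pipes data
# into the verb, roughly `data >> select(...)`; without a backend, both
# paths must end in NotImplementedByCurrentBackendError.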
--------------------------------------------------------------------------------
/tests/test_forcats.py:
--------------------------------------------------------------------------------
import pytest  # noqa: F401

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.forcats import (
    fct_anon, fct_c, fct_collapse, fct_count, fct_cross, fct_drop,
    fct_expand, fct_explicit_na, fct_infreq, fct_inorder, fct_inseq,
    fct_lump, fct_lump_lowfreq, fct_lump_min, fct_lump_n,
    fct_lump_prop, fct_match, fct_other, fct_recode, fct_relabel,
    fct_relevel, fct_reorder, fct_reorder2, fct_rev, fct_shift,
    fct_shuffle, fct_unify, fct_unique, first2, last2,
    lvls_expand, lvls_reorder, lvls_revalue, lvls_union,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (fct_anon, None, [], None), (fct_c, None, [], None),
    (fct_collapse, None, [], None), (fct_count, None, [], None),
    (fct_cross, None, [], None), (fct_drop, None, [], None),
    (fct_expand, None, [], None), (fct_explicit_na, None, [], None),
    (fct_infreq, None, [], None), (fct_inorder, None, [], None),
    (fct_inseq, None, [], None), (fct_lump, None, [], None),
    (fct_lump_lowfreq, None, [], None), (fct_lump_min, None, [1], None),
    (fct_lump_n, None, [1], None), (fct_lump_prop, None, [1], None),
    (fct_match, None, [1], None), (fct_other, None, [], None),
    (fct_recode, None, [], None), (fct_relabel, None, [1], None),
    (fct_relevel, None, [], None), (fct_reorder, None, [1], None),
    (fct_reorder2, None, [1], None), (fct_rev, None, [], None),
    (fct_shift, None, [], None), (fct_shuffle, None, [], None),
    (fct_unify, None, [], None), (fct_unique, None, [], None),
    (first2, None, [1], None), (last2, None, [1], None),
    (lvls_expand, None, [1], None), (lvls_reorder, None, [1], None),
    (lvls_revalue, None, [1], None), (lvls_union, None, [], None),
])
def test_default_impl(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)
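
# As with the dplyr verbs above, these forcats functions are dispatch
# stubs: e.g. fct_rev(None) raises NotImplementedByCurrentBackendError
# until a backend registers an implementation.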
--------------------------------------------------------------------------------
/tests/test_names.py:
--------------------------------------------------------------------------------
# https://github.com/r-lib/vctrs/blob/master/tests/testthat/test-names.R
import pytest
from typing import Iterable

import numpy as np
from string import ascii_letters

from datar.core.names import (
    NameNonUniqueError,
    repair_names,
)


@pytest.mark.parametrize(
    "names,expect",
    [
        ([1, 2, 3], ["1", "2", "3"]),
        (["", np.nan], ["", ""]),
        (["", np.nan], ["", ""]),
        (["", "", np.nan], ["", "", ""]),
        (repair_names(["", "", np.nan], repair="minimal"), ["", "", ""]),
    ],
)
def test_minimal(names, expect):
    assert repair_names(names, repair="minimal") == expect


@pytest.mark.parametrize(
    "names,expect",
    [
        ([np.nan, np.nan], ["__0", "__1"]),
        (["x", "x"], ["x__0", "x__1"]),
        (["x", "y"], ["x", "y"]),
        (["", "x", "y", "x"], ["__0", "x__1", "y", "x__3"]),
        ([""], ["__0"]),
        ([np.nan], ["__0"]),
        (
            ["__20", "a__33", "b", "", "a__2__34"],
            ["__0", "a__1", "b", "__3", "a__4"],
        ),
        (["a__1"], ["a"]),
        (["a__2", "a"], ["a__0", "a__1"]),
        (["a__3", "a", "a"], ["a__0", "a__1", "a__2"]),
        (["a__2", "a", "a"], ["a__0", "a__1", "a__2"]),
        (["a__2", "a__2", "a__2"], ["a__0", "a__1", "a__2"]),
        (
            ["__20", "a__1", "b", "", "a__2"],
            ["__0", "a__1", "b", "__3", "a__4"],
        ),
        (
            repair_names(["__20", "a__1", "b", "", "a__2"], repair="unique"),
            ["__0", "a__1", "b", "__3", "a__4"],
        ),
        (
            ["", "x", "", "y", "x", "_2", "__"],
            ["__0", "x__1", "__2", "y", "x__4", "__5", "__6"],
        ),
    ],
)
def test_unique(names, expect):
    assert repair_names(names, repair="unique") == expect


def test_unique_algebraic_y():
    x = ["__20", "a__1", "b", "", "a__2", "d"]
    y = ["", "a__3", "b", "__3", "e"]
    # fix names on each, catenate, fix the whole
    z1 = repair_names(
        repair_names(x, repair="unique") + repair_names(y, repair="unique"),
        repair="unique",
    )
    z2 = repair_names(repair_names(x, repair="unique") + y, repair="unique")
    z3 = repair_names(x + repair_names(y, repair="unique"), repair="unique")
    z4 = repair_names(x + y, repair="unique")
    assert z1 == z2 == z3 == z4


@pytest.mark.parametrize(
    "names,expect",
    [
        (list(ascii_letters), list(ascii_letters)),
        (
            [np.nan, "", "x", "x", "a1:", "_x_y}"],
            ["__0", "__1", "x__2", "x__3", "a1_", "_x_y_"],
        ),
        (
            repair_names(
                [np.nan, "", "x", "x", "a1:", "_x_y}"], repair="universal"
            ),
            ["__0", "__1", "x__2", "x__3", "a1_", "_x_y_"],
        ),
        (["a", "b", "a", "c", "b"], ["a__0", "b__1", "a__2", "c", "b__4"]),
        ([""], ["__0"]),
        ([np.nan], ["__0"]),
        (["__"], ["__0"]),
        (["_"], ["_"]),
        (["_", "_"], ["___0", "___1"]),
        (["", "_"], ["__0", "_"]),
        (["", "", "_"], ["__0", "__1", "_"]),
        (["_", "_", ""], ["___0", "___1", "__2"]),
        (["_", "", "_"], ["___0", "__1", "___2"]),
        (["", "_", ""], ["__0", "_", "__2"]),
        (["__6", "__1__2"], ["__0", "__1"]),
        (["if__2"], ["_if"]),
        (
            ["", "_", np.nan, "if__4", "if", "if__8", "for", "if){]1"],
            [
                "__0", "_", "__2", "_if__3", "_if__4", "_if__5",
                "_for", "if___1",
            ],
        ),
        (["a b", "b c"], ["a_b", "b_c"]),
        (
            ["", "_2", "_3", "__4", "___5", "____6", "_____7", "__"],
            ["__0", "__1", "__2", "__3", "___5", "____6", "_____7", "__7"],
        ),
        (
            repair_names(
                ["", "_2", "_3", "__4", "___5", "____6", "_____7", "__"],
                repair="unique",
            ),
            ["__0", "__1", "__2", "__3", "___5", "____6", "_____7", "__7"],
        ),
        (
            [7, 4, 3, 6, 5, 1, 2, 8],
            ["_7", "_4", "_3", "_6", "_5", "_1", "_2", "_8"],
        ),
        (
            repair_names([7, 4, 3, 6, 5, 1, 2, 8], repair="unique"),
            ["_7", "_4", "_3", "_6", "_5", "_1", "_2", "_8"],
        ),
    ],
)
def test_universal(names, expect):
    assert repair_names(names, repair="universal") == expect


def test_check_unique():
    with pytest.raises(NameNonUniqueError):
        repair_names([np.nan], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names([""], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names(["a", "a"], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names(["__1"], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names(["__"], repair="check_unique")
    assert repair_names(["a", "b"], repair="check_unique") == ["a", "b"]


def test_custom_repair():
    def replace(names: Iterable[str]):
        return ["a", "b", "c"]

    out = repair_names([1, 2, 3], repair=replace)
    assert out == ["a", "b", "c"]

    with pytest.raises(ValueError):
        repair_names([1, 2, 3], repair=1)

    out = repair_names(["a", "b", "c"], repair=str.upper)
    assert out == ["A", "B", "C"]

    out = repair_names(["a", "b", "c"], repair=["x", "y", "z"])
    assert out == ["x", "y", "z"]
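
# repair_names at a glance (outputs taken from the cases above):
#
#   repair_names(["x", "x"], repair="unique")     # -> ["x__0", "x__1"]
#   repair_names(["a1:"], repair="universal")     # -> ["a1_"]
#   repair_names(["a", "b"], repair=str.upper)    # -> ["A", "B"]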
--------------------------------------------------------------------------------
/tests/test_options.py:
--------------------------------------------------------------------------------
import pytest
from datar.core.options import (
    options,
    options_context,
    add_option,
    get_option,
)


@pytest.fixture(autouse=True)
def reset_options():
    opts = options()
    add_option("x_y_z", True)
    yield
    options(opts)


def test_options_empty_args_returns_full_options():
    from datar.core.options import OPTIONS
    out = options()
    assert out == OPTIONS


def test_options_with_names_only_selects_options():
    out = options("x_y_z")
    assert len(out) == 1
    assert out["x_y_z"]


def test_options_with_name_value_pairs_returns_old_values_and_updates():
    out = options(x_y_z=False, _return=True)
    assert out == {"x_y_z": True}
    assert not get_option("x.y.z")  # dotted alias for x_y_z


def test_options_with_dict_updates_options():
    out = options({"x_y_z": True}, _return=True)
    assert get_option("x_y_z")
    assert out.x_y_z


def test_options_context():
    assert get_option("x_y_z")
    with options_context(x_y_z=False):
        assert not get_option("x_y_z")

    assert get_option("x_y_z")
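
# options() mirrors R's options(): called with no arguments it returns the
# current option mapping, name=value pairs update options in place, and
# _return=True additionally hands back the previous values of the options
# just changed -- which is why the autouse fixture can snapshot and restore
# the whole mapping around each test.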
--------------------------------------------------------------------------------
/tests/test_plugin.py:
--------------------------------------------------------------------------------
import pytest

import numpy as np
from simplug import MultipleImplsForSingleResultHookWarning
from pipda import Context
from pipda.utils import MultiImplementationsWarning
from datar import f
from datar.core.plugin import plugin
from datar.core.operator import DatarOperator


class TestPlugin1:

    @plugin.impl
    def get_versions():
        return {"abc": "1.2.3"}

    @plugin.impl
    def load_dataset(name, metadata):
        return name * 2

    @plugin.impl
    def misc_api():
        from datar.apis.misc import array_ufunc

        @array_ufunc.register(object, backend="testplugin1")
        def _array_ufunc(x, ufunc, *args, kind, **kwargs):
            return ufunc([i * 3 for i in x], *args, **kwargs)

        return {"other_var": 1}

    @plugin.impl
    def operate(op, x, y=None):
        if op == "add":
            return x + y + x * y
        return None

    @plugin.impl
    def c_getitem(item):
        return item * 2


class TestPlugin2:

    @plugin.impl
    def load_dataset(name, metadata):
        return name * 3

    @plugin.impl
    def c_getitem(item):
        return item * 4

    @plugin.impl
    def operate(op, x, y=None):
        if op == "add":
            return x + y + 2 * x * y
        return None


def setup_function(function):
    plugin.register(TestPlugin1)
    plugin.register(TestPlugin2)
    plugin.get_plugin("testplugin1").disable()
    plugin.get_plugin("testplugin2").disable()


@pytest.fixture
def with_test_plugin1():
    plugin.get_plugin("testplugin1").enable()
    yield
    plugin.get_plugin("testplugin1").disable()


@pytest.fixture
def with_test_plugin2():
    plugin.get_plugin("testplugin2").enable()
    yield
    plugin.get_plugin("testplugin2").disable()


def test_get_versions(with_test_plugin1, capsys):
    from datar import get_versions
    assert get_versions(prnt=False)["abc"] == "1.2.3"

    get_versions()
    assert "datar" in capsys.readouterr().out


def test_misc_api(with_test_plugin1):
    from datar import all, misc
    plugin.hooks.misc_api()
    from importlib import reload
    reload(misc)
    assert misc.other_var == 1

    reload(all)
    from datar.all import other_var
    assert other_var == 1


def test_misc_api_array_ufunc(with_test_plugin1):
    from datar import f
    from datar.apis.misc import array_ufunc

    plugin.hooks.misc_api()

    with pytest.warns(MultiImplementationsWarning):
        out = np.sqrt(f)._pipda_eval([3, 12, 27], Context.EVAL)

    assert out.tolist() == [3, 6, 9]

    with array_ufunc.with_backend("_default"):
        out = np.sqrt(f)._pipda_eval([1, 4, 9], Context.EVAL)

    assert out.tolist() == [1, 2, 3]


def test_load_dataset(with_test_plugin1, with_test_plugin2):
    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        from datar.data import iris

    assert iris == "irisirisiris"

    from datar.data import load_dataset
    assert load_dataset("iris", __backend="testplugin1") == "irisiris"


def test_operate(with_test_plugin1):
    expr = f[0] + f[1]
    assert expr._pipda_eval([3, 2], Context.EVAL) == 11


def test_operate2(with_test_plugin1, with_test_plugin2):
    expr = f[0] + f[1]
    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert expr._pipda_eval([3, 2], Context.EVAL) == 17

    with DatarOperator.with_backend("testplugin1"):
        assert expr._pipda_eval([3, 2], Context.EVAL) == 11

    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert expr._pipda_eval([3, 2], Context.EVAL) == 17


def test_c_getitem(with_test_plugin1):
    from datar.base import c
    assert c[11] == 22


def test_c_getitem2(with_test_plugin1, with_test_plugin2):
    from datar.base import c
    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert c[11] == 44

    with c.with_backend("testplugin1"):
        assert c[11] == 22

    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert c[11] == 44
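
# The pattern under test: operate, c_getitem and load_dataset are
# single-result hooks, so simplug warns with
# MultipleImplsForSingleResultHookWarning when two enabled plugins both
# implement them, and with_backend("testplugin1") pins one implementation
# for the duration of the block (e.g. 3 + 2 + 3 * 2 == 11 for TestPlugin1's
# "add", vs 3 + 2 + 2 * 3 * 2 == 17 for TestPlugin2's).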
--------------------------------------------------------------------------------
/tests/test_tibble.py:
--------------------------------------------------------------------------------
import pytest  # noqa: F401

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.tibble import (
    add_column, add_row, as_tibble, column_to_rownames, deframe,
    enframe, has_rownames, remove_rownames, rowid_to_column,
    rownames_to_column, tibble, tibble_, tibble_row, tribble,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (add_column, None, [1], None),
    (add_row, None, [1], None),
    (as_tibble, None, [], None),
    (column_to_rownames, None, [], None),
    (deframe, None, [], None),
    (enframe, None, [], None),
    (has_rownames, None, [], None),
    (remove_rownames, None, [], None),
    (rowid_to_column, None, ["x"], None),
    (rownames_to_column, None, ["x"], None),
    (tibble, None, [], None),
    (tibble_, None, [], None),
    (tibble_row, None, [], None),
    (tribble, None, [], None),
])
def test_default_impl(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)
--------------------------------------------------------------------------------
/tests/test_tidyr.py:
--------------------------------------------------------------------------------
import pytest

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.tidyr import (
    chop, complete, crossing, drop_na, expand, extract, fill,
    full_seq, nest, nesting, pack, pivot_longer, pivot_wider,
    replace_na, separate, separate_rows, unchop, uncount, unite,
    unnest, unpack,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (chop, None, [], None),
    (complete, None, [], None),
    (crossing, None, [], None),
    (drop_na, None, [], None),
    (expand, None, [], None),
    (extract, None, [1, 1], None),
    (fill, None, [], None),
    (full_seq, None, [1], None),
    (nest, None, [], None),
    (nesting, None, [], None),
    (pack, None, [], None),
    (pivot_longer, None, [1], None),
    (pivot_wider, None, [], None),
    (replace_na, None, [], None),
    (separate, None, [1, 1], None),
    (separate_rows, None, [], None),
    (unchop, None, [], None),
    (uncount, None, [1], None),
    (unite, None, [1], None),
    (unnest, None, [], None),
    (unpack, None, [1], None),
])
def test_default_impl(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)
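
# Same default-implementation contract as tibble and dplyr above: each
# tidyr verb raises NotImplementedByCurrentBackendError until a backend
# (e.g. a pandas-based one) supplies the real implementation.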
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
import pytest
from datar.core.utils import arg_match


def test_arg_match():
    with pytest.raises(ValueError, match='abc'):
        arg_match('a', 'a', ['b', 'c'], errmsg='abc')
    with pytest.raises(ValueError, match='must be one of'):
        arg_match('a', 'a', ['b', 'c'])

    assert arg_match('a', 'a', ['a', 'b', 'c']) == 'a'
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[flake8]
ignore = E203, W503, E731
per-file-ignores =
    # imported but unused
    __init__.py: F401, E402
    datar/all.py: F401, E402, F403, F811
    datar/apis/base.py: F401
    datar/apis/dplyr.py: F401
    datar/apis/forcats.py: F401
    datar/apis/tidyr.py: F401
    datar/forcats.py: F401, F403
    datar/tidyr.py: F401, F403
    datar/tibble.py: F401, F403
    datar/base.py: F401, F402, F403, E402
    datar/dplyr.py: F401, F402, F403, E402
    datar/data/metadata.py: E501
    tests/test_conflict_names.py: F401
max-line-length = 81
--------------------------------------------------------------------------------