├── .codesandbox ├── Dockerfile └── setup.sh ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── feature_request.yml │ └── submit_question.yml └── workflows │ ├── ci.yml │ └── docs.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── datar ├── __init__.py ├── all.py ├── apis │ ├── __init__.py │ ├── base.py │ ├── dplyr.py │ ├── forcats.py │ ├── misc.py │ ├── tibble.py │ └── tidyr.py ├── base.py ├── core │ ├── __init__.py │ ├── defaults.py │ ├── load_plugins.py │ ├── names.py │ ├── operator.py │ ├── options.py │ ├── plugin.py │ └── utils.py ├── data │ ├── __init__.py │ ├── airlines.csv.gz │ ├── airports.csv.gz │ ├── airquality.csv.gz │ ├── anscombe.csv.gz │ ├── band_instruments.csv.gz │ ├── band_instruments2.csv.gz │ ├── band_members.csv.gz │ ├── billboard.csv.gz │ ├── chickweight.csv.gz │ ├── cms_patient_care.csv.gz │ ├── cms_patient_experience.csv.gz │ ├── construction.csv.gz │ ├── diamonds.csv.gz │ ├── economics.csv.gz │ ├── economics_long.csv.gz │ ├── faithful.csv.gz │ ├── faithfuld.csv.gz │ ├── fish_encounters.csv.gz │ ├── flights.csv.gz │ ├── gss_cat.csv.gz │ ├── household.csv.gz │ ├── iris.csv.gz │ ├── luv_colours.csv.gz │ ├── metadata.py │ ├── midwest.csv.gz │ ├── mpg.csv.gz │ ├── msleep.csv.gz │ ├── mtcars.csv.gz │ ├── planes.csv.gz │ ├── population.csv.gz │ ├── presidential.csv.gz │ ├── relig_income.csv.gz │ ├── seals.csv.gz │ ├── smiths.csv.gz │ ├── starwars.csv.gz │ ├── state_abb.csv.gz │ ├── state_division.csv.gz │ ├── state_region.csv.gz │ ├── storms.csv.gz │ ├── table1.csv.gz │ ├── table2.csv.gz │ ├── table3.csv.gz │ ├── table4a.csv.gz │ ├── table4b.csv.gz │ ├── table5.csv.gz │ ├── toothgrowth.csv.gz │ ├── txhousing.csv.gz │ ├── us_rent_income.csv.gz │ ├── warpbreaks.csv.gz │ ├── weather.csv.gz │ ├── who.csv.gz │ ├── who2.csv.gz │ └── world_bank_pop.csv.gz ├── datasets.py ├── dplyr.py ├── forcats.py ├── misc.py ├── tibble.py └── tidyr.py ├── docs ├── CHANGELOG.md ├── backends.md ├── data.md ├── f.md ├── favicon.png ├── func_factory.png ├── import.md ├── notebooks │ ├── across.ipynb │ ├── add_column.ipynb │ ├── add_row.ipynb │ ├── arrange.ipynb │ ├── base-arithmetic.ipynb │ ├── base-funs.ipynb │ ├── base.ipynb │ ├── between.ipynb │ ├── bind.ipynb │ ├── case_when.ipynb │ ├── chop.ipynb │ ├── coalesce.ipynb │ ├── complete.ipynb │ ├── context.ipynb │ ├── count.ipynb │ ├── cumall.ipynb │ ├── desc.ipynb │ ├── distinct.ipynb │ ├── drop_na.ipynb │ ├── enframe.ipynb │ ├── expand.ipynb │ ├── expand_grid.ipynb │ ├── extract.ipynb │ ├── fill.ipynb │ ├── filter-joins.ipynb │ ├── filter.ipynb │ ├── forcats_fct_multi.ipynb │ ├── forcats_lvl_addrm.ipynb │ ├── forcats_lvl_order.ipynb │ ├── forcats_lvl_value.ipynb │ ├── forcats_misc.ipynb │ ├── full_seq.ipynb │ ├── group_by.ipynb │ ├── group_map.ipynb │ ├── group_split.ipynb │ ├── group_trim.ipynb │ ├── lead-lag.ipynb │ ├── mutate-joins.ipynb │ ├── mutate.ipynb │ ├── n_distinct.ipynb │ ├── na_if.ipynb │ ├── nb_helpers.py │ ├── near.ipynb │ ├── nest-join.ipynb │ ├── nest.ipynb │ ├── nth.ipynb │ ├── other.ipynb │ ├── pack.ipynb │ ├── pivot_longer.ipynb │ ├── pivot_wider.ipynb │ ├── pull.ipynb │ ├── ranking.ipynb │ ├── readme.ipynb │ ├── recode.ipynb │ ├── relocate.ipynb │ ├── rename.ipynb │ ├── replace_na.ipynb │ ├── rownames.ipynb │ ├── rows.ipynb │ ├── rowwise.ipynb │ ├── select.ipynb │ ├── separate.ipynb │ ├── setops.ipynb │ ├── slice.ipynb │ ├── summarise.ipynb │ ├── tibble.ipynb │ ├── uncount.ipynb │ ├── unite.ipynb │ └── with_groups.ipynb ├── options.md ├── reference-maps │ ├── ALL.md │ ├── 
base.md │ ├── datasets.md │ ├── dplyr.md │ ├── forcats.md │ ├── other.md │ ├── stats.md │ ├── tibble.md │ ├── tidyr.md │ └── utils.md └── style.css ├── example.png ├── example2.png ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml ├── setup.py ├── tests ├── __init__.py ├── conflict_names.py ├── conftest.py ├── test_array_ufunc.py ├── test_base.py ├── test_conflict_names.py ├── test_data.py ├── test_dplyr.py ├── test_forcats.py ├── test_names.py ├── test_options.py ├── test_plugin.py ├── test_tibble.py ├── test_tidyr.py └── test_utils.py └── tox.ini /.codesandbox/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.12 2 | 3 | RUN apt-get update && apt-get install -y npm fish && \ 4 | pip install -U pip && \ 5 | pip install poetry && \ 6 | poetry config virtualenvs.create false && \ 7 | chsh -s /usr/bin/fish -------------------------------------------------------------------------------- /.codesandbox/setup.sh: -------------------------------------------------------------------------------- 1 | WORKSPACE="/workspace" 2 | 3 | # Install python dependencies 4 | poetry update && poetry install 5 | 6 | cd $WORKSPACE 7 | 8 | # Install whichpy 9 | WHICHPY="https://gist.githubusercontent.com/pwwang/879966128b0408c2459eb0a0b413fa69/raw/2f2573d191edec1937a2bf0873aa33a646b5ef29/whichpy.fish" 10 | curl -sS $WHICHPY -o ~/.config/fish/functions/whichpy.fish 11 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | if TYPE_CHECKING: 5 | omit = 6 | datar/datasets.py 7 | */site-packages/* 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Report incorrect behavior in the datar library 3 | title: "[BUG] " 4 | labels: [bug] 5 | 6 | body: 7 | - type: checkboxes 8 | id: checks 9 | attributes: 10 | label: datar version checks 11 | options: 12 | - label: > 13 | I have checked that this issue has not already been reported. 14 | required: true 15 | - label: > 16 | I have confirmed this bug exists on the 17 | **latest version** of datar and its backends. 18 | required: true 19 | - type: textarea 20 | id: problem 21 | attributes: 22 | label: Issue Description 23 | description: > 24 | Please provide a description of the issue shown in the reproducible example. 25 | validations: 26 | required: true 27 | - type: textarea 28 | id: expected-behavior 29 | attributes: 30 | label: Expected Behavior 31 | description: > 32 | Please describe or show a code example of the expected behavior. 33 | validations: 34 | required: true 35 | - type: textarea 36 | id: version 37 | attributes: 38 | label: Installed Versions 39 | description: > 40 | Please paste the output of ``datar.get_versions()`` 41 | value: > 42 |
<details> 43 | 44 | 45 |         Replace this line with the output of datar.get_versions() 46 | 47 | 48 | </details>
49 |     validations:
 50 |       required: true
 51 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: --------------------------------------------------------------------------------
 1 | name: Feature Request
 2 | description: Suggest an idea for datar
 3 | title: "[ENH] "
 4 | labels: [enhancement]
 5 | 
 6 | body:
 7 |   - type: checkboxes
 8 |     id: checks
 9 |     attributes:
 10 |       label: Feature Type
 11 |       description: Please check what type of feature request you would like to propose.
 12 |       options:
 13 |         - label: >
 14 |             Adding new functionality to datar
 15 |         - label: >
 16 |             Changing existing functionality in datar
 17 |         - label: >
 18 |             Removing existing functionality in datar
 19 |   - type: textarea
 20 |     id: description
 21 |     attributes:
 22 |       label: Problem Description
 23 |       description: >
 24 |         Please describe what problem the feature would solve, e.g. "I wish I could use datar to ..."
 25 |       placeholder: >
 26 |         I wish I could use datar to port the purrr package from R.
 27 |     validations:
 28 |       required: true
 29 |   - type: textarea
 30 |     id: feature
 31 |     attributes:
 32 |       label: Feature Description
 33 |       description: >
 34 |         Please describe how the new feature would be implemented, using pseudocode if relevant.
 35 |       placeholder: >
 36 |         Add a new module `datar.purrr` with functions `map`, `map2`, `map_df`, etc.
 37 |     validations:
 38 |       required: true
 39 |   - type: textarea
 40 |     id: context
 41 |     attributes:
 42 |       label: Additional Context
 43 |       description: >
 44 |         Please provide any relevant GitHub issues, code examples or references that help describe and support
 45 |         the feature request.
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/submit_question.yml: --------------------------------------------------------------------------------
 1 | name: Submit Question
 2 | description: Ask a general question about datar
 3 | title: "[QST] "
 4 | labels: [question]
 5 | 
 6 | body:
 7 |   - type: textarea
 8 |     id: question
 9 |     attributes:
 10 |       label: Question about datar
 11 |       description: >
 12 |         Try to provide a clear and concise description of your question.
 13 |       placeholder: |
 14 |         ```python
 15 |         # Your code here, if applicable
 16 | 
 17 |         ```
 18 | 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: --------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |   release:
 7 |     types: [published]
 8 | 
 9 | jobs:
 10 |   build:
 11 |     runs-on: ubuntu-latest
 12 |     strategy:
 13 |       matrix:
 14 |         python-version: [3.9, "3.10", "3.11", "3.12"]
 15 | 
 16 |     steps:
 17 |       - uses: actions/checkout@v4
 18 |       - name: Setup Python # Set Python version
 19 |         uses: actions/setup-python@v5
 20 |         with:
 21 |           python-version: ${{ matrix.python-version }}
 22 |       - name: Install dependencies
 23 |         run: |
 24 |           python -m pip install --upgrade pip
 25 |           python -m pip install poetry
 26 |           poetry config virtualenvs.create false
 27 |           # poetry install -v
 28 |           python -m pip install . 
29 | python -m pip install flake8 pytest pytest-cov six numpy python-slugify 30 | - name: Run flake8 31 | run: flake8 datar 32 | - name: Test with pytest 33 | run: poetry run pytest tests/ --junitxml=junit/test-results-${{ matrix.python-version }}.xml 34 | - name: Upload pytest test results 35 | uses: actions/upload-artifact@v4 36 | with: 37 | name: pytest-results-${{ matrix.python-version }} 38 | path: junit/test-results-${{ matrix.python-version }}.xml 39 | # Use always() to always run this step to publish test results when there are test failures 40 | if: ${{ always() }} 41 | - name: Run codacy-coverage-reporter 42 | uses: codacy/codacy-coverage-reporter-action@master 43 | if: matrix.python-version == 3.12 44 | with: 45 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} 46 | coverage-reports: cov.xml 47 | 48 | deploy: 49 | needs: build 50 | runs-on: ubuntu-latest 51 | if: github.event_name == 'release' 52 | strategy: 53 | matrix: 54 | python-version: ["3.12"] 55 | steps: 56 | - uses: actions/checkout@v4 57 | - name: Setup Python # Set Python version 58 | uses: actions/setup-python@v5 59 | with: 60 | python-version: ${{ matrix.python-version }} 61 | - name: Install dependencies 62 | run: | 63 | python -m pip install --upgrade pip 64 | python -m pip install poetry 65 | - name: Publish to PyPI 66 | run: poetry publish --build -u ${{ secrets.PYPI_USER }} -p ${{ secrets.PYPI_PASSWORD }} 67 | if: success() 68 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | 3 | on: [push] 4 | 5 | jobs: 6 | docs: 7 | runs-on: ubuntu-latest 8 | # if: github.ref == 'refs/heads/master' 9 | strategy: 10 | matrix: 11 | python-version: ["3.12"] 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Setup Python # Set Python version 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install poetry 21 | poetry config virtualenvs.create false 22 | poetry install -v --with docs 23 | python -m pip install --upgrade pip 24 | # Can't skip optional deps with poetry install -v 25 | # poetry install -v 26 | python -m pip install . 27 | - name: Build docs 28 | run: | 29 | # python -m pip install -r docs/requirements.txt 30 | python -m ipykernel install --user --name python --display-name python 31 | python -m ipykernel install --user --name python3 --display-name python3 32 | cd docs 33 | cp ../README.md index.md 34 | cp ../example.png example.png 35 | cp ../example2.png example2.png 36 | # cp ../logo.png logo.png 37 | cd .. 
38 | mkdocs build 39 | if : success() 40 | - name: Deploy docs 41 | run: | 42 | mkdocs gh-deploy --clean --force 43 | # if: success() && github.ref == 'refs/heads/master' 44 | 45 | fix-index: 46 | needs: docs 47 | runs-on: ubuntu-latest 48 | # if: github.ref == 'refs/heads/master' 49 | strategy: 50 | matrix: 51 | python-version: ["3.12"] 52 | steps: 53 | - uses: actions/checkout@v4 54 | with: 55 | ref: gh-pages 56 | - name: Fix index.html 57 | run: | 58 | echo ':: head of index.html - before ::' 59 | head index.html 60 | sed -i '1,5{/^$/d}' index.html 61 | echo ':: head of index.html - after ::' 62 | head index.html 63 | if: success() 64 | - name: Commit changes 65 | run: | 66 | git config --local user.email "action@github.com" 67 | git config --local user.name "GitHub Action" 68 | git commit -m "Add changes" -a 69 | if: success() 70 | - name: Push changes 71 | uses: ad-m/github-push-action@master 72 | with: 73 | github_token: ${{ secrets.GITHUB_TOKEN }} 74 | branch: gh-pages 75 | if: success() 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | .coverage.xml 46 | cov.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | workdir/ 94 | node_modules/ 95 | _book/ 96 | .vscode 97 | export/ 98 | *.svg 99 | *.dot 100 | *.queue.txt 101 | site/ 102 | 103 | # poetry 104 | # poetry.lock 105 | 106 | # backup files 107 | *.bak 108 | 109 | docs/index.md 110 | docs/logo.png 111 | docs/example.png 112 | docs/example2.png 113 | docs/api/ 114 | docs/*.nbconvert.ipynb 115 | docs/*/*.nbconvert.ipynb 116 | 117 | # vscode's local history extension 118 | .history/ 119 | 120 | # For quick test 121 | /_t.py 122 | /_t.ipynb 123 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: 5df1a4bf6f04a1ed3a643167b38d502575e29aef 5 | hooks: 6 | - id: 
trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | exclude: 'mkdocs.yml' 10 | - repo: local 11 | hooks: 12 | - id: flake8 13 | name: Run flake8 14 | files: ^datar/.+$ 15 | pass_filenames: false 16 | entry: flake8 17 | args: [datar] 18 | types: [python] 19 | language: system 20 | - id: versionchecker 21 | name: Check version agreement in pyproject and __version__ 22 | entry: bash -c 23 | language: system 24 | args: 25 | - get_ver() { echo $(egrep "^__version|^version" $1 | cut -d= -f2 | sed 's/\"\| //g'); }; 26 | v1=`get_ver pyproject.toml`; 27 | v2=`get_ver datar/__init__.py`; 28 | if [[ $v1 == $v2 ]]; then exit 0; else exit 1; fi 29 | pass_filenames: false 30 | files: ^pyproject\.toml|datar/__init__\.py$ 31 | - id: pytest 32 | name: Run pytest 33 | entry: pytest 34 | language: system 35 | args: [tests/] 36 | pass_filenames: false 37 | files: ^tests/.+$|^datar/.+$ 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 pwwang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datar 2 | 3 | A Grammar of Data Manipulation in python 4 | 5 | 6 | [![Pypi][6]][7] [![Github][8]][9] ![Building][10] [![Docs and API][11]][5] [![Codacy][12]][13] [![Codacy coverage][14]][13] [![Downloads][20]][7] 7 | 8 | [Documentation][5] | [Reference Maps][15] | [Notebook Examples][16] | [API][17] 9 | 10 | `datar` is a re-imagining of APIs for data manipulation in python with multiple backends supported. Those APIs are aligned with tidyverse packages in R as much as possible. 
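Backend discovery is plugin-based (see `datar/core/load_plugins.py` and the `backends` option in `datar/core/options.py` later in this dump). A minimal sketch of pinning one backend, assuming `datar-pandas` is installed; the exact plugin name passed to `backends` is an assumption here:

```python
# Sketch: restrict plugin loading to one backend. The `backends` option is
# defined in datar/core/options.py and must be set before the first import
# of datar.all / datar.base, which is what triggers plugin loading.
from datar import options

options(backends=["pandas"])  # plugin name assumed; check your installed backend
from datar.all import f, tibble  # only the pinned backend is loaded
```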
11 | 
 12 | ## Installation
 13 | 
 14 | ```shell
 15 | pip install -U datar
 16 | 
 17 | # install with a backend
 18 | pip install -U datar[pandas]
 19 | 
 20 | # More backends support coming soon
 21 | ```
 22 | 
 23 | 
 28 | 
 29 | ## Backends
 30 | 
 31 | |Repo|Badges|
 32 | |-|-|
 33 | |[datar-numpy][1]|![3] ![18]|
 34 | |[datar-pandas][2]|![4] ![19]|
 35 | |[datar-arrow][22]|![23] ![24]|
 36 | 
 37 | ## Example usage
 38 | 
 39 | ```python
 40 | # with pandas backend
 41 | from datar import f
 42 | from datar.dplyr import mutate, filter_, if_else
 43 | from datar.tibble import tibble
 44 | # or
 45 | # from datar.all import f, mutate, filter_, if_else, tibble
 46 | 
 47 | df = tibble(
 48 |     x=range(4),  # or c[:4] (from datar.base import c)
 49 |     y=['zero', 'one', 'two', 'three']
 50 | )
 51 | df >> mutate(z=f.x)
 52 | """# output
 53 |         x        y      z
 54 |   <int64> <object> <int64>
 55 | 0       0     zero      0
 56 | 1       1      one      1
 57 | 2       2      two      2
 58 | 3       3    three      3
 59 | """
 60 | 
 61 | df >> mutate(z=if_else(f.x>1, 1, 0))
 62 | """# output:
 63 |         x        y      z
 64 |   <int64> <object> <int64>
 65 | 0       0     zero      0
 66 | 1       1      one      0
 67 | 2       2      two      1
 68 | 3       3    three      1
 69 | """
 70 | 
 71 | df >> filter_(f.x>1)
 72 | """# output:
 73 |        x      y
 74 |   <int64> <object>
 75 | 0       2      two
 76 | 1       3    three
 77 | """
 78 | 
 79 | df >> mutate(z=if_else(f.x>1, 1, 0)) >> filter_(f.z==1)
 80 | """# output:
 81 |         x        y      z
 82 |   <int64> <object> <int64>
 83 | 0       2      two      1
 84 | 1       3    three      1
 85 | """
 86 | ```
 87 | 
 88 | ```python
 89 | # works with plotnine
 90 | # example grabbed from https://github.com/has2k1/plydata
 91 | import numpy
 92 | from datar import f
 93 | from datar.base import sin, pi
 94 | from datar.tibble import tibble
 95 | from datar.dplyr import mutate, if_else
 96 | from plotnine import ggplot, aes, geom_line, theme_classic
 97 | 
 98 | df = tibble(x=numpy.linspace(0, 2 * pi, 500))
 99 | (
 100 |     df
 101 |     >> mutate(y=sin(f.x), sign=if_else(f.y >= 0, "positive", "negative"))
 102 |     >> ggplot(aes(x="x", y="y"))
 103 |     + theme_classic()
 104 |     + geom_line(aes(color="sign"), size=1.2)
 105 | )
 106 | ```
 107 | 
 108 | ![example](./example.png)
 109 | 
 110 | ```python
 111 | # very easy to integrate with other libraries
 112 | # for example: klib
 113 | import klib
 114 | from pipda import register_verb
 115 | from datar import f
 116 | from datar.data import iris
 117 | from datar.dplyr import pull
 118 | 
 119 | dist_plot = register_verb(func=klib.dist_plot)
 120 | iris >> pull(f.Sepal_Length) >> dist_plot()
 121 | ```
 122 | 
 123 | ![example](./example2.png)
 124 | 
 125 | ## Testimonials
 126 | 
 127 | [@coforfe](https://github.com/coforfe):
 128 | > Thanks for your excellent package to port R (`dplyr`) flow of processing to Python. I have been using other alternatives, and yours is the one that offers the most extensive and equivalent to what is possible now with `dplyr`. 
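The `register_verb` trick in the klib example above generalizes to any plain function. A sketch with an illustrative helper (`describe` and `describe_verb` are not datar or pipda names):

```python
# Sketch: any unary function becomes a pipeable verb via pipda.register_verb.
from pipda import register_verb
from datar import f
from datar.data import iris
from datar.dplyr import pull

def describe(x):
    """Report basic stats of a numeric sequence (illustrative helper)."""
    return {"n": len(x), "min": min(x), "max": max(x)}

describe_verb = register_verb(func=describe)
iris >> pull(f.Sepal_Length) >> describe_verb()
```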
129 | 130 | [1]: https://github.com/pwwang/datar-numpy 131 | [2]: https://github.com/pwwang/datar-pandas 132 | [3]: https://img.shields.io/codacy/coverage/0a7519dad44246b6bab30576895f6766?style=flat-square 133 | [4]: https://img.shields.io/codacy/coverage/45f4ea84ae024f1a8cf84be54dd144f7?style=flat-square 134 | [5]: https://pwwang.github.io/datar/ 135 | [6]: https://img.shields.io/pypi/v/datar?style=flat-square 136 | [7]: https://pypi.org/project/datar/ 137 | [8]: https://img.shields.io/github/v/tag/pwwang/datar?style=flat-square 138 | [9]: https://github.com/pwwang/datar 139 | [10]: https://img.shields.io/github/actions/workflow/status/pwwang/datar/ci.yml?branch=master&style=flat-square 140 | [11]: https://img.shields.io/github/actions/workflow/status/pwwang/datar/docs.yml?branch=master&style=flat-square 141 | [12]: https://img.shields.io/codacy/grade/3d9bdff4d7a34bdfb9cd9e254184cb35?style=flat-square 142 | [13]: https://app.codacy.com/gh/pwwang/datar 143 | [14]: https://img.shields.io/codacy/coverage/3d9bdff4d7a34bdfb9cd9e254184cb35?style=flat-square 144 | [15]: https://pwwang.github.io/datar/reference-maps/ALL/ 145 | [16]: https://pwwang.github.io/datar/notebooks/across/ 146 | [17]: https://pwwang.github.io/datar/api/datar/ 147 | [18]: https://img.shields.io/pypi/v/datar-numpy?style=flat-square 148 | [19]: https://img.shields.io/pypi/v/datar-pandas?style=flat-square 149 | [20]: https://img.shields.io/pypi/dm/datar?style=flat-square 150 | [21]: https://github.com/tidyverse/dplyr 151 | [22]: https://github.com/pwwang/datar-arrow 152 | [23]: https://img.shields.io/codacy/coverage/5f4ef9dd2503437db18786ff9e841d8b?style=flat-square 153 | [24]: https://img.shields.io/pypi/v/datar-arrow?style=flat-square 154 | -------------------------------------------------------------------------------- /datar/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Mapping as _Mapping 2 | 3 | from .core import operator as _ 4 | from .core.defaults import f 5 | from .core.options import options, get_option, options_context 6 | 7 | __version__ = "0.15.9" 8 | 9 | 10 | def get_versions(prnt: bool = True) -> _Mapping[str, str]: 11 | """Return/Print the versions of the dependencies. 12 | 13 | Args: 14 | prnt: If True, print the versions, otherwise return them. 15 | 16 | Returns: 17 | A dict of the versions of the dependencies if `prnt` is False. 
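A usage sketch for `get_versions` as defined here:

```python
# Sketch: print the version table, or collect it as a dict instead.
import datar

datar.get_versions()                   # prints one "name: version" line per dependency
vers = datar.get_versions(prnt=False)  # e.g. {"python": ..., "datar": "0.15.9", ...}
```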
18 | """ 19 | import sys 20 | import executing 21 | import pipda 22 | import simplug 23 | from .core.load_plugins import plugin 24 | 25 | versions = { 26 | "python": sys.version, 27 | "datar": __version__, 28 | "simplug": simplug.__version__, 29 | "executing": executing.__version__, 30 | "pipda": pipda.__version__, 31 | } 32 | 33 | versions_plg = plugin.hooks.get_versions() 34 | versions.update(versions_plg) 35 | 36 | if not prnt: 37 | return versions 38 | 39 | keylen = max(map(len, versions)) 40 | for key in versions: 41 | ver = versions[key] 42 | verlines = ver.splitlines() 43 | print(f"{key.ljust(keylen)}: {verlines.pop(0)}") 44 | for verline in verlines: # pragma: no cover 45 | print(f"{' ' * keylen} {verline}") 46 | 47 | return None 48 | -------------------------------------------------------------------------------- /datar/all.py: -------------------------------------------------------------------------------- 1 | """Import all constants, verbs and functions""" 2 | 3 | from .core import load_plugins as _ 4 | from .core.defaults import f 5 | 6 | from .base import _conflict_names as _base_conflict_names 7 | from .dplyr import _conflict_names as _dplyr_conflict_names 8 | 9 | from .base import * 10 | from .dplyr import * 11 | from .forcats import * 12 | from .tibble import * 13 | from .tidyr import * 14 | from .misc import * 15 | 16 | __all__ = [key for key in locals() if not key.startswith("_")] 17 | 18 | if get_option("allow_conflict_names"): # noqa: F405 19 | __all__.extend(_base_conflict_names | _dplyr_conflict_names) 20 | for name in _base_conflict_names | _dplyr_conflict_names: 21 | locals()[name] = locals()[name + "_"] 22 | 23 | 24 | def __getattr__(name): 25 | """Even when allow_conflict_names is False, datar.base.sum should be fine 26 | """ 27 | if name in _base_conflict_names | _dplyr_conflict_names: 28 | import sys 29 | import ast 30 | from executing import Source 31 | node = Source.executing(sys._getframe(1)).node 32 | if isinstance(node, (ast.Call, ast.Attribute)): 33 | # import datar.all as d 34 | # d.sum(...) or getattr(d, "sum")(...) 35 | return globals()[name + "_"] 36 | 37 | raise AttributeError 38 | -------------------------------------------------------------------------------- /datar/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/apis/__init__.py -------------------------------------------------------------------------------- /datar/apis/misc.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 3 | from pipda import register_func 4 | 5 | 6 | @contextmanager 7 | def _array_ufunc_with_backend(backend: str): 8 | """Use a backend for the operator""" 9 | old_backend = array_ufunc.backend 10 | array_ufunc.backend = backend 11 | yield 12 | array_ufunc.backend = old_backend 13 | 14 | 15 | @register_func(cls=object, dispatchable="first") 16 | def array_ufunc(x, ufunc, *args, kind, **kwargs): 17 | """Implement the array ufunc 18 | 19 | Allow other backends to override the behavior of the ufunc on 20 | different types of data. 
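A sketch of what this dispatch enables, assuming the pandas backend is installed:

```python
# Sketch: a NumPy ufunc applied to an f-expression is routed through
# array_ufunc (registered in datar/core/load_plugins.py), so it is
# evaluated lazily inside a verb rather than eagerly on the symbol.
import numpy as np
from datar import f
from datar.tibble import tibble
from datar.dplyr import mutate

df = tibble(x=[1.0, 4.0, 9.0])
df >> mutate(root=np.sqrt(f.x))  # root column: 1.0, 2.0, 3.0
```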
21 |     """
 22 |     return ufunc(x, *args, **kwargs)
 23 | 
 24 | 
 25 | array_ufunc.backend = None
 26 | array_ufunc.with_backend = _array_ufunc_with_backend
 27 | 
-------------------------------------------------------------------------------- /datar/apis/tibble.py: --------------------------------------------------------------------------------
 1 | from __future__ import annotations as _
 2 | from typing import Any, Callable as _Callable
 3 | 
 4 | from pipda import (
 5 |     register_verb as _register_verb,
 6 |     register_func as _register_func,
 7 | )
 8 | 
 9 | from ..core.utils import (
 10 |     NotImplementedByCurrentBackendError as _NotImplementedByCurrentBackendError,
 11 | )
 12 | 
 13 | 
 14 | @_register_func(plain=True)
 15 | def tibble(
 16 |     *args,
 17 |     _name_repair: str | _Callable = "check_unique",
 18 |     _rows: int = None,
 19 |     _dtypes=None,
 20 |     _drop_index: bool = False,
 21 |     _index=None,
 22 |     **kwargs,
 23 | ) -> Any:
 24 |     """Constructs a data frame
 25 | 
 26 |     Args:
 27 |         *args: and
 28 |         **kwargs: A set of name-value pairs.
 29 |         _name_repair: treatment of problematic column names:
 30 |             - "minimal": No name repair or checks, beyond basic existence,
 31 |             - "unique": Make sure names are unique and not empty,
 32 |             - "check_unique": (default value), no name repair,
 33 |                 but check that they are unique,
 34 |             - "universal": Make the names unique and syntactic
 35 |             - a function: apply custom name repair
 36 |         _rows: Number of rows of a 0-col dataframe when args and kwargs are
 37 |             not provided. When args or kwargs are provided, this is ignored.
 38 |         _dtypes: The dtypes for each column to convert to.
 39 |         _drop_index: Whether to drop the index for the final data frame
 40 |         _index: The new index of the output frame
 41 | 
 42 |     Returns:
 43 |         A constructed tibble
 44 |     """
 45 |     raise _NotImplementedByCurrentBackendError("tibble")
 46 | 
 47 | 
 48 | @_register_func(pipeable=True, dispatchable=True)
 49 | def tibble_(
 50 |     *args,
 51 |     _name_repair: str | _Callable = "check_unique",
 52 |     _rows: int = None,
 53 |     _dtypes=None,
 54 |     _drop_index: bool = False,
 55 |     _index=None,
 56 |     **kwargs,
 57 | ) -> Any:
 58 |     raise _NotImplementedByCurrentBackendError("tibble_")
 59 | 
 60 | 
 61 | @_register_func(plain=True)
 62 | def tribble(
 63 |     *dummies,
 64 |     _name_repair: str | _Callable = "minimal",
 65 |     _dtypes=None,
 66 | ) -> Any:
 67 |     """Create dataframe using an easier to read row-by-row layout
 68 |     Unlike the original API, which uses formulas (`~col`) to indicate the
 69 |     column names, here we use `f.col`.
 70 | 
 71 |     Args:
 72 |         *dummies: Arguments specifying the structure of a dataframe
 73 |             Variable names should be specified with `f.name`
 74 |         _dtypes: The dtypes for each column to convert to.
 75 | 
 76 |     Examples:
 77 |         >>> tribble(
 78 |         >>>     f.colA, f.colB,
 79 |         >>>     "a",    1,
 80 |         >>>     "b",    2,
 81 |         >>>     "c",    3,
 82 |         >>> )
 83 | 
 84 |     Returns:
 85 |         A dataframe
 86 |     """
 87 |     raise _NotImplementedByCurrentBackendError("tribble")
 88 | 
 89 | 
 90 | @_register_func(plain=True)
 91 | def tibble_row(
 92 |     *args,
 93 |     _name_repair: str | _Callable = "check_unique",
 94 |     _dtypes=None,
 95 |     **kwargs,
 96 | ) -> Any:
 97 |     """Constructs a data frame that is guaranteed to occupy one row.
 98 |     Scalar values will be wrapped with `[]`
 99 |     Args:
 100 |         *args: and
 101 |         **kwargs: A set of name-value pairs. 
102 |         _name_repair: treatment of problematic column names:
 103 |             - "minimal": No name repair or checks, beyond basic existence,
 104 |             - "unique": Make sure names are unique and not empty,
 105 |             - "check_unique": (default value), no name repair,
 106 |                 but check that they are unique,
 107 |             - "universal": Make the names unique and syntactic
 108 |             - a function: apply custom name repair
 109 |     Returns:
 110 |         A constructed dataframe
 111 |     """
 112 |     raise _NotImplementedByCurrentBackendError("tibble_row")
 113 | 
 114 | 
 115 | @_register_verb()
 116 | def as_tibble(df) -> Any:
 117 |     """Convert a DataFrame object to Tibble object"""
 118 |     raise _NotImplementedByCurrentBackendError("as_tibble", df)
 119 | 
 120 | 
 121 | @_register_verb()
 122 | def enframe(x, name="name", value="value") -> Any:
 123 |     """Converts mappings or lists to one- or two-column data frames.
 124 | 
 125 |     Args:
 126 |         x: a list, a dictionary or a dataframe with one or two columns
 127 |         name: and
 128 |         value: Names of the columns that store the names and values.
 129 |             If `None`, a one-column dataframe is returned.
 130 |             `value` cannot be `None`
 131 | 
 132 |     Returns:
 133 |         A data frame with two columns if `name` is not None (default) or
 134 |         one-column otherwise.
 135 |     """
 136 |     raise _NotImplementedByCurrentBackendError("enframe", x)
 137 | 
 138 | 
 139 | @_register_verb()
 140 | def deframe(x) -> Any:
 141 |     """Converts two-column data frames to a dictionary
 142 |     using the first column as name and the second column as value.
 143 |     If the input has only one column, a list is returned.
 144 | 
 145 |     Args:
 146 |         x: A data frame.
 147 | 
 148 |     Returns:
 149 |         A dictionary, or a list if there is only one column in the data frame.
 150 |     """
 151 |     raise _NotImplementedByCurrentBackendError("deframe", x)
 152 | 
 153 | 
 154 | @_register_verb()
 155 | def add_row(
 156 |     _data,
 157 |     *args,
 158 |     _before=None,
 159 |     _after=None,
 160 |     **kwargs,
 161 | ) -> Any:
 162 |     """Add one or more rows of data to an existing data frame.
 163 | 
 164 |     Aliases `add_case`
 165 | 
 166 |     Args:
 167 |         _data: Data frame to append to.
 168 |         *args: and
 169 |         **kwargs: Name-value pairs to add to the data frame.
 170 |         _before: and
 171 |         _after: row index where to add the new rows
 172 |             (defaults to adding after the last row)
 173 | 
 174 |     Returns:
 175 |         The dataframe with the added rows
 176 | 
 177 |     """
 178 |     raise _NotImplementedByCurrentBackendError("add_row", _data)
 179 | 
 180 | 
 181 | @_register_verb()
 182 | def add_column(
 183 |     _data,
 184 |     *args,
 185 |     _before=None,
 186 |     _after=None,
 187 |     _name_repair="check_unique",
 188 |     _dtypes=None,
 189 |     **kwargs,
 190 | ) -> Any:
 191 |     """Add one or more columns to an existing data frame.
 192 | 
 193 |     Args:
 194 |         _data: Data frame to append to
 195 |         *args: and
 196 |         **kwargs: Name-value pairs to add to the data frame
 197 |         _before: and
 198 |         _after: Column index or name where to add the new columns
 199 |             (defaults to adding after the last column)
 200 |         _dtypes: The dtypes for the new columns, either a uniform dtype or a
 201 |             dict of dtypes keyed by column name
 202 | 
 203 |     Returns:
 204 |         The dataframe with the added columns
 205 |     """
 206 |     raise _NotImplementedByCurrentBackendError("add_column", _data)
 207 | 
 208 | 
 209 | @_register_verb()
 210 | def has_rownames(_data) -> bool:
 211 |     """Detect if a data frame has row names
 212 | 
 213 |     Aliases `has_index`
 214 | 
 215 |     Args:
 216 |         _data: The data frame to check
 217 | 
 218 |     Returns:
 219 |         True if the data frame has an index, otherwise False. 
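A short sketch of the verbs specified above, assuming the pandas backend supplies the implementations:

```python
# Sketch: row/column addition and dict <-> frame conversion.
from datar.tibble import tibble, add_row, add_column, enframe, deframe

df = tibble(x=[1, 2], y=["a", "b"])
df = df >> add_row(x=3, y="c")            # one more row at the end
df = df >> add_column(z=[1.0, 2.0, 3.0])  # one more column at the end
pairs = enframe({"a": 1, "b": 2})         # two-column frame: name, value
deframe(pairs)                            # back to {"a": 1, "b": 2}
```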
220 | 
 221 |     """
 222 |     raise _NotImplementedByCurrentBackendError("has_rownames", _data)
 223 | 
 224 | 
 225 | @_register_verb()
 226 | def remove_rownames(_data) -> Any:
 227 |     """Remove the index/rownames of a data frame
 228 | 
 229 |     Aliases `remove_index`, `drop_index`
 230 | 
 231 |     Args:
 232 |         _data: The data frame
 233 | 
 234 |     Returns:
 235 |         The data frame with index removed
 236 | 
 237 |     """
 238 |     raise _NotImplementedByCurrentBackendError("remove_rownames", _data)
 239 | 
 240 | 
 241 | @_register_verb()
 242 | def rownames_to_column(_data, var="rowname") -> Any:
 243 |     """Add rownames as a column
 244 | 
 245 |     Aliases `index_to_column`
 246 | 
 247 |     Args:
 248 |         _data: The data frame
 249 |         var: The name of the column
 250 | 
 251 |     Returns:
 252 |         The data frame with rownames added as one column. Note that the
 253 |         original index is removed.
 254 |     """
 255 |     raise _NotImplementedByCurrentBackendError("rownames_to_column", _data)
 256 | 
 257 | 
 258 | @_register_verb()
 259 | def rowid_to_column(_data, var="rowid") -> Any:
 260 |     """Add row ids as a column
 261 | 
 262 |     Args:
 263 |         _data: The data frame
 264 |         var: The name of the column
 265 | 
 266 |     Returns:
 267 |         The data frame with row ids added as one column.
 268 | 
 269 |     """
 270 |     raise _NotImplementedByCurrentBackendError("rowid_to_column", _data)
 271 | 
 272 | 
 273 | @_register_verb()
 274 | def column_to_rownames(_data, var="rowname") -> Any:
 275 |     """Set rownames/index with one column, and remove it
 276 | 
 277 |     Aliases `column_to_index`
 278 | 
 279 |     Args:
 280 |         _data: The data frame
 281 |         var: The column to convert to the rownames
 282 | 
 283 |     Returns:
 284 |         The data frame with the column converted to rownames
 285 |     """
 286 |     raise _NotImplementedByCurrentBackendError("column_to_rownames", _data)
 287 | 
 288 | 
 289 | # aliases
 290 | add_case = add_row
 291 | has_index = has_rownames
 292 | remove_index = drop_index = remove_rownames
 293 | index_to_column = rownames_to_column
 294 | column_to_index = column_to_rownames
 295 | 
-------------------------------------------------------------------------------- /datar/base.py: --------------------------------------------------------------------------------
 1 | 
 2 | from .core.load_plugins import plugin as _plugin
 3 | from .apis.base import *
 4 | 
 5 | locals().update(_plugin.hooks.base_api())
 6 | __all__ = [key for key in locals() if not key.startswith("_")]
 7 | _conflict_names = {"min", "max", "sum", "abs", "round", "all", "any", "re"}
 8 | 
 9 | if get_option("allow_conflict_names"):  # noqa: F405
 10 |     __all__.extend(_conflict_names)
 11 |     for name in _conflict_names:
 12 |         locals()[name] = locals()[name + "_"]
 13 | 
 14 | 
 15 | def __getattr__(name):
 16 |     """Even when allow_conflict_names is False, datar.base.sum should be fine
 17 |     """
 18 |     if name in _conflict_names:
 19 |         import sys
 20 |         import ast
 21 |         from executing import Source
 22 |         node = Source.executing(sys._getframe(1)).node
 23 |         if isinstance(node, (ast.Call, ast.Attribute)):
 24 |             # import datar.base as d
 25 |             # d.sum(...) 
26 | return globals()[name + "_"] 27 | 28 | raise AttributeError 29 | -------------------------------------------------------------------------------- /datar/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/core/__init__.py -------------------------------------------------------------------------------- /datar/core/defaults.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pipda import Symbolic 4 | 5 | f = Symbolic() 6 | 7 | OPTION_FILE_HOME = Path("~/.datar.toml").expanduser() 8 | OPTION_FILE_CWD = Path("./.datar.toml").resolve() 9 | -------------------------------------------------------------------------------- /datar/core/load_plugins.py: -------------------------------------------------------------------------------- 1 | from pipda import register_array_ufunc 2 | 3 | from .options import get_option 4 | from .plugin import plugin 5 | 6 | 7 | def _array_ufunc_to_register(ufunc, x, *args, kind, **kwargs): 8 | """Register the array ufunc to pipda""" 9 | from ..apis.misc import array_ufunc 10 | 11 | return array_ufunc( 12 | x, 13 | ufunc, 14 | *args, 15 | kind=kind, 16 | **kwargs, 17 | __backend=array_ufunc.backend, 18 | ) 19 | 20 | 21 | plugin.load_entrypoints(only=get_option("backends")) 22 | 23 | plugin.hooks.setup() 24 | register_array_ufunc(_array_ufunc_to_register) 25 | -------------------------------------------------------------------------------- /datar/core/names.py: -------------------------------------------------------------------------------- 1 | """Name repairing""" 2 | import inspect 3 | import re 4 | import keyword 5 | import math 6 | from numbers import Number 7 | from typing import Any, Callable, List, Union, Iterable, Tuple 8 | 9 | from .utils import logger 10 | 11 | 12 | class NameNonUniqueError(ValueError): 13 | """Error for non-unique names""" 14 | 15 | 16 | def _isnan(x: Any) -> bool: 17 | """Check if x is nan""" 18 | return isinstance(x, Number) and math.isnan(x) 19 | 20 | 21 | def _is_scalar(x: Any) -> bool: 22 | """Check if x is scalar""" 23 | if isinstance(x, str): # pragma: no cover 24 | return True 25 | try: 26 | iter(x) 27 | except TypeError: 28 | return True 29 | return False 30 | 31 | 32 | def _log_changed_names(changed_names: List[Tuple[str, str]]) -> None: 33 | """Log the changed names""" 34 | if not changed_names: 35 | return 36 | 37 | logger.warning("New names:") 38 | for orig_name, new_name in changed_names: 39 | logger.warning("* %r -> %r", orig_name, new_name) 40 | 41 | 42 | def _repair_names_minimal(names: Iterable[str]) -> List[str]: 43 | """Minimal repairing""" 44 | return ["" if name is None or _isnan(name) else str(name) for name in names] 45 | 46 | 47 | def _repair_names_unique( 48 | names: Iterable[str], 49 | quiet: bool = False, 50 | sanitizer: Callable = None, 51 | ) -> List[str]: 52 | """Make sure names are unique""" 53 | min_names = _repair_names_minimal(names) 54 | neat_names = [ 55 | re.sub(r"(?:(? 
<!_)_{1,2}\d+|(?<!_)__)+$", "", name)
 56 |         for name in min_names
 57 |     ]
 58 |     if sanitizer is not None:
 59 |         neat_names = [sanitizer(name) for name in neat_names]
 60 | 
 61 |     new_names = []
 62 |     changed_names = []
 63 |     for i, name in enumerate(names, 1):
 64 |         neat_name = neat_names[i - 1]
 65 |         if neat_names.count(neat_name) > 1 or neat_name == "":
 66 |             neat_name = f"{neat_name}__{i}"
 67 |         if neat_name != name:
 68 |             changed_names.append((name, neat_name))
 69 |         new_names.append(neat_name)
 70 |     if not quiet:
 71 |         _log_changed_names(changed_names)
 72 |     return new_names
 73 | 
 74 | 
 75 | def _repair_names_universal(
 76 |     names: Iterable[str],
 77 |     quiet: bool = False,
 78 | ) -> List[str]:
 79 |     """Make sure names are safe to be used as variables or attributes"""
 80 |     min_names = _repair_names_minimal(names)
 81 |     neat_names = [re.sub(r"[^\w]", "_", name) for name in min_names]
 82 |     new_names = _repair_names_unique(
 83 |         neat_names,
 84 |         quiet=True,
 85 |         sanitizer=lambda name: (
 86 |             f"_{name}"
 87 |             if keyword.iskeyword(name) or (name and name[0].isdigit())
 88 |             else name
 89 |         ),
 90 |     )
 91 |     if not quiet:
 92 |         changed_names = [
 93 |             (orig_name, new_name)
 94 |             for orig_name, new_name in zip(names, new_names)
 95 |             if orig_name != new_name
 96 |         ]
 97 |         _log_changed_names(changed_names)
 98 |     return new_names
 99 | 
 100 | 
 101 | def _repair_names_check_unique(names: Iterable[str]) -> Iterable[str]:
 102 |     """Just check the uniqueness"""
 103 |     for name in names:
 104 |         if names.count(name) > 1:
 105 |             raise NameNonUniqueError(f"Names must be unique: {name}")
 106 |         if name == "" or _isnan(name):
 107 |             raise NameNonUniqueError(f"Names can't be empty: {name}")
 108 |         if re.search(r"(?:(?<!_)_{1,2}\d+|(?<!_)__)+$", str(name)):
 109 |             raise NameNonUniqueError(
 110 |                 f"Names can't be of the form `__` or `_j`: {name}"
 111 |             )
 112 |     return names
 113 | 
 114 | 
 115 | BUILTIN_REPAIR_METHODS = dict(
 116 |     minimal=_repair_names_minimal,
 117 |     unique=_repair_names_unique,
 118 |     universal=_repair_names_universal,
 119 |     check_unique=_repair_names_check_unique,
 120 | )
 121 | 
 122 | 
 123 | def repair_names(
 124 |     names: Iterable[str],
 125 |     repair: Union[str, Callable],
 126 | ) -> List[str]:
 127 |     """Repair names based on the method
 128 | 
 129 |     Args:
 130 |         names: The names to be repaired
 131 |         repair: The method to repair
 132 |             - `minimal`: Minimal names are never None or NA.
 133 |                 When an element doesn't have a name, its minimal name
 134 |                 is an empty string.
 135 |             - `unique`: Unique names are unique. A suffix is appended to
 136 |                 duplicate names to make them unique.
 137 |             - `universal`: Universal names are unique and syntactic,
 138 |                 meaning that you can safely use the names as variables without
 139 |                 causing a syntax error (like `f.<name>`).
 140 |             - A function that accepts either a list of names or a single name.
 141 |                 A function that accepts a list of names must annotate its first
 142 |                 argument with `typing.Iterable` or `typing.Sequence`. 
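These repair strategies also back the `_name_repair` argument of `tibble()`. A sketch of calling `repair_names` directly; the expected outputs follow the implementation above (1-based suffixes):

```python
# Sketch: repairing names directly with the built-in methods.
from datar.core.names import repair_names

repair_names(["x", "x", ""], repair="unique")     # ['x__1', 'x__2', '__3']
repair_names(["for", "1st"], repair="universal")  # ['_for', '_1st']
```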
143 | 
 144 |     Examples:
 145 |         >>> repair_names([None]*3, repair="minimal")
 146 |         >>> # ["", "", ""]
 147 |         >>> repair_names(["x", NA], repair="minimal")
 148 |         >>> # ["x", ""]
 149 |         >>> repair_names(["", "x", "", "y", "x", "_2", "__"], repair="unique")
 150 |         >>> # ["__1", "x__2", "__3", "y", "x__5", "__6", "__7"]
 151 |         >>> repair_names(["", "x", NA, "x"], repair="universal")
 152 |         >>> # ["__1", "x__2", "__3", "x__4"]
 153 |         >>> repair_names(["(y)", "_z", ".2fa", "False"], repair="universal")
 154 |         >>> # ["_y_", "_z", "_2fa", "_False"]
 155 | 
 156 |     Returns:
 157 |         The repaired names
 158 | 
 159 |     Raises:
 160 |         ValueError: when repair is not a string or callable
 161 |         NameNonUniqueError: when check_unique fails
 162 |     """
 163 |     if isinstance(repair, str):
 164 |         repair = BUILTIN_REPAIR_METHODS[repair]  # type: ignore
 165 |     elif (
 166 |         not _is_scalar(repair)
 167 |         and all(isinstance(elem, str) for elem in repair)
 168 |     ):
 169 |         return repair  # type: ignore
 170 |     elif not callable(repair):
 171 |         raise ValueError("Expect a function for name repairing.")
 172 | 
 173 |     parameters = inspect.signature(repair).parameters  # type: ignore
 174 |     annotation = list(parameters.values())[0].annotation
 175 |     if annotation is inspect._empty or annotation._name not in (
 176 |         "Iterable",
 177 |         "Sequence",
 178 |     ):  # scalar input
 179 |         return [repair(name) for name in names]
 180 | 
 181 |     return repair(names)
-------------------------------------------------------------------------------- /datar/core/operator.py: --------------------------------------------------------------------------------
 1 | """Operators for datar"""
 2 | from typing import Callable
 3 | from contextlib import contextmanager
 4 | 
 5 | from pipda import register_operator, Operator
 6 | 
 7 | 
 8 | @register_operator
 9 | class DatarOperator(Operator):
 10 |     """Operator class for datar"""
 11 | 
 12 |     backend = None
 13 | 
 14 |     @classmethod
 15 |     @contextmanager
 16 |     def with_backend(cls, backend: str):
 17 |         """Use a backend for the operator"""
 18 |         old_backend = cls.backend
 19 |         cls.backend = backend
 20 |         yield
 21 |         cls.backend = old_backend
 22 | 
 23 |     def __getattr__(self, name: str) -> Callable:
 24 |         from .plugin import plugin
 25 |         return lambda x, y=None: plugin.hooks.operate(
 26 |             name,
 27 |             x,
 28 |             y,
 29 |             __plugin=self.__class__.backend,
 30 |         )
 31 | 
-------------------------------------------------------------------------------- /datar/core/options.py: --------------------------------------------------------------------------------
 1 | """Provide options"""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Any, Generator, Mapping
 5 | from contextlib import contextmanager
 6 | 
 7 | from diot import Diot
 8 | from simpleconf import Config
 9 | 
 10 | from .defaults import OPTION_FILE_CWD, OPTION_FILE_HOME
 11 | 
 12 | _key_transform = lambda key: key.replace("_", ".")
 13 | _dict_transform_back = lambda dic: {
 14 |     key.replace(".", "_"): val for key, val in dic.items()
 15 | }
 16 | 
 17 | OPTIONS = Diot(
 18 |     Config.load(
 19 |         {
 20 |             # Do we allow using conflicting names directly? 
21 |             "allow_conflict_names": False,
 22 |             # Disable some installed backends
 23 |             "backends": [],
 24 |         },
 25 |         OPTION_FILE_HOME,
 26 |         OPTION_FILE_CWD,
 27 |         ignore_nonexist=True,
 28 |     ),
 29 |     diot_transform=_key_transform,
 30 | )
 31 | 
 32 | 
 33 | def options(
 34 |     *args: str | Mapping[str, Any],
 35 |     _return: bool = None,
 36 |     **kwargs: Any,
 37 | ) -> Mapping[str, Any]:
 38 |     """Allow the user to set and examine a variety of global options
 39 | 
 40 |     Args:
 41 |         *args: Names of options to return
 42 |         **kwargs: name-value pairs to create/set options
 43 |         _return: Whether to return the options.
 44 |             If `None`, it is treated as `True` when option names are provided in `args`.
 45 | 
 46 |     Returns:
 47 |         The options before updating if `_return` is `True`.
 48 |     """
 49 |     if not args and not kwargs and (_return is None or _return is True):
 50 |         # Make sure the options won't be changed
 51 |         return OPTIONS.copy()
 52 | 
 53 |     names = [arg.replace(".", "_") for arg in args if isinstance(arg, str)]
 54 |     pairs = {}
 55 |     for arg in args:
 56 |         if isinstance(arg, dict):
 57 |             pairs.update(_dict_transform_back(arg))
 58 |     pairs.update(_dict_transform_back(kwargs))
 59 | 
 60 |     out = None
 61 |     if _return is None:
 62 |         _return = names
 63 | 
 64 |     if _return:
 65 |         out = Diot(
 66 |             {
 67 |                 name: value
 68 |                 for name, value in OPTIONS.items()
 69 |                 if name in names or name in pairs
 70 |             },
 71 |             diot_transform=_key_transform,
 72 |         )
 73 | 
 74 |     for key, val in pairs.items():
 75 |         oldval = OPTIONS[key]
 76 |         if oldval == val:
 77 |             continue
 78 |         OPTIONS[key] = val
 79 | 
 80 |     return out
 81 | 
 82 | 
 83 | @contextmanager
 84 | def options_context(**kwargs: Any) -> Generator:
 85 |     """A context manager to execute code with temporary options
 86 | 
 87 |     Note that this is not thread-safe.
 88 |     """
 89 |     opts = options()  # type: Mapping[str, Any]
 90 |     options(**kwargs)
 91 |     yield
 92 |     options(opts)
 93 | 
 94 | 
 95 | def get_option(x: str, default: Any = None) -> Any:
 96 |     """Get the current value set for option `x`,
 97 |     or `default` (which defaults to `None`) if the option is unset. 
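A sketch of the options API defined in this module:

```python
# Sketch: set options globally, or override them temporarily.
from datar.core.options import options, options_context, get_option

options(allow_conflict_names=True)
with options_context(allow_conflict_names=False):
    assert get_option("allow_conflict_names") is False
assert get_option("allow_conflict_names") is True
```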
98 | 
 99 |     Args:
 100 |         x: The name of the option
 101 |         default: The default value if `x` is unset
 102 |     """
 103 |     return OPTIONS.get(x, default)
 104 | 
 105 | 
 106 | def add_option(x: str, default: Any = None) -> None:
 107 |     """Add an option
 108 | 
 109 |     Args:
 110 |         x: The name of the option
 111 |         default: The default value if `x` is unset
 112 |     """
 113 |     OPTIONS.setdefault(x, default)
 114 | 
-------------------------------------------------------------------------------- /datar/core/plugin.py: --------------------------------------------------------------------------------
 1 | """Plugin system to support different backends"""
 2 | from typing import Any, List, Mapping, Tuple, Callable
 3 | 
 4 | from simplug import Simplug, SimplugResult, makecall
 5 | 
 6 | plugin = Simplug("datar")
 7 | 
 8 | 
 9 | def _collect(calls: List[Tuple[Callable, Tuple, Mapping]]) -> Mapping[str, Any]:
 10 |     """Collect the results from plugins"""
 11 |     collected = {}
 12 |     for call in calls:
 13 |         out = makecall(call)
 14 |         if out is not None:
 15 |             collected.update(out)
 16 |     return collected
 17 | 
 18 | 
 19 | @plugin.spec
 20 | def setup():
 21 |     """Initialize the backend"""
 22 | 
 23 | 
 24 | @plugin.spec(result=_collect)
 25 | def get_versions():
 26 |     """Return the versions of the dependencies of the plugin."""
 27 | 
 28 | 
 29 | @plugin.spec(result=SimplugResult.TRY_SINGLE)
 30 | def load_dataset(name: str, metadata: Mapping):
 31 |     """Implementations for load_dataset()"""
 32 | 
 33 | 
 34 | @plugin.spec(result=_collect)
 35 | def base_api():
 36 |     """What is implemented in the base APIs."""
 37 | 
 38 | 
 39 | @plugin.spec(result=_collect)
 40 | def dplyr_api():
 41 |     """What is implemented in the dplyr APIs."""
 42 | 
 43 | 
 44 | @plugin.spec(result=_collect)
 45 | def tibble_api():
 46 |     """What is implemented in the tibble APIs."""
 47 | 
 48 | 
 49 | @plugin.spec(result=_collect)
 50 | def forcats_api():
 51 |     """What is implemented in the forcats APIs."""
 52 | 
 53 | 
 54 | @plugin.spec(result=_collect)
 55 | def tidyr_api():
 56 |     """What is implemented in the tidyr APIs."""
 57 | 
 58 | 
 59 | @plugin.spec(result=_collect)
 60 | def misc_api():
 61 |     """What is implemented in the misc APIs."""
 62 | 
 63 | 
 64 | @plugin.spec(result=SimplugResult.SINGLE)
 65 | def c_getitem(item):
 66 |     """Get item for c"""
 67 | 
 68 | 
 69 | @plugin.spec(result=SimplugResult.SINGLE)
 70 | def operate(op: str, x: Any, y: Any = None):
 71 |     """Operate on x and y"""
-------------------------------------------------------------------------------- /datar/core/utils.py: --------------------------------------------------------------------------------
 1 | """Utilities for datar"""
 2 | import sys
 3 | import logging
 4 | from typing import Any, Callable
 5 | from contextlib import contextmanager
 6 | 
 7 | from .plugin import plugin
 8 | 
 9 | # logger
 10 | logger = logging.getLogger("datar")
 11 | logger.setLevel(logging.INFO)
 12 | stream_handler = logging.StreamHandler(sys.stderr)
 13 | stream_handler.setFormatter(
 14 |     logging.Formatter(
 15 |         "[%(asctime)s][%(name)s][%(levelname)7s] %(message)s",
 16 |         datefmt="%Y-%m-%d %H:%M:%S",
 17 |     )
 18 | )
 19 | logger.addHandler(stream_handler)
 20 | 
 21 | 
 22 | class NotImplementedByCurrentBackendError(NotImplementedError):
 23 |     """Raised when a function is not implemented by the current backend"""
 24 | 
 25 |     def __init__(self, func: str, data: Any = None) -> None:
 26 |         data_msg = ""
 27 |         if data is not None:
 28 |             data_msg = f"data type: {type(data).__name__}, "
 29 |         msg = (
 30 |             f"'{func}' "
 31 |             f"({data_msg}backends: "
 32 |             f"{', '.join(plugin.get_enabled_plugin_names())})"
 33 |         )
 34 |         
super().__init__(msg) 35 | 36 | 37 | class CollectionFunction: 38 | """Enables c[1:3] to be interpreted as 1:3""" 39 | 40 | def __init__(self, c_func: Callable) -> None: 41 | self.c = c_func 42 | self.backend = None 43 | 44 | def __call__(self, *args, **kwargs): 45 | kwargs["__ast_fallback"] = "normal" 46 | return self.c(*args, **kwargs) 47 | 48 | @contextmanager 49 | def with_backend(self, backend: str): 50 | """Set the backend for c[]""" 51 | _backend = self.backend 52 | self.backend = backend 53 | yield 54 | self.backend = _backend 55 | 56 | def __getitem__(self, item): 57 | """Allow c[1:3] to be interpreted as 1:3""" 58 | return plugin.hooks.c_getitem(item, __plugin=self.backend) 59 | 60 | 61 | def arg_match(arg, argname, values, errmsg=None): 62 | """Make sure arg is in one of the values. 63 | 64 | Mimics `rlang::arg_match`. 65 | """ 66 | if not errmsg: 67 | values = list(values) 68 | errmsg = f"`{argname}` must be one of {values}." 69 | if arg not in values: 70 | raise ValueError(errmsg) 71 | return arg 72 | -------------------------------------------------------------------------------- /datar/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Collects datasets from R-datasets, dplyr and tidyr packages""" 2 | import functools 3 | from typing import Any, List 4 | 5 | from ..core.load_plugins import plugin 6 | from .metadata import Metadata, metadata 7 | 8 | 9 | # Should never do `from datar.data import *` 10 | __all__ = [] # type: List[str] 11 | 12 | 13 | def descr_datasets(*names: str): 14 | """Get the information of the given datasets 15 | 16 | Args: 17 | *names: Names of the datasets to get the information of. 18 | """ 19 | return { 20 | key: val 21 | for key, val in metadata.items() 22 | if key in names or not names 23 | } 24 | 25 | 26 | def add_dataset(name: str, meta: Metadata): 27 | """Add a dataset to the registry 28 | 29 | Args: 30 | name: The name of the dataset 31 | metadata: The metadata of the dataset 32 | """ 33 | metadata[name] = meta 34 | 35 | 36 | @functools.lru_cache() 37 | def load_dataset(name: str, __backend: str = None) -> Any: 38 | """Load the specific dataset""" 39 | loaded = plugin.hooks.load_dataset(name, metadata, __plugin=__backend) 40 | if loaded is None: 41 | from ..core.utils import NotImplementedByCurrentBackendError 42 | raise NotImplementedByCurrentBackendError(f"loading dataset '{name}'") 43 | 44 | return loaded 45 | 46 | 47 | def __getattr__(name: str): 48 | # mkapi accesses quite a lot of attributes starting with _ 49 | if not name.isidentifier() or name.startswith("__"): # pragma: no cover 50 | raise AttributeError(name) 51 | 52 | return load_dataset(name.lower()) 53 | -------------------------------------------------------------------------------- /datar/data/airlines.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/airlines.csv.gz -------------------------------------------------------------------------------- /datar/data/airports.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/airports.csv.gz -------------------------------------------------------------------------------- /datar/data/airquality.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/airquality.csv.gz -------------------------------------------------------------------------------- /datar/data/anscombe.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/anscombe.csv.gz -------------------------------------------------------------------------------- /datar/data/band_instruments.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/band_instruments.csv.gz -------------------------------------------------------------------------------- /datar/data/band_instruments2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/band_instruments2.csv.gz -------------------------------------------------------------------------------- /datar/data/band_members.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/band_members.csv.gz -------------------------------------------------------------------------------- /datar/data/billboard.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/billboard.csv.gz -------------------------------------------------------------------------------- /datar/data/chickweight.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/chickweight.csv.gz -------------------------------------------------------------------------------- /datar/data/cms_patient_care.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/cms_patient_care.csv.gz -------------------------------------------------------------------------------- /datar/data/cms_patient_experience.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/cms_patient_experience.csv.gz -------------------------------------------------------------------------------- /datar/data/construction.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/construction.csv.gz -------------------------------------------------------------------------------- /datar/data/diamonds.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/diamonds.csv.gz -------------------------------------------------------------------------------- /datar/data/economics.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/economics.csv.gz -------------------------------------------------------------------------------- /datar/data/economics_long.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/economics_long.csv.gz -------------------------------------------------------------------------------- /datar/data/faithful.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/faithful.csv.gz -------------------------------------------------------------------------------- /datar/data/faithfuld.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/faithfuld.csv.gz -------------------------------------------------------------------------------- /datar/data/fish_encounters.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/fish_encounters.csv.gz -------------------------------------------------------------------------------- /datar/data/flights.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/flights.csv.gz -------------------------------------------------------------------------------- /datar/data/gss_cat.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/gss_cat.csv.gz -------------------------------------------------------------------------------- /datar/data/household.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/household.csv.gz -------------------------------------------------------------------------------- /datar/data/iris.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/iris.csv.gz -------------------------------------------------------------------------------- /datar/data/luv_colours.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/luv_colours.csv.gz -------------------------------------------------------------------------------- /datar/data/midwest.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/midwest.csv.gz -------------------------------------------------------------------------------- /datar/data/mpg.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/mpg.csv.gz 
-------------------------------------------------------------------------------- /datar/data/msleep.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/msleep.csv.gz -------------------------------------------------------------------------------- /datar/data/mtcars.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/mtcars.csv.gz -------------------------------------------------------------------------------- /datar/data/planes.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/planes.csv.gz -------------------------------------------------------------------------------- /datar/data/population.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/population.csv.gz -------------------------------------------------------------------------------- /datar/data/presidential.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/presidential.csv.gz -------------------------------------------------------------------------------- /datar/data/relig_income.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/relig_income.csv.gz -------------------------------------------------------------------------------- /datar/data/seals.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/seals.csv.gz -------------------------------------------------------------------------------- /datar/data/smiths.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/smiths.csv.gz -------------------------------------------------------------------------------- /datar/data/starwars.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/starwars.csv.gz -------------------------------------------------------------------------------- /datar/data/state_abb.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/state_abb.csv.gz -------------------------------------------------------------------------------- /datar/data/state_division.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/state_division.csv.gz -------------------------------------------------------------------------------- /datar/data/state_region.csv.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/state_region.csv.gz -------------------------------------------------------------------------------- /datar/data/storms.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/storms.csv.gz -------------------------------------------------------------------------------- /datar/data/table1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table1.csv.gz -------------------------------------------------------------------------------- /datar/data/table2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table2.csv.gz -------------------------------------------------------------------------------- /datar/data/table3.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table3.csv.gz -------------------------------------------------------------------------------- /datar/data/table4a.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table4a.csv.gz -------------------------------------------------------------------------------- /datar/data/table4b.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table4b.csv.gz -------------------------------------------------------------------------------- /datar/data/table5.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/table5.csv.gz -------------------------------------------------------------------------------- /datar/data/toothgrowth.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/toothgrowth.csv.gz -------------------------------------------------------------------------------- /datar/data/txhousing.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/txhousing.csv.gz -------------------------------------------------------------------------------- /datar/data/us_rent_income.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/us_rent_income.csv.gz -------------------------------------------------------------------------------- /datar/data/warpbreaks.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/warpbreaks.csv.gz -------------------------------------------------------------------------------- /datar/data/weather.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/weather.csv.gz -------------------------------------------------------------------------------- /datar/data/who.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/who.csv.gz -------------------------------------------------------------------------------- /datar/data/who2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/who2.csv.gz -------------------------------------------------------------------------------- /datar/data/world_bank_pop.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/datar/data/world_bank_pop.csv.gz -------------------------------------------------------------------------------- /datar/datasets.py: -------------------------------------------------------------------------------- 1 | # pragma: no cover 2 | import warnings 3 | 4 | 5 | class DatasetsDeprecatedWarning(DeprecationWarning): 6 | ... 7 | 8 | 9 | warnings.simplefilter("always", DatasetsDeprecatedWarning) 10 | 11 | warnings.warn( 12 | "Importing data from `datar.datasets` is deprecated and " 13 | "will be removed in the future. Try `datar.data` instead.", 14 | DatasetsDeprecatedWarning, 15 | ) 16 | 17 | 18 | def __getattr__(name: str): 19 | from . import data 20 | return getattr(data, name) 21 | -------------------------------------------------------------------------------- /datar/dplyr.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .core.options import get_option as _get_option 4 | from .apis.dplyr import * 5 | 6 | locals().update(_plugin.hooks.dplyr_api()) 7 | __all__ = [key for key in locals() if not key.startswith("_")] 8 | _conflict_names = {"filter", "slice"} 9 | 10 | if _get_option("allow_conflict_names"): 11 | __all__.extend(_conflict_names) 12 | for name in _conflict_names: 13 | locals()[name] = locals()[name + "_"] 14 | 15 | 16 | def __getattr__(name): 17 | """Even when allow_conflict_names is False, attribute access like datar.dplyr.filter should be fine 18 | """ 19 | if name in _conflict_names: 20 | import sys 21 | import ast 22 | from executing import Source 23 | # Inspect the calling frame to see how the name is being accessed 24 | node = Source.executing(sys._getframe(1)).node 25 | if isinstance(node, (ast.Call, ast.Attribute)): 26 | # import datar.dplyr as d 27 | # d.filter(...)
28 | return globals()[name + "_"] 29 | 30 | raise AttributeError(name) 31 | -------------------------------------------------------------------------------- /datar/forcats.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .apis.forcats import * 4 | 5 | locals().update(_plugin.hooks.forcats_api()) 6 | -------------------------------------------------------------------------------- /datar/misc.py: -------------------------------------------------------------------------------- 1 | from .core.load_plugins import plugin as _plugin 2 | 3 | locals().update(_plugin.hooks.misc_api()) 4 | -------------------------------------------------------------------------------- /datar/tibble.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .apis.tibble import * 4 | 5 | locals().update(_plugin.hooks.tibble_api()) 6 | -------------------------------------------------------------------------------- /datar/tidyr.py: -------------------------------------------------------------------------------- 1 | 2 | from .core.load_plugins import plugin as _plugin 3 | from .apis.tidyr import * 4 | 5 | locals().update(_plugin.hooks.tidyr_api()) 6 | -------------------------------------------------------------------------------- /docs/backends.md: -------------------------------------------------------------------------------- 1 | # Backends 2 | 3 | The `datar` package is a collection of APIs that are ported from a set of R packages. The APIs are implemented in a backend-agnostic way, so that they can be used with different backends. Currently, `datar` supports the following backends: 4 | 5 | - [`numpy`](https://github.com/pwwang/datar-numpy): Mostly the implementations of functions from `datar.base`. 6 | - [`pandas`](https://github.com/pwwang/datar-pandas): Implementations using `pandas` as the backend. 7 | 8 | ## Installation of a backend 9 | 10 | ```bash 11 | pip install -U datar[<backend>]  # e.g. datar[pandas] 12 | ``` 13 | 14 | ## Using desired backends 15 | 16 | You can install multiple backends, but use only a subset of them. 17 | 18 | ```python 19 | from datar import options 20 | 21 | options(backends=['pandas']) 22 | 23 | # Then import the API functions 24 | ``` 25 | 26 | ## Writing a backend 27 | 28 | A backend is supposed to be implemented as a `Simplug` plugin. There are a number of hooks to be implemented. 29 | 30 | ### Hooks 31 | 32 | - `setup()`: called before any API is imported. You can do some setup here. 33 | - `get_versions()`: return a dict of versions of the dependencies of the backend. The keys are the names of the packages, and the values are the versions. 34 | - `load_dataset(name: str, metadata: Mapping)`: load a dataset, so that it can be loaded using `from datar.data import <name>`. 35 | - `base_api()`: load the implementation of `datar.apis.base`. 36 | - `dplyr_api()`: load the implementation of `datar.apis.dplyr`. 37 | - `tibble_api()`: load the implementation of `datar.apis.tibble`. 38 | - `forcats_api()`: load the implementation of `datar.apis.forcats`. 39 | - `tidyr_api()`: load the implementation of `datar.apis.tidyr`. 40 | - `other_api()`: load other backend-specific APIs. 41 | - `c_getitem(item)`: load the implementation of `datar.base.c.__getitem__` (`c[...]`). 42 | - `operate(op: str, x: Any, y: Any = None)`: load the implementation of the operators.
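A minimal, hypothetical sketch of a backend module is shown below. The hook names come from the list above, and the `*_api()` hooks return plain dicts because `datar`'s submodules consume them via `locals().update(_plugin.hooks.<module>_api())`; everything else (the module name, the toy `mutate`, and how the module gets registered as a `datar` plugin, e.g. through an entry point) is an assumption for illustration, not the API of a real backend.

```python
# my_backend.py -- hypothetical, minimal backend sketch (not a real package)

def setup():
    # One-time initialization, called before any API is imported
    pass


def get_versions():
    # Report the versions of this backend and its dependencies
    import pandas
    return {"my-backend": "0.0.1", "pandas": pandas.__version__}


def dplyr_api():
    # Return a mapping of API names to implementations;
    # datar.dplyr updates its namespace with this dict.
    def mutate(_data, **kwargs):
        # Toy stand-in: assign each keyword argument as a new column
        for key, val in kwargs.items():
            _data[key] = val
        return _data

    return {"mutate": mutate}
```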
43 | 44 | ## Selecting a backend at runtime 45 | 46 | You can use `__backend` to select a backend at runtime. 47 | 48 | ```python 49 | from datar.tibble import tibble 50 | 51 | tibble(..., __backend="pandas") 52 | ``` 53 | 54 | ## Selecting a backend for operators 55 | 56 | If you have multiple backends installed, you can select a backend for operators. 57 | 58 | ```python 59 | from datar.core.operator import DatarOperator 60 | 61 | DatarOperator.backend = "pandas" 62 | 63 | # Or use the context manager 64 | with DatarOperator.with_backend("pandas"): 65 | data >> mutate(z=f.x + f.y) 66 | ``` 67 | 68 | ## Selecting a backend for `c[]` 69 | 70 | ```python 71 | from datar.base import c 72 | 73 | c.backend = "pandas" 74 | 75 | # Or use the context manager 76 | with c.with_backend("pandas"): 77 | data >> mutate(z=c[1:3]) 78 | ``` 79 | 80 | ## Selecting a backend for numpy ufuncs 81 | 82 | ```python 83 | from datar.apis.other import array_ufunc 84 | 85 | array_ufunc.backend = "pandas" 86 | 87 | # Or use the context manager 88 | with array_ufunc.with_backend("pandas"): 89 | data >> mutate(z=np.sin(f.x)) 90 | ``` 91 | -------------------------------------------------------------------------------- /docs/data.md: -------------------------------------------------------------------------------- 1 | 2 | See the full reference of datasets at: [reference-maps/datasets][1] 3 | 4 | Datasets have to be imported individually: 5 | 6 | ```python 7 | from datar.data import iris 8 | 9 | # or 10 | from datar import data 11 | 12 | iris = data.iris 13 | ``` 14 | 15 | To list all available datasets: 16 | 17 | ```python 18 | from datar import data 19 | print(data.descr_datasets()) 20 | ``` 21 | 22 | `file` shows the path to the csv file of the dataset, and `index` shows whether it has an index (rownames). 23 | 24 | !!! Note 25 | 26 | The column names are altered by replacing `.` with `_`. For example, `Sepal.Width` becomes `Sepal_Width`. 27 | 28 | !!! Note 29 | 30 | Dataset names are case-insensitive, so you can do: 31 | 32 | ```python 33 | from datar.data import ToothGrowth 34 | # or 35 | from datar.data import toothgrowth 36 | ``` 37 | 38 | See also [Backends][2] for the backend implementations that load datasets. 39 | 40 | [1]: ./reference-maps/datasets 41 | [2]: ./backends 42 | -------------------------------------------------------------------------------- /docs/f.md: -------------------------------------------------------------------------------- 1 | ## Why `f`? 2 | 3 | It is simply fast to type: usually a `.` comes right after `f`, so your left hand and right hand work in turn. 4 | 5 | ## The `Symbolic` object `f` 6 | 7 | You can import it by `from datar import f`, or `from datar.all import *`. 8 | 9 | `f` is a universal `Symbolic` object, which does the magic of connecting the expressions in verb arguments so that their execution can be delayed. 10 | 11 | There are different uses for `f`: 12 | 13 | - Use as a proxy to refer to dataframe columns (e.g. `f.x`, `f['x']`) 14 | - Use as the column name marker for `tribble`: 15 | 16 | ```python 17 | tribble( 18 | f.x, f.y, 19 | 1, 2, 20 | 3, 4, 21 | ) 22 | ``` 23 | 24 | !!! note 25 | 26 | If you want a sequence literal, other than using `base.seq()`, you can 27 | also use `base.c[]`. 28 | 29 | For example, 30 | ```python 31 | from datar.base import c 32 | from datar.tibble import tibble 33 | df = tibble(x=c[1:5]) # 1, 2, 3, 4 34 | ``` 35 | 36 | 37 | ## If you don't like `f` ...
38 | 39 | Sometimes, when you have nested verbs with piping, you may want to distinguish the proxies for different verbs: 40 | 41 | ```python 42 | # you can just replicate f with a different name 43 | g = f 44 | 45 | df = tibble(x=1, y=2) 46 | df >> left_join(df >> group_by(f.x), by=g.y) 47 | ``` 48 | 49 | Or you can instantiate a new `Symbolic` object: 50 | ```python 51 | from pipda.symbolic import Symbolic 52 | 53 | g = Symbolic() 54 | # assert f is g 55 | 56 | # f and g make no difference in execution technically 57 | ``` 58 | 59 | You can also alias `f` by: 60 | ```python 61 | from datar import f as g 62 | ``` 63 | -------------------------------------------------------------------------------- /docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/docs/favicon.png -------------------------------------------------------------------------------- /docs/func_factory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/docs/func_factory.png -------------------------------------------------------------------------------- /docs/import.md: -------------------------------------------------------------------------------- 1 | ## Import submodules, verbs and functions from datar 2 | 3 | You can import everything (all verbs and functions) from datar by: 4 | ```python 5 | from datar.all import * 6 | ``` 7 | 8 | which is not recommended. Instead, you can import individual verbs or functions by: 9 | ```python 10 | from datar.all import mutate 11 | ``` 12 | 13 | !!! Attention 14 | 15 | When you use `from datar.all import *`, you need to pay attention to the python builtin names that are masked by `datar` (a warning is shown by default). For example, `slice` will be `datar.dplyr.slice` instead of `builtins.slice`. To refer to the builtin one, you need to: 16 | ```python 17 | import builtins 18 | 19 | s = builtins.slice(None, 3, None) # [:3] 20 | ``` 21 | 22 | Or if you know the origin of the verb, you can also do: 23 | ```python 24 | from datar.dplyr import mutate 25 | ``` 26 | 27 | You can also keep the namespace: 28 | ```python 29 | from datar import dplyr 30 | 31 | # df = tibble(x=1) 32 | # then use it with the dplyr namespace: 33 | df >> dplyr.mutate(y=2) 34 | ``` 35 | 36 | If you feel those namespaces are annoying, you can always use `datar.all`: 37 | ```python 38 | from datar.all import mutate 39 | ``` 40 | 41 | ## Import datasets from datar 42 | 43 | !!! note 44 | 45 | Datasets have to be imported individually. This means `from datar.data import *` won't work (you don't want all datasets to exhaust your memory). 46 | 47 | You don't have to worry about other datasets being imported and taking up memory when you import one. A dataset is only loaded into memory when you explicitly import it. 48 | 49 | See also [datasets](../datasets) for details about available datasets. 50 | 51 | ## About python builtin names masked by `datar` 52 | 53 | Sometimes it can be confusing, especially when python builtin functions are overridden by `datar`. There are a couple of datar (`r-base`, `dplyr`) functions with the same name as python builtin functions. For example, `filter` is a python builtin function, but also a `dplyr` verb; you should use `filter_` instead. By default, `datar` will raise an error when you try to import `filter`. You can set the option `allow_conflict_names` to `True` to allow importing and using these names directly.
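For example (a hypothetical session; note that `allow_conflict_names` must be set before the submodule is imported, since the conflicting names are wired up at import time):

```python
from datar import options

# Opt in to the conflicting names *before* importing the submodule
options(allow_conflict_names=True)

from datar.dplyr import filter  # now allowed; shadows builtins.filter

# Without the option, use the underscore-suffixed alias instead:
# from datar.dplyr import filter_
```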
54 | -------------------------------------------------------------------------------- /docs/notebooks/add_column.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": { 8 | "iopub.execute_input": "2021-07-16T22:28:27.609283Z", 9 | "iopub.status.busy": "2021-07-16T22:28:27.607781Z", 10 | "iopub.status.idle": "2021-07-16T22:28:28.439771Z", 11 | "shell.execute_reply": "2021-07-16T22:28:28.440308Z" 12 | } 13 | }, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/html": [ 18 | "
Try this notebook on binder.
" 19 | ], 20 | "text/plain": [ 21 | "" 22 | ] 23 | }, 24 | "metadata": {}, 25 | "output_type": "display_data" 26 | }, 27 | { 28 | "data": { 29 | "text/markdown": [ 30 | "###
★ add_column
" 31 | ], 32 | "text/plain": [ 33 | "" 34 | ] 35 | }, 36 | "metadata": {}, 37 | "output_type": "display_data" 38 | }, 39 | { 40 | "data": { 41 | "text/markdown": [ 42 | "##### Add one or more columns to an existing data frame.\n", 43 | "\n", 44 | "##### Args:\n", 45 | "  `_data`: Data frame to append to \n", 46 | "  `*args`: and \n", 47 | "  `**kwargs`: Name-value pairs to add to the data frame \n", 48 | "  `_before`: and \n", 49 | "  `_after`: Column index or name where to add the new columns \n", 50 | "    (default to add after the last column) \n", 51 | "\n", 52 | "  `_dtypes`: The dtypes for the new columns, either a uniform dtype or a \n", 53 | "    dict of dtypes with keys the column names \n", 54 | "\n", 55 | "##### Returns:\n", 56 | "  The dataframe with the added columns \n" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "metadata": {}, 63 | "output_type": "display_data" 64 | } 65 | ], 66 | "source": [ 67 | "# https://tibble.tidyverse.org/reference/add_column.html\n", 68 | "\n", 69 | "from datar import f\n", 70 | "from datar.tibble import *\n", 71 | "from datar.base import seq\n", 72 | "from datar.core.names import NameNonUniqueError\n", 73 | "\n", 74 | "%run nb_helpers.py\n", 75 | "nb_header(add_column)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": { 82 | "execution": { 83 | "iopub.execute_input": "2021-07-16T22:28:28.449685Z", 84 | "iopub.status.busy": "2021-07-16T22:28:28.449088Z", 85 | "iopub.status.idle": "2021-07-16T22:28:28.691135Z", 86 | "shell.execute_reply": "2021-07-16T22:28:28.691675Z" 87 | } 88 | }, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/html": [ 93 | "
\n", 94 | "\n", 107 | "\n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | "
xyzw
<int64><int64><int64><int64>
013-10
12200
23110
\n", 148 | "
\n" 149 | ], 150 | "text/plain": [ 151 | " x y z w\n", 152 | " \n", 153 | "0 1 3 -1 0\n", 154 | "1 2 2 0 0\n", 155 | "2 3 1 1 0" 156 | ] 157 | }, 158 | "execution_count": 2, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "df = tibble(x=seq(1,3), y=seq(3,1))\n", 165 | "df >> add_column(z=seq(-1,1), w=0)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 3, 171 | "metadata": { 172 | "execution": { 173 | "iopub.execute_input": "2021-07-16T22:28:28.701403Z", 174 | "iopub.status.busy": "2021-07-16T22:28:28.700765Z", 175 | "iopub.status.idle": "2021-07-16T22:28:28.726181Z", 176 | "shell.execute_reply": "2021-07-16T22:28:28.725600Z" 177 | } 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "
\n", 184 | "\n", 197 | "\n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | "
xzy
<int64><int64><int64>
01-13
1202
2311
\n", 233 | "
\n" 234 | ], 235 | "text/plain": [ 236 | " x z y\n", 237 | " \n", 238 | "0 1 -1 3\n", 239 | "1 2 0 2\n", 240 | "2 3 1 1" 241 | ] 242 | }, 243 | "execution_count": 3, 244 | "metadata": {}, 245 | "output_type": "execute_result" 246 | } 247 | ], 248 | "source": [ 249 | "df >> add_column(z=seq(-1,1), _before=f.y)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 4, 255 | "metadata": { 256 | "execution": { 257 | "iopub.execute_input": "2021-07-16T22:28:28.734549Z", 258 | "iopub.status.busy": "2021-07-16T22:28:28.733777Z", 259 | "iopub.status.idle": "2021-07-16T22:28:28.751592Z", 260 | "shell.execute_reply": "2021-07-16T22:28:28.751990Z" 261 | } 262 | }, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "Names must be unique: x\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "# You can't overwrite existing columns\n", 274 | "try:\n", 275 | " df >> add_column(x = seq(4,6))\n", 276 | "except NameNonUniqueError as err:\n", 277 | " print(err)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 5, 283 | "metadata": { 284 | "execution": { 285 | "iopub.execute_input": "2021-07-16T22:28:28.760324Z", 286 | "iopub.status.busy": "2021-07-16T22:28:28.759646Z", 287 | "iopub.status.idle": "2021-07-16T22:28:28.776413Z", 288 | "shell.execute_reply": "2021-07-16T22:28:28.776819Z" 289 | } 290 | }, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "[ValueError] Value has incompatible index.\n" 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "# You can't create new observations\n", 302 | "with try_catch():\n", 303 | " df >> add_column(z = seq(1,5))" 304 | ] 305 | } 306 | ], 307 | "metadata": { 308 | "kernelspec": { 309 | "display_name": "Python 3.9.5 ('base')", 310 | "language": "python", 311 | "name": "python3" 312 | }, 313 | "language_info": { 314 | "codemirror_mode": { 315 | "name": "ipython", 316 | "version": 3 317 | }, 318 | "file_extension": ".py", 319 | "mimetype": "text/x-python", 320 | "name": "python", 321 | "nbconvert_exporter": "python", 322 | "pygments_lexer": "ipython3", 323 | "version": "3.9.5" 324 | }, 325 | "vscode": { 326 | "interpreter": { 327 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 328 | } 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 2 333 | } 334 | -------------------------------------------------------------------------------- /docs/notebooks/coalesce.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "applicable-fault", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:27:49.903448Z", 10 | "iopub.status.busy": "2021-07-16T22:27:49.902768Z", 11 | "iopub.status.idle": "2021-07-16T22:27:50.871433Z", 12 | "shell.execute_reply": "2021-07-16T22:27:50.871879Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ coalesce
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Replace missing values with the first non-missing value\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/coalesce.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `x`: A vector \n", 50 | "  `*replace`: Values to replace missing values with. \n", 51 | "\n", 52 | "##### Returns:\n", 53 | "  An array of values \n" 54 | ], 55 | "text/plain": [ 56 | "" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | } 62 | ], 63 | "source": [ 64 | "# https://dplyr.tidyverse.org/reference/coalesce.html\n", 65 | "%run nb_helpers.py\n", 66 | "\n", 67 | "from datar.all import *\n", 68 | "\n", 69 | "nb_header(coalesce)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "smoking-gilbert", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:27:50.894245Z", 79 | "iopub.status.busy": "2021-07-16T22:27:50.893678Z", 80 | "iopub.status.idle": "2021-07-16T22:27:51.105408Z", 81 | "shell.execute_reply": "2021-07-16T22:27:51.103096Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "0 5.0\n", 89 | "1 4.0\n", 90 | "2 3.0\n", 91 | "3 0.0\n", 92 | "4 2.0\n", 93 | "5 0.0\n", 94 | "6 1.0\n", 95 | "7 0.0\n", 96 | "Name: y, dtype: float64" 97 | ] 98 | }, 99 | "execution_count": 2, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "df = tibble(x=[5,4,3,NA,2,NA,1,NA])\n", 106 | "df >> mutate(y=coalesce(f.x, 0)) >> pull(f.y)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "id": "intense-liver", 113 | "metadata": { 114 | "execution": { 115 | "iopub.execute_input": "2021-07-16T22:27:51.132360Z", 116 | "iopub.status.busy": "2021-07-16T22:27:51.131696Z", 117 | "iopub.status.idle": "2021-07-16T22:27:51.159635Z", 118 | "shell.execute_reply": "2021-07-16T22:27:51.158647Z" 119 | } 120 | }, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "0 1.0\n", 126 | "1 2.0\n", 127 | "2 3.0\n", 128 | "3 4.0\n", 129 | "4 5.0\n", 130 | "Name: m, dtype: float64" 131 | ] 132 | }, 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "df = tibble(\n", 140 | " y=[1,2,NA,NA,5],\n", 141 | " z=[NA,NA,3,4,5]\n", 142 | ")\n", 143 | "df >> mutate(m=coalesce(f.y, f.z)) >> pull(f.m)" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3.9.5 ('base')", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.9.5" 164 | }, 165 | "vscode": { 166 | "interpreter": { 167 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 168 | } 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 5 173 | } 174 | -------------------------------------------------------------------------------- /docs/notebooks/desc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "important-empty", 7 | 
"metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:27:38.744602Z", 10 | "iopub.status.busy": "2021-07-16T22:27:38.743026Z", 11 | "iopub.status.idle": "2021-07-16T22:27:39.602512Z", 12 | "shell.execute_reply": "2021-07-16T22:27:39.602933Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ desc
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Transform a vector into a format that will be sorted in descending order\n", 44 | "\n", 45 | "This is useful within arrange(). \n", 46 | "\n", 47 | "The original API: \n", 48 | "https://dplyr.tidyverse.org/reference/desc.html \n", 49 | "\n", 50 | "##### Args:\n", 51 | "  `x`: vector to transform \n", 52 | "\n", 53 | "##### Returns:\n", 54 | "  The descending order of x \n" 55 | ], 56 | "text/plain": [ 57 | "" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "%run nb_helpers.py\n", 66 | "from datar.base import factor, letters\n", 67 | "from datar.dplyr import desc\n", 68 | "\n", 69 | "nb_header(desc)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "equal-software", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:27:39.613373Z", 79 | "iopub.status.busy": "2021-07-16T22:27:39.612755Z", 80 | "iopub.status.idle": "2021-07-16T22:27:39.620797Z", 81 | "shell.execute_reply": "2021-07-16T22:27:39.621622Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "array([ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10])" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "desc(range(1,11))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 3, 103 | "id": "delayed-lincoln", 104 | "metadata": { 105 | "execution": { 106 | "iopub.execute_input": "2021-07-16T22:27:39.642189Z", 107 | "iopub.status.busy": "2021-07-16T22:27:39.641582Z", 108 | "iopub.status.idle": "2021-07-16T22:27:39.651348Z", 109 | "shell.execute_reply": "2021-07-16T22:27:39.651772Z" 110 | } 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "array([ -0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10.,\n", 117 | " -11., -12., -13., -14., -15., -16., -17., -18., -19., -20., -21.,\n", 118 | " -22., -23., -24., -25.])" 119 | ] 120 | }, 121 | "execution_count": 3, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "desc(factor(letters))" 128 | ] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3.9.5 ('base')", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.9.5" 148 | }, 149 | "vscode": { 150 | "interpreter": { 151 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 152 | } 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 5 157 | } 158 | -------------------------------------------------------------------------------- /docs/notebooks/forcats_fct_multi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/html": [ 11 | "
Try this notebook on binder.
" 12 | ], 13 | "text/plain": [ 14 | "" 15 | ] 16 | }, 17 | "metadata": {}, 18 | "output_type": "display_data" 19 | }, 20 | { 21 | "data": { 22 | "text/markdown": [ 23 | "###
★ fct_c
" 24 | ], 25 | "text/plain": [ 26 | "" 27 | ] 28 | }, 29 | "metadata": {}, 30 | "output_type": "display_data" 31 | }, 32 | { 33 | "data": { 34 | "text/markdown": [ 35 | "##### Concatenate factors, combining levels\n", 36 | "\n", 37 | "This is a useful ways of patching together factors from multiple sources \n", 38 | "that really should have the same levels but don't. \n", 39 | "\n", 40 | "##### Args:\n", 41 | "  `*fs`: factors to concatenate \n", 42 | "\n", 43 | "##### Returns:\n", 44 | "  The concatenated factor \n" 45 | ], 46 | "text/plain": [ 47 | "" 48 | ] 49 | }, 50 | "metadata": {}, 51 | "output_type": "display_data" 52 | }, 53 | { 54 | "data": { 55 | "text/markdown": [ 56 | "###
★ fct_cross
" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "metadata": {}, 63 | "output_type": "display_data" 64 | }, 65 | { 66 | "data": { 67 | "text/markdown": [ 68 | "##### Combine levels from two or more factors to create a new factor\n", 69 | "\n", 70 | "Computes a factor whose levels are all the combinations of \n", 71 | "the levels of the input factors. \n", 72 | "\n", 73 | "##### Args:\n", 74 | "  `*fs`: factors to cross \n", 75 | "  `sep`: A string to separate levels \n", 76 | "  `keep_empty`: If True, keep combinations with no observations as levels \n", 77 | "\n", 78 | "##### Returns:\n", 79 | "  The new factor \n" 80 | ], 81 | "text/plain": [ 82 | "" 83 | ] 84 | }, 85 | "metadata": {}, 86 | "output_type": "display_data" 87 | } 88 | ], 89 | "source": [ 90 | "%run nb_helpers.py\n", 91 | "from datar.all import *\n", 92 | "\n", 93 | "nb_header(\n", 94 | " fct_c,\n", 95 | " fct_cross,\n", 96 | " book=\"forcat_fct_multi\",\n", 97 | ")\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## fct_c" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "fa = factor(\"a\")\n", 114 | "fb = factor(\"b\")\n", 115 | "fab = factor(c(\"a\", \"b\"))\n", 116 | "\n", 117 | "# c(fa, fb, fab)\n", 118 | "# convert factor to integer for `c`?" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 3, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "['a', 'b', 'a', 'b']\n", 130 | "Categories (2, object): ['a', 'b']" 131 | ] 132 | }, 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "fct_c(fa, fb, fab)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "['a', 'b', 'a', 'b']\n", 151 | "Categories (2, object): ['a', 'b']" 152 | ] 153 | }, 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "fs = [fa, fb, fab]\n", 161 | "fct_c(*fs)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## fct_cross" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 5, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "['apple:green', 'kiwi:green', 'apple:red', 'apple:green']\n", 180 | "Categories (3, object): ['apple:green', 'apple:red', 'kiwi:green']" 181 | ] 182 | }, 183 | "execution_count": 5, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "fruit = factor(c(\"apple\", \"kiwi\", \"apple\", \"apple\"))\n", 190 | "colour = factor(c(\"green\", \"green\", \"red\", \"green\"))\n", 191 | "eaten = c(\"yes\", \"no\", \"yes\", \"no\")\n", 192 | "fct_cross(fruit, colour)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 6, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "['apple:green:yes', 'kiwi:green:no', 'apple:red:yes', 'apple:green:no']\n", 204 | "Categories (4, object): ['apple:green:no', 'apple:green:yes', 'apple:red:yes', 'kiwi:green:no']" 205 | ] 206 | }, 207 | "execution_count": 6, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "fct_cross(fruit, 
colour, eaten)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 7, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "['apple:green', 'kiwi:green', 'apple:red', 'apple:green']\n", 225 | "Categories (4, object): ['apple:green', 'apple:red', 'kiwi:green', 'kiwi:red']" 226 | ] 227 | }, 228 | "execution_count": 7, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "fct_cross(fruit, colour, keep_empty = TRUE)" 235 | ] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "Python 3.9.5 ('base')", 241 | "language": "python", 242 | "name": "python3" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.9.5" 255 | }, 256 | "orig_nbformat": 4, 257 | "vscode": { 258 | "interpreter": { 259 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 260 | } 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 2 265 | } 266 | -------------------------------------------------------------------------------- /docs/notebooks/full_seq.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "occasional-onion", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:27:58.337680Z", 10 | "iopub.status.busy": "2021-07-16T22:27:58.336855Z", 11 | "iopub.status.idle": "2021-07-16T22:27:59.226466Z", 12 | "shell.execute_reply": "2021-07-16T22:27:59.226860Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ full_seq
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Create the full sequence of values in a vector\n", 44 | "\n", 45 | "##### Args:\n", 46 | "  `x`: A numeric vector. \n", 47 | "  `period`: Gap between each observation. The existing data will be \n", 48 | "    checked to ensure that it is actually of this periodicity. \n", 49 | "\n", 50 | "  `tol`: Numerical tolerance for checking periodicity. \n", 51 | "\n", 52 | "##### Returns:\n", 53 | "  The full sequence \n" 54 | ], 55 | "text/plain": [ 56 | "" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | } 62 | ], 63 | "source": [ 64 | "# https://tidyr.tidyverse.org/reference/full_seq.html\n", 65 | "%run nb_helpers.py\n", 66 | "\n", 67 | "from datar.all import *\n", 68 | "\n", 69 | "nb_header(full_seq)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "convenient-professional", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:27:59.233189Z", 79 | "iopub.status.busy": "2021-07-16T22:27:59.232551Z", 80 | "iopub.status.idle": "2021-07-16T22:27:59.245528Z", 81 | "shell.execute_reply": "2021-07-16T22:27:59.246036Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "full_seq(c(1, 2, 4, 5, 10), 1)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "ad52e92c", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3.9.5 ('base')", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.9.5" 126 | }, 127 | "vscode": { 128 | "interpreter": { 129 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 130 | } 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 5 135 | } 136 | -------------------------------------------------------------------------------- /docs/notebooks/group_trim.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "9941c94b", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:00.630674Z", 10 | "iopub.status.busy": "2021-07-16T22:28:00.630102Z", 11 | "iopub.status.idle": "2021-07-16T22:28:01.530300Z", 12 | "shell.execute_reply": "2021-07-16T22:28:01.530718Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ group_trim
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Remove empty groups\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/group_trim.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `_data`: A grouped frame \n", 50 | "  `_drop`: See `group_by`. \n", 51 | "\n", 52 | "##### Returns:\n", 53 | "  A grouped frame \n" 54 | ], 55 | "text/plain": [ 56 | "" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | } 62 | ], 63 | "source": [ 64 | "# https://dplyr.tidyverse.org/reference/group_trim.html\n", 65 | "%run nb_helpers.py\n", 66 | "\n", 67 | "from datar.all import *\n", 68 | "\n", 69 | "nb_header(group_trim)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "id": "d4c86c45", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2021-07-16T22:28:01.592992Z", 79 | "iopub.status.busy": "2021-07-16T22:28:01.583766Z", 80 | "iopub.status.idle": "2021-07-16T22:28:01.685184Z", 81 | "shell.execute_reply": "2021-07-16T22:28:01.684381Z" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/html": [ 88 | "
\n", 89 | "\n", 102 | "\n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | "
x_rows
<category><object>
0a[0]
1b[1]
2c[]
\n", 133 | "
\n" 134 | ], 135 | "text/plain": [ 136 | " x _rows\n", 137 | " \n", 138 | "0 a [0]\n", 139 | "1 b [1]\n", 140 | "2 c []" 141 | ] 142 | }, 143 | "execution_count": 3, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "df = tibble(x=factor([\"a\", \"b\"], levels=list(\"abc\")))\n", 150 | "df >> group_by(f.x, _drop=False) >> group_data()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 4, 156 | "id": "a11f49fc", 157 | "metadata": { 158 | "execution": { 159 | "iopub.execute_input": "2021-07-16T22:28:01.749108Z", 160 | "iopub.status.busy": "2021-07-16T22:28:01.742401Z", 161 | "iopub.status.idle": "2021-07-16T22:28:01.861904Z", 162 | "shell.execute_reply": "2021-07-16T22:28:01.862322Z" 163 | } 164 | }, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/html": [ 169 | "
\n", 170 | "\n", 183 | "\n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
x_rows
<category><object>
0a[0]
1b[1]
\n", 209 | "
\n" 210 | ], 211 | "text/plain": [ 212 | " x _rows\n", 213 | " \n", 214 | "0 a [0]\n", 215 | "1 b [1]" 216 | ] 217 | }, 218 | "execution_count": 4, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "df >> group_by(f.x, _drop=False) >> group_trim() >> group_data()" 225 | ] 226 | } 227 | ], 228 | "metadata": { 229 | "kernelspec": { 230 | "display_name": "Python 3.9.5 ('base')", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.9.5" 245 | }, 246 | "vscode": { 247 | "interpreter": { 248 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 249 | } 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 5 254 | } 255 | -------------------------------------------------------------------------------- /docs/notebooks/n_distinct.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "several-cowboy", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:07.196543Z", 10 | "iopub.status.busy": "2021-07-16T22:28:07.195916Z", 11 | "iopub.status.idle": "2021-07-16T22:28:08.127610Z", 12 | "shell.execute_reply": "2021-07-16T22:28:08.128233Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ sample
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Sample a vector\n", 44 | "\n", 45 | "##### Args:\n", 46 | "  `x`: a vector or scaler \n", 47 | "  `size`: the size of the sample \n", 48 | "  `replace`: whether to sample with replacement \n", 49 | "  `prob`: the probabilities of sampling each element \n", 50 | "\n", 51 | "##### Returns:\n", 52 | "  The sampled vector \n" 53 | ], 54 | "text/plain": [ 55 | "" 56 | ] 57 | }, 58 | "metadata": {}, 59 | "output_type": "display_data" 60 | }, 61 | { 62 | "data": { 63 | "text/markdown": [ 64 | "###
★ n_distinct
" 65 | ], 66 | "text/plain": [ 67 | "" 68 | ] 69 | }, 70 | "metadata": {}, 71 | "output_type": "display_data" 72 | }, 73 | { 74 | "data": { 75 | "text/markdown": [ 76 | "##### Count the number of distinct values\n", 77 | "\n", 78 | "The original API: \n", 79 | "https://dplyr.tidyverse.org/reference/distinct.html \n", 80 | "\n", 81 | "##### Args:\n", 82 | "  `_data`: A data frame \n", 83 | "  `na_rm`: If `True`, remove missing values before counting. \n", 84 | "\n", 85 | "##### Returns:\n", 86 | "  The number of distinct values \n" 87 | ], 88 | "text/plain": [ 89 | "" 90 | ] 91 | }, 92 | "metadata": {}, 93 | "output_type": "display_data" 94 | } 95 | ], 96 | "source": [ 97 | "# https://dplyr.tidyverse.org/reference/n_distinct.html\n", 98 | "%run nb_helpers.py\n", 99 | "\n", 100 | "from datar.all import sample, n_distinct\n", 101 | "\n", 102 | "nb_header(sample, n_distinct, book='n_distinct')" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 2, 108 | "id": "sharing-michigan", 109 | "metadata": { 110 | "execution": { 111 | "iopub.execute_input": "2021-07-16T22:28:08.142741Z", 112 | "iopub.status.busy": "2021-07-16T22:28:08.141395Z", 113 | "iopub.status.idle": "2021-07-16T22:28:08.149693Z", 114 | "shell.execute_reply": "2021-07-16T22:28:08.150142Z" 115 | } 116 | }, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "100000" 122 | ] 123 | }, 124 | "execution_count": 2, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "x = sample(range(10), 1e5, replace=True)\n", 131 | "len(x)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 3, 137 | "id": "interested-store", 138 | "metadata": { 139 | "execution": { 140 | "iopub.execute_input": "2021-07-16T22:28:08.215359Z", 141 | "iopub.status.busy": "2021-07-16T22:28:08.214713Z", 142 | "iopub.status.idle": "2021-07-16T22:28:08.311083Z", 143 | "shell.execute_reply": "2021-07-16T22:28:08.310511Z" 144 | } 145 | }, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "10" 151 | ] 152 | }, 153 | "execution_count": 3, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "n_distinct(x)" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3.9.5 ('base')", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.9.5" 180 | }, 181 | "vscode": { 182 | "interpreter": { 183 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 184 | } 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 5 189 | } 190 | -------------------------------------------------------------------------------- /docs/notebooks/nb_helpers.py: -------------------------------------------------------------------------------- 1 | """helpers for notebooks""" 2 | from contextlib import contextmanager 3 | 4 | from IPython.display import display, Markdown, HTML 5 | from IPython.core.interactiveshell import InteractiveShell 6 | import pardoc 7 | from varname.helpers import debug # noqa 8 | from datar import options 9 | 10 | options(allow_conflict_names=True) 11 | 12 | InteractiveShell.ast_node_interactivity = "all" 13 | 14 | BINDER_URL = ( 15 | 
"https://mybinder.org/v2/gh/pwwang/datar/" 16 | "dev?filepath=docs%2Fnotebooks%2F" 17 | ) 18 | 19 | 20 | def nb_header(*funcs, book=None): 21 | """Print the header of a notebooks, mostly the docs""" 22 | if book is None: 23 | book = funcs[0].__name__ 24 | display( 25 | HTML( 26 | '
' 27 | 'Try this notebook on ' 28 | f'' 29 | "binder.
" 30 | ) 31 | ) 32 | 33 | for func in funcs: 34 | try: 35 | parsed = pardoc.google_parser.parse(func.__doc__) 36 | try: 37 | del parsed["Examples"] 38 | except KeyError: 39 | pass 40 | except Exception: 41 | formatted = func.__doc__ 42 | else: 43 | formatted = pardoc.google_parser.format( 44 | parsed, 45 | to="markdown", 46 | heading=5, 47 | indent_base="  ", 48 | ) 49 | 50 | display(Markdown( 51 | f'{"#"*3} ' 52 | '
' 53 | f'★ {func.__name__}' 54 | '
') 55 | ) 56 | display(Markdown(formatted)) 57 | 58 | 59 | @contextmanager 60 | def try_catch(): 61 | """Catch the error and print it out""" 62 | try: 63 | yield 64 | except Exception as exc: 65 | print(f"[{type(exc).__name__}] {exc}") 66 | -------------------------------------------------------------------------------- /docs/notebooks/near.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "permanent-waters", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:33.908144Z", 10 | "iopub.status.busy": "2021-07-16T22:28:33.907513Z", 11 | "iopub.status.idle": "2021-07-16T22:28:34.718530Z", 12 | "shell.execute_reply": "2021-07-16T22:28:34.718946Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ near
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Check if values are approximately equal\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/near.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `x`: A numeric vector \n", 50 | "  `y`: A numeric vector \n", 51 | "  `tol`: Tolerance \n", 52 | "\n", 53 | "##### Returns:\n", 54 | "  An array of boolean values \n" 55 | ], 56 | "text/plain": [ 57 | "" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "# https://dplyr.tidyverse.org/reference/near.html\n", 66 | "%run nb_helpers.py\n", 67 | "\n", 68 | "from datar.all import *\n", 69 | "\n", 70 | "nb_header(near)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 2, 76 | "id": "employed-supplier", 77 | "metadata": { 78 | "execution": { 79 | "iopub.execute_input": "2021-07-16T22:28:34.724636Z", 80 | "iopub.status.busy": "2021-07-16T22:28:34.723973Z", 81 | "iopub.status.idle": "2021-07-16T22:28:34.727483Z", 82 | "shell.execute_reply": "2021-07-16T22:28:34.727978Z" 83 | } 84 | }, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "False" 90 | ] 91 | }, 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "sqrt(2.0) ** 2.0 == 2.0" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 3, 104 | "id": "black-decimal", 105 | "metadata": { 106 | "execution": { 107 | "iopub.execute_input": "2021-07-16T22:28:34.734438Z", 108 | "iopub.status.busy": "2021-07-16T22:28:34.733793Z", 109 | "iopub.status.idle": "2021-07-16T22:28:34.736689Z", 110 | "shell.execute_reply": "2021-07-16T22:28:34.737085Z" 111 | } 112 | }, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/plain": [ 117 | "True" 118 | ] 119 | }, 120 | "execution_count": 3, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "near(sqrt(2.0) ** 2.0, 2.0)" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3.9.5 ('base')", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.9.5" 147 | }, 148 | "vscode": { 149 | "interpreter": { 150 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 151 | } 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 5 156 | } 157 | -------------------------------------------------------------------------------- /docs/notebooks/nest-join.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "adverse-thesis", 7 | "metadata": { 8 | "execution": { 9 | "iopub.execute_input": "2021-07-16T22:28:21.040914Z", 10 | "iopub.status.busy": "2021-07-16T22:28:21.040207Z", 11 | "iopub.status.idle": "2021-07-16T22:28:22.128495Z", 12 | "shell.execute_reply": "2021-07-16T22:28:22.128914Z" 13 | } 14 | }, 15 | "outputs": [ 16 | { 17 | "data": { 18 | "text/html": [ 19 | "
Try this notebook on binder.
" 20 | ], 21 | "text/plain": [ 22 | "" 23 | ] 24 | }, 25 | "metadata": {}, 26 | "output_type": "display_data" 27 | }, 28 | { 29 | "data": { 30 | "text/markdown": [ 31 | "###
★ nest_join
" 32 | ], 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/markdown": [ 43 | "##### Nest join two data frames by matching rows.\n", 44 | "\n", 45 | "The original API: \n", 46 | "https://dplyr.tidyverse.org/reference/join.html \n", 47 | "\n", 48 | "##### Args:\n", 49 | "  `x`: A data frame \n", 50 | "  `y`: A data frame \n", 51 | "  `by`: A list of column names to join by. \n", 52 | "    If None, use the intersection of the columns of x and y. \n", 53 | "\n", 54 | "  `copy`: If True, always copy the data. \n", 55 | "  `keep`: If True, keep the grouping variables in the output. \n", 56 | "  `name`: The name of the column to store the nested data frame. \n", 57 | "\n", 58 | "##### Returns:\n", 59 | "  A data frame \n" 60 | ], 61 | "text/plain": [ 62 | "" 63 | ] 64 | }, 65 | "metadata": {}, 66 | "output_type": "display_data" 67 | } 68 | ], 69 | "source": [ 70 | "# https://dplyr.tidyverse.org/reference/nest_join.html\n", 71 | "%run nb_helpers.py\n", 72 | "\n", 73 | "from datar.data import band_members, band_instruments\n", 74 | "from datar.all import *\n", 75 | "\n", 76 | "nb_header(nest_join, book='nest-join')" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 2, 82 | "id": "green-continuity", 83 | "metadata": { 84 | "execution": { 85 | "iopub.execute_input": "2021-07-16T22:28:22.136012Z", 86 | "iopub.status.busy": "2021-07-16T22:28:22.135245Z", 87 | "iopub.status.idle": "2021-07-16T22:28:22.213886Z", 88 | "shell.execute_reply": "2021-07-16T22:28:22.214257Z" 89 | } 90 | }, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/html": [ 95 | "
\n", 96 | "\n", 109 | "\n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | "
nameband_y_joined
<object><object><object>
0MickStones<DF 0x1>
1JohnBeatles<DF 1x1>
2PaulBeatles<DF 1x1>
\n", 145 | "
\n" 146 | ], 147 | "text/plain": [ 148 | " name band _y_joined\n", 149 | " \n", 150 | "0 Mick Stones \n", 151 | "1 John Beatles \n", 152 | "2 Paul Beatles " 153 | ] 154 | }, 155 | "execution_count": 2, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "nested = band_members >> nest_join(band_instruments)\n", 162 | "nested" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 3, 168 | "id": "french-egyptian", 169 | "metadata": { 170 | "execution": { 171 | "iopub.execute_input": "2021-07-16T22:28:22.228931Z", 172 | "iopub.status.busy": "2021-07-16T22:28:22.228284Z", 173 | "iopub.status.idle": "2021-07-16T22:28:22.238218Z", 174 | "shell.execute_reply": "2021-07-16T22:28:22.237726Z" 175 | } 176 | }, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "[Empty Tibble\n", 182 | " Columns: [plays]\n", 183 | " Index: [],\n", 184 | " plays\n", 185 | " \n", 186 | " 0 guitar]" 187 | ] 188 | }, 189 | "execution_count": 3, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "nested >> head(2) >> pull(f._y_joined, to='list')" 196 | ] 197 | } 198 | ], 199 | "metadata": { 200 | "kernelspec": { 201 | "display_name": "Python 3.9.5 ('base')", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": "text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.9.5" 216 | }, 217 | "vscode": { 218 | "interpreter": { 219 | "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" 220 | } 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 5 225 | } 226 | -------------------------------------------------------------------------------- /docs/options.md: -------------------------------------------------------------------------------- 1 | Options are used to change some behaviors in `datar`. 2 | 3 | ## Available options 4 | 5 | ### allow_conflict_names 6 | 7 | Whether to allow conflict names that reversed by python. For example, `filter` is a python builtin function, but also a `dplyr` function. You should use `filter_` instead. By default, `datar` will raise an error when you try to import `filter`. You can set this option to `True` to allow this behavior. 8 | 9 | ```python 10 | >>> from datar.all import filter 11 | >>> # or from datar.dplyr import filter 12 | Traceback (most recent call last): 13 | File "", line 1, in 14 | ImportError: cannot import name 'filter' from 'datar.all' 15 | ``` 16 | 17 | ```python 18 | >>> from datar import options 19 | >>> options(allow_conflict_names=True) 20 | >>> from datar.all import filter 21 | >>> filter 22 | 23 | ``` 24 | 25 | The conflict names under `datar.base` are: 26 | 27 | - `min` 28 | - `max` 29 | - `sum` 30 | - `abs` 31 | - `round` 32 | - `all` 33 | - `any` 34 | - `re` 35 | 36 | The conflict names under `datar.dplyr` are: 37 | 38 | - `filter` 39 | - `slice` 40 | 41 | ### backends 42 | 43 | If you have multiple backends installed, you can use this option to specify which backends to use. 44 | 45 | ## Configuration files 46 | 47 | You can change the default behavior of datar by configuring a `.toml.toml` file in your home directory. 
Likewise, to always allow the conflict names to be imported directly (so you do not need the underscore-suffixed alternatives like `filter_`), you can add the following to your `~/.datar.toml` file: 48 | 49 | ```toml 50 | allow_conflict_names = true 51 | ``` 52 | 53 | You can also have a project/directory-based configuration file (`./.datar.toml`) in your current working directory, which has higher priority than the home directory configuration file. 54 | -------------------------------------------------------------------------------- /docs/reference-maps/ALL.md: 1 | 2 | |Module|Description|Reference| 3 | |-|-|-| 4 | |`base`|APIs ported from `r-base/r-stats/r-utils`|[:octicons-cross-reference-16:][5]| 5 | |#|#|#| 6 | |`dplyr`|APIs ported from `tidyverse/dplyr`|[:octicons-cross-reference-16:][2]| 7 | |`tidyr`|APIs ported from `tidyverse/tidyr`|[:octicons-cross-reference-16:][4]| 8 | |`tibble`|APIs ported from `tidyverse/tibble`|[:octicons-cross-reference-16:][1]| 9 | |`forcats`|APIs ported from `tidyverse/forcats`|[:octicons-cross-reference-16:][9]| 10 | |#|#|#| 11 | |`datasets`|Datasets collected from `tidyverse` or other related packages|[:octicons-cross-reference-16:][3]| 12 | |#|#|#| 13 | |`datar`|Datar-specific verbs/functions|[:octicons-cross-reference-16:][6]| 14 | 15 | [1]: ../tibble 16 | [2]: ../dplyr 17 | [3]: ../datasets 18 | [4]: ../tidyr 19 | [5]: ../base 20 | [6]: ../datar 21 | [9]: ../forcats 22 | -------------------------------------------------------------------------------- /docs/reference-maps/datasets.md: 1 | 11 | 12 | ## Reference of `datar.data` 13 | 14 | |API|Description|Source| 15 | |---|---|---:| 16 | |`airlines`|translation between two letter carrier codes and names|[`r-nycflights13`][1]| 17 | |`airports`|airport names and locations|[`r-nycflights13`][1]| 18 | |`flights`|all flights that departed from NYC in 2013|[`r-nycflights13`][1]| 19 | |`weather`|hourly meteorological data for each airport|[`r-nycflights13`][1]| 20 | |`planes`|construction information about each plane|[`r-nycflights13`][1]| 21 | |#|#|#| 22 | |`state_abb`|character vector of 2-letter abbreviations for the state names.|[`r-datasets-state`][15]| 23 | |`state_division`|factor giving state divisions (New England, Middle Atlantic, South Atlantic, East South Central, West South Central, East North Central, West North Central, Mountain, and Pacific).|[`r-datasets-state`][15]| 24 | |`state_region`|factor giving the region (Northeast, South, North Central, West) that each state belongs to.|[`r-datasets-state`][15]| 25 | |#|#|#| 26 | |`airquality`|Daily air quality measurements in New York, May to September 1973.|[`r-datasets-airquality`][2]| 27 | |`anscombe`|Four x-y datasets which have the same traditional statistical properties|[`r-datasets-anscombe`][3]| 28 | |`faithful`|Waiting time between eruptions and the duration of the eruption for the Old Faithful geyser in Yellowstone National Park, Wyoming, USA|[`r-datasets-faithful`][31]| 29 | |`iris`|Edgar Anderson's Iris Data|[`r-datasets-iris`][9]| 30 | |`mtcars`|Motor Trend Car Road Tests|[`r-datasets-mtcars`][10]| 31 | |`warpbreaks`|The Number of Breaks in Yarn during Weaving|[`r-datasets-warpbreaks`][19]| 32 | |`ToothGrowth`|The Effect of Vitamin C on Tooth Growth in Guinea Pigs|[`r-datasets-ToothGrowth`][21]| 33 | |#|#|#| 34 | |`band_instruments`|Band members of the Beatles and Rolling Stones|[`r-dplyr-band_members`][4]| 35 | |`band_instruments2`|Band 
members of the Beatles and Rolling Stones|[`r-dplyr-band_members`][4]| 36 | |`band_members`|Band members of the Beatles and Rolling Stones|[`r-dplyr-band_members`][4]| 37 | |#|#|#| 38 | |`table1`|Example tabular representations|[`r-tidyr-table1`][17]| 39 | |`table2`|Example tabular representations|[`r-tidyr-table1`][17]| 40 | |`table3`|Example tabular representations|[`r-tidyr-table1`][17]| 41 | |`table4a`|Example tabular representations|[`r-tidyr-table1`][17]| 42 | |`table4b`|Example tabular representations|[`r-tidyr-table1`][17]| 43 | |`table5`|Example tabular representations|[`r-tidyr-table1`][17]| 44 | |#|#|#| 45 | |`starwars`|Starwars characters (columns `films`, `vehicles` and `starships` are not included)|[`r-dplyr-starwars`][14]| 46 | |`storms`|This data is a subset of the NOAA Atlantic hurricane database best track data|[`r-dplyr-storms`][16]| 47 | |`us_rent_income`|US rent and income data|[`r-tidyr-us_rent_income`][18]| 48 | |`world_bank_pop`|Population data from the world bank|[`r-tidyr-world_bank_pop`][20]| 49 | |#|#|#| 50 | |`billboard`|Song rankings for Billboard top 100 in the year 2000|[`r-tidyr-billboard`][5]| 51 | |`construction`|Completed construction in the US in 2018|[`r-tidyr-construction`][6]| 52 | |`fish_encounters`|Information about fish swimming down a river|[`r-tidyr-fish_encounters`][8]| 53 | |`population`|A subset of data from the World Health Organization Global Tuberculosis Report, and accompanying global populations.|[`r-tidyr-who`][11]| 54 | |`relig_income`|Pew religion and income survey|[`r-tidyr-relig_income`][12]| 55 | |`smiths`|A small demo dataset describing John and Mary Smith.|[`r-tidyr-smiths`][13]| 56 | |`who`|A subset of data from the World Health Organization Global Tuberculosis Report, and accompanying global populations.|[`r-tidyr-who`][11]| 57 | |#|#|#| 58 | |`diamonds`|A dataset containing the prices and other attributes of almost 54,000 diamonds|[`r-ggplot2-diamonds`][7]| 59 | |`economics` `economics_long`|US economic time series|[`r-ggplot2-economics`][22]| 60 | |`faithfuld`|2d density estimate of Old Faithful data|[`r-ggplot2-faithfuld`][23]| 61 | |`midwest`|Midwest demographics|[`r-ggplot2-midwest`][24]| 62 | |`mpg`|Fuel economy data from 1999 to 2008 for 38 popular models of cars|[`r-ggplot2-mpg`][25]| 63 | |`msleep`|An updated and expanded version of the mammals sleep dataset|[`r-ggplot2-msleep`][26]| 64 | |`presidential`|Terms of 11 presidents from Eisenhower to Obama|[`r-ggplot2-presidential`][27]| 65 | |`seals`|Vector field of seal movements|[`r-ggplot2-seals`][28]| 66 | |`txhousing`|Housing sales in TX|[`r-ggplot2-txhousing`][29]| 67 | |`luv_colours`|`colors()` in Luv space|[`r-ggplot2-luv_colours`][30]| 68 | |#|#|#| 69 | |`gss_cat`|A sample of categorical variables from the General Social survey|[`r-forcats-gss_cat`][32]| 70 | 71 | [1]: https://github.com/tidyverse/nycflights13 72 | [2]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/airquality 73 | [3]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/anscombe 74 | [4]: https://dplyr.tidyverse.org/reference/band_members.html 75 | [5]: https://tidyr.tidyverse.org/reference/billboard.html 76 | [6]: https://tidyr.tidyverse.org/reference/construction.html 77 | [7]: https://ggplot2.tidyverse.org/reference/diamonds.html 78 | [8]: https://tidyr.tidyverse.org/reference/fish_encounters.html 79 | [9]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/iris 80 | [10]: 
https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/mtcars 81 | [11]: https://tidyr.tidyverse.org/reference/who.html 82 | [12]: https://tidyr.tidyverse.org/reference/relig_income.html 83 | [13]: https://tidyr.tidyverse.org/reference/smiths.html 84 | [14]: https://dplyr.tidyverse.org/reference/starwars.html 85 | [15]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/state 86 | [16]: https://dplyr.tidyverse.org/reference/storms.html 87 | [17]: https://tidyr.tidyverse.org/reference/table1.html 88 | [18]: https://tidyr.tidyverse.org/reference/us_rent_income.html 89 | [19]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/warpbreaks 90 | [20]: https://tidyr.tidyverse.org/reference/world_bank_pop.html 91 | [21]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/ToothGrowth 92 | [22]: https://ggplot2.tidyverse.org/reference/economics.html 93 | [23]: https://ggplot2.tidyverse.org/reference/faithfuld.html 94 | [24]: https://ggplot2.tidyverse.org/reference/midwest.html 95 | [25]: https://ggplot2.tidyverse.org/reference/mpg.html 96 | [26]: https://ggplot2.tidyverse.org/reference/msleep.html 97 | [27]: https://ggplot2.tidyverse.org/reference/presidential.html 98 | [28]: https://ggplot2.tidyverse.org/reference/seals.html 99 | [29]: https://ggplot2.tidyverse.org/reference/txhousing.html 100 | [30]: https://ggplot2.tidyverse.org/reference/luv_colours.html 101 | [31]: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/faithful 102 | [32]: https://forcats.tidyverse.org/reference/gss_cat.html 103 | -------------------------------------------------------------------------------- /docs/reference-maps/forcats.md: 1 | 11 | 12 | ## Reference of `datar.forcats` 13 | 14 | Reference map of `r-tidyverse-forcats` can be found [here][1]. 
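For orientation, a minimal sketch of calling these functions from Python (it assumes the `datar-pandas` backend is installed; the sample data is arbitrary):

```python
from datar.forcats import fct_count, fct_infreq

fct = fct_infreq(["b", "b", "c", "a", "b"])  # reorder levels by frequency
fct_count(fct)  # tabulate how often each level occurs
```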
15 | 16 | **Legend:** 17 | 18 | |Sample|Status| 19 | |---|---| 20 | |[normal]()|API that is regularly ported| 21 | |[strike-through]()|API that is not ported, or not an API originally| 22 | |[**bold**]()|API that is unique in `datar`| 23 | |[_italic_]()|Work in progress| 24 | 25 | ### Change order of levels 26 | 27 | |API|Description|Notebook example| 28 | |---|---|---:| 29 | |[fct_relevel()][2]|Reorder factor levels by hand|[:material-notebook:][3]| 30 | |[fct_inorder()][4] [fct_infreq()][5] [fct_inseq()][6]|Reorder factor levels by first appearance, frequency, or numeric order|[:material-notebook:][3]| 31 | |[fct_reorder()][7] [fct_reorder2()][8] [last2()][9] [first2()][10]|Reorder factor levels by sorting along another variable|[:material-notebook:][3]| 32 | |[fct_shuffle()][11]|Randomly permute factor levels|[:material-notebook:][3]| 33 | |[fct_rev()][12]|Reverse order of factor levels|[:material-notebook:][3]| 34 | |[fct_shift()][13]|Shift factor levels to left or right, wrapping around at end|[:material-notebook:][3]| 35 | 36 | ### Change value of levels 37 | 38 | |API|Description|Notebook example| 39 | |---|---|---:| 40 | |[fct_anon()][15]|Anonymise factor levels|[:material-notebook:][14]| 41 | |[fct_collapse()][16]|Collapse factor levels into manually defined groups|[:material-notebook:][14]| 42 | |[fct_lump()][17] [fct_lump_min()][18] [fct_lump_prop()][19] [fct_lump_n()][20] [fct_lump_lowfreq()][41]|Lump together factor levels into "other"|[:material-notebook:][14]| 43 | |[fct_other()][21]|Replace levels with "other"|[:material-notebook:][14]| 44 | |[fct_recode()][22]|Change factor levels by hand|[:material-notebook:][14]| 45 | |[fct_relabel()][23]|Automatically relabel factor levels, collapse as necessary|[:material-notebook:][14]| 46 | 47 | ### Add/remove levels 48 | 49 | |API|Description|Notebook example| 50 | |---|---|---:| 51 | |[fct_expand()][25]|Add additional levels to a factor|[:material-notebook:][24]| 52 | |[fct_explicit_na()][26]|Make missing values explicit|[:material-notebook:][24]| 53 | |[fct_drop()][27]|Drop unused levels|[:material-notebook:][24]| 54 | |[fct_unify()][28]|Unify the levels in a list of factors|[:material-notebook:][24]| 55 | 56 | ### Combine multiple factors 57 | 58 | |API|Description|Notebook example| 59 | |---|---|---:| 60 | |[fct_c()][29]|Concatenate factors, combining levels|[:material-notebook:][31]| 61 | |[fct_cross()][30]|Combine levels from two or more factors to create a new factor|[:material-notebook:][31]| 62 | 63 | ### Other helpers 64 | 65 | |API|Description|Notebook example| 66 | |---|---|---:| 67 | |[as_factor()][33]|Convert input to a factor|[:material-notebook:][32]| 68 | |[fct_count()][34]|Count entries in a factor|[:material-notebook:][32]| 69 | |[fct_match()][35]|Test for presence of levels in a factor|[:material-notebook:][32]| 70 | |[fct_unique()][36]|Unique values of a factor|[:material-notebook:][32]| 71 | |[lvls_reorder()][37] [lvls_revalue()][38] [lvls_expand()][39]|Low-level functions for manipulating levels|[:material-notebook:][32]| 72 | |[lvls_union()][40]|Find all levels in a list of factors|[:material-notebook:][32]| 73 | 74 | [1]: https://forcats.tidyverse.org/reference/index.html 75 | [2]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_relevel 76 | [3]: ../../notebooks/forcats_lvl_order 77 | [4]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_inorder 78 | [5]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_infreq 79 | [6]: 
../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_inseq 80 | [7]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_reorder 81 | [8]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_reorder2 82 | [9]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.last2 83 | [10]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.first2 84 | [11]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_shuffle 85 | [12]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_rev 86 | [13]: ../../api/datar.forcats.lvl_order/#datar.forcats.lvl_order.fct_shift 87 | [14]: ../../notebooks/forcats_lvl_value 88 | [15]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_anon 89 | [16]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_collapse 90 | [17]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump 91 | [18]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_min 92 | [19]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_prop 93 | [20]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_n 94 | [21]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_other 95 | [22]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_recode 96 | [23]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_relabel 97 | [24]: ../../notebooks/forcats_lvl_addrm 98 | [25]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_expand 99 | [26]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_explicit_na 100 | [27]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_drop 101 | [28]: ../../api/datar.forcats.lvl_addrm/#datar.forcats.lvl_addrm.fct_unify 102 | [29]: ../../api/datar.forcats.fct_multi/#datar.forcats.fct_multi.fct_c 103 | [30]: ../../api/datar.forcats.fct_multi/#datar.forcats.fct_multi.fct_cross 104 | [31]: ../../notebooks/forcats_fct_multi 105 | [32]: ../../notebooks/forcats_misc 106 | [33]: ../../api/datar.forcats.misc/#datar.forcats.misc.as_factor 107 | [34]: ../../api/datar.forcats.misc/#datar.forcats.misc.fct_count 108 | [35]: ../../api/datar.forcats.misc/#datar.forcats.misc.fct_match 109 | [36]: ../../api/datar.forcats.misc/#datar.forcats.misc.fct_unique 110 | [37]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_reorder 111 | [38]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_revalue 112 | [39]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_expand 113 | [40]: ../../api/datar.forcats.misc/#datar.forcats.misc.lvls_union 114 | [41]: ../../api/datar.forcats.lvl_value/#datar.forcats.lvl_value.fct_lump_lowfreq 115 | -------------------------------------------------------------------------------- /docs/reference-maps/other.md: 1 | 11 | 12 | ## Reference of `datar.datar` 13 | 14 | **Legend:** 15 | 16 | |Sample|Status| 17 | |---|---| 18 | |[normal]()|API that is regularly ported| 19 | |[strike-through]()|API that is not ported, or not an API originally| 20 | |[**bold**]()|API that is unique in `datar`| 21 | |[_italic_]()|Work in progress| 22 | 23 | ### Verbs 24 | 25 | |API|Description|Notebook example| 26 | |---|---|---:| 27 | |[**`get()`**][2]|Extract values from data frames|[:material-notebook:][1]| 28 | |[**`flatten()`**][3]|Flatten values of data frames|[:material-notebook:][1]| 29 | 30 | ### Functions 31 | 32 | |API|Description|Notebook example| |---|---|---:| |[**`itemgetter()`**][4]|Turn `a[f.x]` to a valid verb argument with `itemgetter(a, f.x)`|[:material-notebook:][1]| 33 | 
|[**`attrgetter()`**][5]|`f.x.<attr>` but works with `SeriesGroupBy` object|[:material-notebook:][1]| 34 | |[**`pd_str()`**][6]|`str` accessor but works with `SeriesGroupBy` object|[:material-notebook:][1]| 35 | |[**`pd_cat()`**][7]|`cat` accessor but works with `SeriesGroupBy` object|[:material-notebook:][1]| 36 | |[**`pd_dt()`**][8]|`dt` accessor but works with `SeriesGroupBy` object|[:material-notebook:][1]| 37 | 38 | 39 | [1]: ../../notebooks/datar 40 | [2]: ../../api/datar.datar.verbs/#datar.datar.verbs.get 41 | [3]: ../../api/datar.datar.verbs/#datar.datar.verbs.flatten 42 | [4]: ../../api/datar.datar.funcs/#datar.datar.funcs.itemgetter 43 | [5]: ../../api/datar.datar.funcs/#datar.datar.funcs.attrgetter 44 | [6]: ../../api/datar.datar.funcs/#datar.datar.funcs.pd_str 45 | [7]: ../../api/datar.datar.funcs/#datar.datar.funcs.pd_cat 46 | [8]: ../../api/datar.datar.funcs/#datar.datar.funcs.pd_dt 47 | -------------------------------------------------------------------------------- /docs/reference-maps/stats.md: 1 | 11 | 12 | ## Reference of `datar.base.stats` 13 | 14 | **Legend:** 15 | 16 | |Sample|Status| 17 | |---|---| 18 | |[normal]()|API that is regularly ported| 19 | |[strike-through]()|API that is not ported, or not an API originally| 20 | |[**bold**]()|API that is unique in `datar`| 21 | |[_italic_]()|Work in progress| 22 | 23 | ### Stats 24 | 25 | |API|Description|Notebook example| 26 | |---|---|---:| 27 | |[`rnorm()`][1]|Generates random deviates for the normal distribution|| 28 | |[`rpois()`][2]|Generates random deviates for the Poisson distribution|| 29 | |[`runif()`][3]|Generates random deviates for the uniform distribution|| 30 | 31 | 32 | [1]: ../../api/datar.base.stats/#datar.base.stats.rnorm 33 | [2]: ../../api/datar.base.stats/#datar.base.stats.rpois 34 | [3]: ../../api/datar.base.stats/#datar.base.stats.runif 35 | -------------------------------------------------------------------------------- /docs/reference-maps/tibble.md: 1 | 11 | 12 | ## Reference of `datar.tibble` 13 | 14 | Reference map of `r-tidyverse-tibble` can be found [here][1]. 15 | 16 | **Legend:** 17 | 18 | |Sample|Status| 19 | |---|---| 20 | |[normal]()|API that is regularly ported| 21 | |[strike-through]()|API that is not ported, or not an API originally| 22 | |[**bold**]()|API that is unique in `datar`| 23 | |[_italic_]()|Work in progress| 24 | 25 | 26 | ### Tibbles 27 | 28 | !!! Tip 29 | 30 | Tibbles in `datar` are just `pandas.DataFrame`s, so there is no difference between data frames created by `tibble()` and by the `pandas.DataFrame` constructor, unlike in R, where `tibble` and `data.frame` are distinct classes. 31 | 32 | Also note that tibbles in `datar` are not `rownames`/`index` aware for most APIs, just like most `tidyverse` APIs. 
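A minimal sketch of what this means in practice (it assumes the `datar-pandas` backend is installed; the data and column names are arbitrary):

```python
import pandas as pd
from datar.tibble import tibble

df = tibble(x=[1, 2], y=["a", "b"])
# A tibble here is just a pandas DataFrame ...
assert isinstance(df, pd.DataFrame)
df.loc[df["x"] > 1, "y"]  # ... so plain pandas subsetting keeps working
```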
33 | 34 | |API|Description|Notebook example| 35 | |---|---|---:| 36 | |`tibble-package`||| 37 | |[`tibble()`][12] [`tibble_row()`][18]|Build a data frame| [:material-notebook:][2] | 38 | |[`fibble()`][13]|Same as `tibble()`, but can be used as verb arguments| [:material-notebook:][2] | 39 | |`tbl_df-class`||| 40 | |`print()` `format()`||| 41 | |[`tribble()`][3]|Row-wise tibble creation|[:material-notebook:][2]| 42 | 43 | ### Coercion 44 | 45 | |API|Description|Notebook example| 46 | |---|---|---:| 47 | |`is_tibble()`||| 48 | |[`as_tibble()`][19]|Convert data frames into datar's tibbles|| 49 | |`new_tibble()` `validate_tibble()`||| 50 | |[`enframe()`][4] [`deframe()`][14]|Converting iterables to data frames, and vice versa| [:material-notebook:][5]| 51 | 52 | ### Manipulation 53 | 54 | |API|Description|Notebook example| 55 | |---|---|---:| 56 | |`$` `[[` `[`|Please subset data frames using `pandas` syntax (`df.col`, `df['col']`, `df.loc[...]` or `df.iloc[...]`)|| 57 | |[`add_row()`][6]| Add rows to a data frame | [:material-notebook:][7] | 58 | |[`add_column()`][8]| Add columns to a data frame | [:material-notebook:][9] | 59 | 60 | ### Helpers 61 | 62 | |API|Description|Notebook example| 63 | |---|---|---:| 64 | |`reexports`||| 65 | |[`has_rownames()`/`has_index()`][10] [`remove_rownames()`/`remove_index()`/`drop_index()`][15] [`rownames_to_column()`/`index_to_column()`][16] [`rowid_to_column()` `column_to_rownames()`/`column_to_index()`][17]|Tools for working with row names/DataFrame indexes|[:material-notebook:][11]| 66 | |`view()`||| 67 | 68 | ### Vectors, matrices, and lists 69 | 70 | 71 | [1]: https://tibble.tidyverse.org/reference/index.html 72 | [2]: ../../notebooks/tibble 73 | [3]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.tribble 74 | [4]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.enframe 75 | [5]: ../../notebooks/enframe 76 | [6]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.add_row 77 | [7]: ../../notebooks/add_row 78 | [8]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.add_column 79 | [9]: ../../notebooks/add_column 80 | [10]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.has_rownames 81 | [11]: ../../notebooks/rownames 82 | [12]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.tibble 83 | [13]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.fibble 84 | [14]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.deframe 85 | [15]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.remove_rownames 86 | [16]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.rownames_to_column 87 | [17]: ../../api/datar.tibble.verbs/#datar.tibble.verbs.rowid_to_column 88 | [18]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.tibble_row 89 | [19]: ../../api/datar.tibble.tibble/#datar.tibble.tibble.as_tibble 90 | -------------------------------------------------------------------------------- /docs/reference-maps/tidyr.md: 1 | 11 | 12 | ## Reference of `datar.tidyr` 13 | 14 | Reference map of `r-tidyverse-tidyr` can be found [here][1]. 
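For orientation, a minimal sketch of one of the verbs below (it assumes the `datar-pandas` backend is installed; the data and column names are arbitrary):

```python
from datar.all import tibble, pivot_longer

wide = tibble(id=[1, 2], a=[3, 4], b=[5, 6])
# Stack the two value columns into key/value pairs, one row per (id, key)
long = wide >> pivot_longer(["a", "b"], names_to="key", values_to="value")
```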
15 | 16 | **Legend:** 17 | 18 | |Sample|Status| 19 | |---|---| 20 | |[normal]()|API that is regularly ported| 21 | |[strike-through]()|API that is not ported, or not an API originally| 22 | |[**bold**]()|API that is unique in `datar`| 23 | |[_italic_]()|Work in progress| 24 | 25 | ### Pivoting 26 | 27 | |API|Description|Notebook example| 28 | |---|---|---:| 29 | |[pivot_longer()][26]|Pivot data from wide to long|[:material-notebook:][27]| 30 | |[pivot_wider()][28]|Pivot data from long to wide|[:material-notebook:][29]| 31 | 32 | ### Rectangling 33 | 34 | |API|Description|Notebook example| 35 | |---|---|---:| 36 | |_`hoist()`_ _`unnest_longer()`_ _`unnest_wider()`_ _`unnest_auto()`_|Rectangle a nested list into a tidy tibble|| 37 | 38 | ### Nesting 39 | 40 | |API|Description|Notebook example| 41 | |---|---|---:| 42 | |[`nest()`][9] [`unnest()`][10]|Nest and unnest|[:material-notebook:][11]| 43 | 44 | ### Character vectors 45 | 46 | |API|Description|Notebook example| 47 | |---|---|---:| 48 | |[`extract()`][22]|Extract a character column into multiple columns using regular expression groups|[:material-notebook:][23]| 49 | |[`separate()`][30]|Separate a character column into multiple columns with a regular expression or numeric locations|[:material-notebook:][31]| 50 | |[`separate_rows()`][34]|Separate a collapsed column into multiple rows|[:material-notebook:][35]| 51 | |[`unite()`][36]|Unite multiple columns into one by pasting strings together|[:material-notebook:][37]| 52 | 53 | ### Missing values 54 | 55 | |API|Description|Notebook example| 56 | |---|---|---:| 57 | |[`complete()`][18]|Complete a data frame with missing combinations of data|[:material-notebook:][19]| 58 | |[`drop_na()`][20]|Drop rows containing missing values|[:material-notebook:][21]| 59 | |[`expand()`][12] [`crossing()`][13] [`nesting()`][14]|Expand data frame to include all possible combinations of values|[:material-notebook:][15]| 60 | |[`expand_grid()`][16]|Create a tibble from all combinations of inputs|[:material-notebook:][17]| 61 | |[`fill()`][24]|Fill in missing values with previous or next value|[:material-notebook:][25]| 62 | |[`full_seq()`][40]|Create the full sequence of values in a vector|[:material-notebook:][41]| 63 | |[`replace_na()`][38]|Replace NAs with specified values|[:material-notebook:][39]| 64 | 65 | ### Miscellanea 66 | 67 | |API|Description|Notebook example| 68 | |---|---|---:| 69 | |[`chop()`][3] [`unchop()`][4]|Chop and unchop|[:material-notebook:][5]| 70 | |[`pack()`][6] [`unpack()`][7]|Pack and unpack|[:material-notebook:][8]| 71 | |[`uncount()`][32]|"Uncount" a data frame|[:material-notebook:][33]| 72 | 73 | ### Data 74 | 75 | See [datasets][2] 76 | 77 | [1]: https://tidyr.tidyverse.org/reference/index.html 78 | [2]: ../datasets 79 | [3]: ../../api/datar.tidyr.chop/#datar.tidyr.chop.chop 80 | [4]: ../../api/datar.tidyr.chop/#datar.tidyr.chop.unchop 81 | [5]: ../../notebooks/chop 82 | [6]: ../../api/datar.tidyr.pack/#datar.tidyr.pack.pack 83 | [7]: ../../api/datar.tidyr.pack/#datar.tidyr.pack.unpack 84 | [8]: ../../notebooks/pack 85 | [9]: ../../api/datar.tidyr.nest/#datar.tidyr.nest.nest 86 | [10]: ../../api/datar.tidyr.nest/#datar.tidyr.nest.unnest 87 | [11]: ../../notebooks/nest 88 | [12]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.expand 89 | [13]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.crossing 90 | [14]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.nesting 91 | [15]: ../../notebooks/expand 92 | [16]: ../../api/datar.tidyr.expand/#datar.tidyr.expand.expand_grid 93 | [17]: ../../notebooks/expand_grid 94 | [18]: 
../../api/datar.tidyr.complete/#datar.tidyr.complete.complete 95 | [19]: ../../notebooks/complete 96 | [20]: ../../api/datar.tidyr.drop_na/#datar.tidyr.drop_na.drop_na 97 | [21]: ../../notebooks/drop_na 98 | [22]: ../../api/datar.tidyr.extract/#datar.tidyr.extract.extract 99 | [23]: ../../notebooks/extract 100 | [24]: ../../api/datar.tidyr.fill/#datar.tidyr.fill.fill 101 | [25]: ../../notebooks/fill 102 | [26]: ../../api/datar.tidyr.pivot_long/#datar.tidyr.pivot_long.pivot_longer 103 | [27]: ../../notebooks/pivot_longer 104 | [28]: ../../api/datar.tidyr.pivot_wide/#datar.tidyr.pivot_wide.pivot_wider 105 | [29]: ../../notebooks/pivot_wider 106 | [30]: ../../api/datar.tidyr.separate/#datar.tidyr.separate.separate 107 | [31]: ../../notebooks/separate 108 | [32]: ../../api/datar.tidyr.uncount/#datar.tidyr.uncount.uncount 109 | [33]: ../../notebooks/uncount 110 | [34]: ../../api/datar.tidyr.separate/#datar.tidyr.separate.separate_rows 111 | [35]: ../../notebooks/separate 112 | [36]: ../../api/datar.tidyr.unite/#datar.tidyr.unite.unite 113 | [37]: ../../notebooks/unite 114 | [38]: ../../api/datar.tidyr.replace_na/#datar.tidyr.replace_na.replace_na 115 | [39]: ../../notebooks/replace_na 116 | [40]: ../../api/datar.tidyr.funcs/#datar.tidyr.funcs.full_seq 117 | [41]: ../../notebooks/full_seq 118 | -------------------------------------------------------------------------------- /docs/reference-maps/utils.md: 1 | 11 | 12 | ## Reference of `datar.base.verbs` 13 | 14 | **Legend:** 15 | 16 | |Sample|Status| 17 | |---|---| 18 | |[normal]()|API that is regularly ported| 19 | |[strike-through]()|API that is not ported, or not an API originally| 20 | |[**bold**]()|API that is unique in `datar`| 21 | |[_italic_]()|Work in progress| 22 | 23 | ### Utils 24 | 25 | |API|Description|Notebook example| 26 | |---|---|---:| 27 | |[`head()`][1]|Get the head of the object|| 28 | |[`tail()`][2]|Get the tail of the object|| 29 | 30 | [1]: ../../api/datar.base.verbs/#datar.base.verbs.head 31 | [2]: ../../api/datar.base.verbs/#datar.base.verbs.tail 32 | -------------------------------------------------------------------------------- /docs/style.css: 1 | 2 | .md-main__inner.md-grid { 3 | max-width: 80%; 4 | margin-left: 32px; 5 | } 6 | 7 | .md-typeset .admonition, .md-typeset details { 8 | font-size: .7rem !important; 9 | } 10 | 11 | .md-typeset table:not([class]) td { 12 | padding: .55em 1.25em !important; 13 | } 14 | 15 | .md-typeset table:not([class]) th { 16 | padding: .75em 1.25em !important; 17 | } 18 | 19 | .md-grid { 20 | max-width: none; 21 | } 22 | 23 | .mkapi-docstring{ 24 | line-height: 1; 25 | } 26 | .mkapi-node { 27 | background-color: #f4faff; 28 | border-top: 3px solid #151922; 29 | } 30 | .mkapi-node .mkapi-object-container { 31 | background-color: #d1d4d6; 32 | padding: .12em .4em; 33 | } 34 | .mkapi-node .mkapi-object-container .mkapi-object.code { 35 | background: none; 36 | border: none; 37 | } 38 | .mkapi-node .mkapi-object-container .mkapi-object.code * { 39 | font-size: .65rem !important; 40 | } 41 | .mkapi-node pre { 42 | line-height: 1.5; 43 | } 44 | .md-typeset pre>code { 45 | overflow: visible; 46 | line-height: 1.2; 47 | } 48 | .mkapi-docstring .md-typeset pre>code { 49 | font-size: 0.1rem !important; 50 | } 51 | .mkapi-section-name.bases { 52 | margin-top: .2em; 53 | } 54 | .mkapi-section-body.bases { 55 | padding-bottom: .7em; 
line-height: 1.3; 57 | } 58 | .mkapi-section.bases { 59 | margin-bottom: .8em; 60 | } 61 | .mkapi-node * { 62 | font-size: .7rem; 63 | } 64 | .mkapi-node a.mkapi-src-link { 65 | word-break: keep-all; 66 | } 67 | .mkapi-docstring { 68 | padding: .4em .15em !important; 69 | } 70 | .mkapi-section-name-body { 71 | font-size: .72rem !important; 72 | } 73 | .mkapi-node ul.mkapi-items li { 74 | line-height: 1.4 !important; 75 | } 76 | .mkapi-node ul.mkapi-items li * { 77 | font-size: .65rem !important; 78 | } 79 | .mkapi-node code.mkapi-object-signature { 80 | padding-right: 2px; 81 | } 82 | .mkapi-node .mkapi-code * { 83 | font-size: .6rem; 84 | } 85 | .mkapi-node a.mkapi-docs-link { 86 | font-size: .6rem; 87 | } 88 | .mkapi-node h1.mkapi-object.mkapi-object-code { 89 | margin: .2em .3em; 90 | } 91 | .mkapi-node h1.mkapi-object.mkapi-object-code .mkapi-object-kind.mkapi-object-kind-code { 92 | font-style: normal; 93 | margin-right: 16px; 94 | } 95 | .mkapi-node .mkapi-item-name { 96 | font-size: .7rem !important; 97 | color: #555; 98 | padding-right: 4px; 99 | } 100 | .md-typeset { 101 | font-size: .75rem !important; 102 | line-height: 1.5 !important; 103 | } 104 | .mkapi-object-kind.package.top { 105 | font-size: .8rem !important; 106 | color: #111; 107 | 108 | } 109 | .mkapi-object.package.top > h2 { 110 | font-size: .8rem !important; 111 | } 112 | 113 | .mkapi-object-body.package.top * { 114 | font-size: .75rem !important; 115 | } 116 | .mkapi-object-kind.module.top { 117 | font-size: .75rem !important; 118 | color: #222; 119 | } 120 | 121 | .mkapi-object-body.module.top * { 122 | font-size: .75rem !important; 123 | } 124 | 125 | .mkapi-section-body.examples pre code { 126 | font-size: .65rem !important; 127 | overflow: auto; 128 | } 129 | -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/example.png -------------------------------------------------------------------------------- /example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/example2.png -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: datar 2 | repo_url: https://github.com/pwwang/datar 3 | repo_name: pwwang/datar 4 | theme: 5 | favicon: favicon.png 6 | logo: favicon.png 7 | icon: 8 | repo: fontawesome/brands/github 9 | palette: 10 | primary: black 11 | name: 'material' 12 | font: 13 | text: 14 | - FreightSans 15 | - "Helvetica Neue" 16 | - Helvetica 17 | - Arial 18 | - sans-serif 19 | code: 20 | - IBMPlexMono 21 | - SFMono-Regular 22 | - Menlo 23 | - Monaco 24 | - Consolas 25 | - "Liberation Mono" 26 | - "Courier New" 27 | - monospace 28 | features: 29 | - navigation.top 30 | markdown_extensions: 31 | - markdown.extensions.admonition 32 | - pymdownx.emoji: 33 | emoji_index: !!python/name:material.extensions.emoji.twemoji 34 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 35 | - pymdownx.superfences: 36 | preserve_tabs: true 37 | - toc: 38 | baselevel: 2 39 | plugins: 40 | - search # necessary for search to work 41 | - mkapi 42 | - mkdocs-jupyter: 43 | execute: false 44 | extra_css: 45 | - style.css 46 | nav: 47 | - 'Home': 'index.md' 48 | - 
'Reference maps': 49 | - 'reference-maps/ALL.md' 50 | - 'reference-maps/base.md' 51 | - 'reference-maps/dplyr.md' 52 | - 'reference-maps/tibble.md' 53 | - 'reference-maps/tidyr.md' 54 | - 'reference-maps/forcats.md' 55 | - 'reference-maps/datasets.md' 56 | - 'reference-maps/other.md' 57 | - 'Import datar': 'import.md' 58 | - 'Backends': 'backends.md' 59 | - 'Options': 'options.md' 60 | - 'The f-expression': 'f.md' 61 | - 'Data': 'data.md' 62 | - 'Examples': 63 | - 'across': 'notebooks/across.ipynb' 64 | - 'add_column': 'notebooks/add_column.ipynb' 65 | - 'add_row': 'notebooks/add_row.ipynb' 66 | - 'arrange': 'notebooks/arrange.ipynb' 67 | - 'base': 'notebooks/base.ipynb' 68 | - 'base-arithmetic': 'notebooks/base-arithmetic.ipynb' 69 | - 'base-funs': 'notebooks/base-funs.ipynb' 70 | - 'between': 'notebooks/between.ipynb' 71 | - 'bind': 'notebooks/bind.ipynb' 72 | - 'case_when': 'notebooks/case_when.ipynb' 73 | - 'chop': 'notebooks/chop.ipynb' 74 | - 'coalesce': 'notebooks/coalesce.ipynb' 75 | - 'complete': 'notebooks/complete.ipynb' 76 | - 'context': 'notebooks/context.ipynb' 77 | - 'count': 'notebooks/count.ipynb' 78 | - 'cumall': 'notebooks/cumall.ipynb' 79 | - 'desc': 'notebooks/desc.ipynb' 80 | - 'distinct': 'notebooks/distinct.ipynb' 81 | - 'drop_na': 'notebooks/drop_na.ipynb' 82 | - 'enframe': 'notebooks/enframe.ipynb' 83 | - 'expand': 'notebooks/expand.ipynb' 84 | - 'expand_grid': 'notebooks/expand_grid.ipynb' 85 | - 'extract': 'notebooks/extract.ipynb' 86 | - 'fill': 'notebooks/fill.ipynb' 87 | - 'filter': 'notebooks/filter.ipynb' 88 | - 'filter-joins': 'notebooks/filter-joins.ipynb' 89 | - 'forcats_fct_multi': 'notebooks/forcats_fct_multi.ipynb' 90 | - 'forcats_lvl_addrm': 'notebooks/forcats_lvl_addrm.ipynb' 91 | - 'forcats_lvl_order': 'notebooks/forcats_lvl_order.ipynb' 92 | - 'forcats_lvl_value': 'notebooks/forcats_lvl_value.ipynb' 93 | - 'forcats_misc': 'notebooks/forcats_misc.ipynb' 94 | - 'full_seq': 'notebooks/full_seq.ipynb' 95 | - 'other': 'notebooks/other.ipynb' 96 | - 'group_by': 'notebooks/group_by.ipynb' 97 | - 'group_map': 'notebooks/group_map.ipynb' 98 | - 'group_split': 'notebooks/group_split.ipynb' 99 | - 'group_trim': 'notebooks/group_trim.ipynb' 100 | - 'lead-lag': 'notebooks/lead-lag.ipynb' 101 | - 'mutate-joins': 'notebooks/mutate-joins.ipynb' 102 | - 'mutate': 'notebooks/mutate.ipynb' 103 | - 'n_distinct': 'notebooks/n_distinct.ipynb' 104 | - 'na_if': 'notebooks/na_if.ipynb' 105 | - 'near': 'notebooks/near.ipynb' 106 | - 'nest': 'notebooks/nest.ipynb' 107 | - 'nest-join': 'notebooks/nest-join.ipynb' 108 | - 'nth': 'notebooks/nth.ipynb' 109 | - 'pack': 'notebooks/pack.ipynb' 110 | - 'pivot_longer': 'notebooks/pivot_longer.ipynb' 111 | - 'pivot_wider': 'notebooks/pivot_wider.ipynb' 112 | - 'pull': 'notebooks/pull.ipynb' 113 | - 'ranking': 'notebooks/ranking.ipynb' 114 | - 'readme': 'notebooks/readme.ipynb' 115 | - 'recode': 'notebooks/recode.ipynb' 116 | - 'relocate': 'notebooks/relocate.ipynb' 117 | - 'rename': 'notebooks/rename.ipynb' 118 | - 'replace_na': 'notebooks/replace_na.ipynb' 119 | - 'rownames': 'notebooks/rownames.ipynb' 120 | - 'rows': 'notebooks/rows.ipynb' 121 | - 'rowwise': 'notebooks/rowwise.ipynb' 122 | - 'select': 'notebooks/select.ipynb' 123 | - 'separate': 'notebooks/separate.ipynb' 124 | - 'setops': 'notebooks/setops.ipynb' 125 | - 'slice': 'notebooks/slice.ipynb' 126 | - 'summarise': 'notebooks/summarise.ipynb' 127 | - 'tibble': 'notebooks/tibble.ipynb' 128 | - 'uncount': 'notebooks/uncount.ipynb' 129 | - 'unite': 'notebooks/unite.ipynb' 
130 | - 'with_groups': 'notebooks/with_groups.ipynb' 131 | - 'API': 'mkapi/api/datar' 132 | - 'Change Log': CHANGELOG.md 133 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datar" 3 | version = "0.15.9" 4 | description = "A Grammar of Data Manipulation in python" 5 | authors = ["pwwang "] 6 | license = "MIT" 7 | readme = "README.md" 8 | homepage = "https://github.com/pwwang/datar" 9 | repository = "https://github.com/pwwang/datar" 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.9" 13 | simplug = "^0.5" 14 | pipda = "^0.13.1" 15 | python-simpleconf = {version = "^0.7", extras = ["toml"]} 16 | datar-numpy = {version = "^0.3.4", optional = true} 17 | datar-pandas = {version = "^0.5.5", optional = true} 18 | # datar-polars = {version = "^0.0.0", optional = true} 19 | datar-arrow = {version = "^0.1", optional = true} 20 | 21 | [tool.poetry.build] 22 | generate-setup-file = true 23 | 24 | [tool.poetry.extras] 25 | numpy = ["datar-numpy"] 26 | pandas = ["datar-pandas"] 27 | arrow = ["datar-arrow"] 28 | # modin = ["datar-pandas"] 29 | # polars = ["datar-polars"] 30 | 31 | [tool.poetry.group.dev.dependencies] 32 | pytest = "^8.1" 33 | pytest-cov = "^6" 34 | six = "^1.16" 35 | numpy = "*" 36 | python-slugify = "^8" 37 | 38 | [tool.poetry.group.docs.dependencies] 39 | mkdocs = "^1.6" 40 | mkdocs-material = "^9.6" 41 | pymdown-extensions = "^10.14" 42 | mkapi-fix = "^0.1" 43 | mkdocs-jupyter = "^0.25" 44 | ipykernel = "^6.29" 45 | ipython-genutils = "^0.2" 46 | plotnine = "^0.13" 47 | klib = "^1.3" 48 | pardoc = "^0.2" 49 | 50 | [build-system] 51 | requires = ["poetry-core"] 52 | build-backend = "poetry.core.masonry.api" 53 | 54 | [tool.mypy] 55 | ignore_missing_imports = true 56 | allow_redefinition = true 57 | disable_error_code = ["attr-defined", "no-redef", "union-attr"] 58 | show_error_codes = true 59 | strict_optional = false 60 | 61 | [tool.pytest.ini_options] 62 | addopts = "-vv -p no:asyncio --tb=short --cov-config=.coveragerc --cov=datar --cov-report xml:cov.xml --cov-report term-missing" 63 | filterwarnings = [ 64 | # "error" 65 | ] 66 | console_output_style = "progress" 67 | junit_family = "xunit1" 68 | 69 | [tool.black] 70 | line-length = 80 71 | target-version = ['py37', 'py38', 'py39'] 72 | include = '\.pyi?$' 73 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | # This will not be included in the distribution. 3 | # The distribution is managed by poetry 4 | # This file is kept only for 5 | # 1. Github to index the dependents 6 | # 2. pip install -e . 
7 | """ 8 | 9 | from setuptools import setup 10 | 11 | setup(name="datar") 12 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pwwang/datar/66a505e4e8e39bc0c48e3463bec07e71f4ebde73/tests/__init__.py -------------------------------------------------------------------------------- /tests/conflict_names.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def test_getattr(module, allow_conflict_names, fun, error): 5 | from datar import options 6 | options(allow_conflict_names=allow_conflict_names) 7 | 8 | if module == "all": 9 | import datar.all as d 10 | elif module == "base": 11 | import datar.base as d 12 | elif module == "dplyr": 13 | import datar.dplyr as d 14 | 15 | if not error: 16 | return getattr(d, fun) 17 | 18 | try: 19 | getattr(d, fun) 20 | except Exception as e: 21 | raised = type(e).__name__ 22 | assert raised == error, f"Raised {raised}, expected {error}" 23 | else: 24 | raise AssertionError(f"{error} should have raised") 25 | 26 | 27 | def _import(module, fun): 28 | if module == "all" and fun == "sum": 29 | from datar.all import sum # noqa: F401 30 | elif module == "all" and fun == "slice": 31 | from datar.all import slice # noqa: F401 32 | elif module == "base" and fun == "sum": 33 | from datar.base import sum # noqa: F401 34 | elif module == "dplyr" and fun == "slice": 35 | from datar.dplyr import slice # noqa: F401 36 | 37 | 38 | def test_import(module, allow_conflict_names, fun, error): 39 | from datar import options 40 | options(allow_conflict_names=allow_conflict_names) 41 | 42 | if not error: 43 | return _import(module, fun) 44 | 45 | try: 46 | _import(module, fun) 47 | except Exception as e: 48 | raised = type(e).__name__ 49 | assert raised == error, f"Raised {raised}, expected {error}" 50 | else: 51 | raise AssertionError(f"{error} should have raised") 52 | 53 | 54 | def make_test(module, allow_conflict_names, getattr, fun, error): 55 | if fun == "_": 56 | fun = "sum" if module in ["all", "base"] else "slice" 57 | 58 | if getattr: 59 | return test_getattr(module, allow_conflict_names, fun, error) 60 | 61 | return test_import(module, allow_conflict_names, fun, error) 62 | 63 | 64 | def main(): 65 | parser = argparse.ArgumentParser() 66 | parser.add_argument( 67 | "--module", 68 | choices=["all", "base", "dplyr"], 69 | required=True, 70 | help="The module to test" 71 | ) 72 | parser.add_argument( 73 | "--allow-conflict-names", 74 | action="store_true", 75 | help="Whether to allow conflict names", 76 | default=False, 77 | ) 78 | parser.add_argument( 79 | "--getattr", 80 | action="store_true", 81 | help=( 82 | "Whether to test datar.all.sum, " 83 | "otherwise test from datar.all import sum." 84 | ), 85 | default=False, 86 | ) 87 | parser.add_argument( 88 | "--fun", 89 | help=( 90 | "The function to test. 
" 91 | "If _ then sum for all/base, slice for dplyr" 92 | ), 93 | choices=["sum", "filter", "_"], 94 | default="_", 95 | ) 96 | parser.add_argument( 97 | "--error", 98 | help="The error to expect", 99 | ) 100 | args = parser.parse_args() 101 | 102 | make_test( 103 | args.module, 104 | args.allow_conflict_names, 105 | args.getattr, 106 | args.fun, 107 | args.error, 108 | ) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from datar import options 2 | 3 | 4 | def pytest_sessionstart(session): 5 | # Load no plugins 6 | options(backends=[None]) 7 | -------------------------------------------------------------------------------- /tests/test_array_ufunc.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa: F401 2 | 3 | import numpy as np 4 | from pipda import Context 5 | from datar import f 6 | from datar.core import plugin as _ # noqa: F401 7 | from datar.apis.misc import array_ufunc 8 | 9 | 10 | def test_default(): 11 | out = np.sqrt(f)._pipda_eval([1, 4, 9], Context.EVAL) 12 | assert out.tolist() == [1, 2, 3] 13 | 14 | 15 | def test_misc_obj(): 16 | class Foo(list): 17 | pass 18 | 19 | @array_ufunc.register(Foo) 20 | def _array_ufunc(x, ufunc, *args, kind, **kwargs): 21 | return ufunc([i * 2 for i in x], *args, **kwargs) 22 | 23 | out = np.sqrt(f)._pipda_eval(Foo([2, 8, 18]), Context.EVAL) 24 | assert out.tolist() == [2, 4, 6] 25 | -------------------------------------------------------------------------------- /tests/test_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datar.base import ( 4 | ceiling, 5 | cov, 6 | floor, 7 | mean, 8 | median, 9 | pmax, 10 | pmin, 11 | sqrt, 12 | var, 13 | scale, 14 | col_sums, 15 | col_means, 16 | col_sds, 17 | col_medians, 18 | row_sums, 19 | row_means, 20 | row_sds, 21 | row_medians, 22 | min_, 23 | max_, 24 | round_, 25 | sum_, 26 | abs_, 27 | prod, 28 | sign, 29 | signif, 30 | trunc, 31 | exp, 32 | log, 33 | log2, 34 | log10, 35 | log1p, 36 | sd, 37 | weighted_mean, 38 | quantile, 39 | bessel_i, 40 | bessel_j, 41 | bessel_k, 42 | bessel_y, 43 | as_double, 44 | as_integer, 45 | as_logical, 46 | as_character, 47 | as_factor, 48 | as_ordered, 49 | as_date, 50 | as_numeric, 51 | arg, 52 | conj, 53 | mod, 54 | re_, 55 | im, 56 | as_complex, 57 | is_complex, 58 | cummax, 59 | cummin, 60 | cumprod, 61 | cumsum, 62 | droplevels, 63 | levels, 64 | set_levels, 65 | is_factor, 66 | is_ordered, 67 | nlevels, 68 | factor, 69 | ordered, 70 | cut, 71 | diff, 72 | expand_grid, 73 | outer, 74 | make_names, 75 | make_unique, 76 | rank, 77 | identity, 78 | is_logical, 79 | is_true, 80 | is_false, 81 | is_na, 82 | is_finite, 83 | is_infinite, 84 | any_na, 85 | as_null, 86 | is_null, 87 | set_seed, 88 | rep, 89 | c_, 90 | c, 91 | length, 92 | lengths, 93 | order, 94 | sort, 95 | rev, 96 | sample, 97 | seq, 98 | seq_along, 99 | seq_len, 100 | match, 101 | beta, 102 | lgamma, 103 | digamma, 104 | trigamma, 105 | choose, 106 | factorial, 107 | gamma, 108 | lfactorial, 109 | lchoose, 110 | lbeta, 111 | psigamma, 112 | rnorm, 113 | runif, 114 | rpois, 115 | rbinom, 116 | rcauchy, 117 | rchisq, 118 | rexp, 119 | is_character, 120 | grep, 121 | grepl, 122 | sub, 123 | gsub, 124 | strsplit, 125 | paste, 126 | paste0, 127 | sprintf, 128 | substr, 129 | 
substring, 130 | startswith, 131 | endswith, 132 | strtoi, 133 | trimws, 134 | toupper, 135 | tolower, 136 | chartr, 137 | nchar, 138 | nzchar, 139 | table, 140 | tabulate, 141 | is_atomic, 142 | is_double, 143 | is_element, 144 | is_integer, 145 | is_numeric, 146 | any_, 147 | all_, 148 | acos, 149 | acosh, 150 | asin, 151 | asinh, 152 | atan, 153 | atanh, 154 | cos, 155 | cosh, 156 | cospi, 157 | sin, 158 | sinh, 159 | sinpi, 160 | tan, 161 | tanh, 162 | tanpi, 163 | atan2, 164 | append, 165 | colnames, 166 | set_colnames, 167 | rownames, 168 | set_rownames, 169 | dim, 170 | diag, 171 | duplicated, 172 | intersect, 173 | ncol, 174 | nrow, 175 | proportions, 176 | setdiff, 177 | setequal, 178 | unique, 179 | t, 180 | union, 181 | max_col, 182 | complete_cases, 183 | head, 184 | tail, 185 | which, 186 | which_min, 187 | which_max, 188 | ) 189 | 190 | from datar.core.utils import NotImplementedByCurrentBackendError 191 | 192 | 193 | @pytest.mark.parametrize("fun,args", [ 194 | (ceiling, [1]), 195 | (cov, [[1, 2], [3, 4]]), 196 | (floor, [1]), 197 | (mean, [1]), 198 | (median, [1]), 199 | (pmax, [1]), 200 | (pmin, [1]), 201 | (sqrt, [1]), 202 | (var, [1]), 203 | (scale, [1]), 204 | (col_sums, [1]), 205 | (col_means, [1]), 206 | (col_sds, [1]), 207 | (col_medians, [1]), 208 | (row_sums, [1]), 209 | (row_means, [1]), 210 | (row_sds, [1]), 211 | (row_medians, [1]), 212 | (min_, [1]), 213 | (max_, [1]), 214 | (round_, [1]), 215 | (sum_, [1]), 216 | (abs_, [1]), 217 | (prod, [1]), 218 | (sign, [1]), 219 | (signif, [1]), 220 | (trunc, [1]), 221 | (exp, [1]), 222 | (log, [1]), 223 | (log2, [1]), 224 | (log10, [1]), 225 | (log1p, [1]), 226 | (sd, [1]), 227 | (weighted_mean, [1]), 228 | (quantile, [1]), 229 | (bessel_i, [1, 2]), 230 | (bessel_j, [1, 2]), 231 | (bessel_k, [1, 2]), 232 | (bessel_y, [1, 2]), 233 | (as_double, [1]), 234 | (as_integer, [1]), 235 | (as_logical, [1]), 236 | (as_character, [1]), 237 | (as_factor, [1]), 238 | (as_ordered, [1]), 239 | (as_date, [1]), 240 | (as_numeric, [1]), 241 | (arg, [1]), 242 | (conj, [1]), 243 | (mod, [1]), 244 | (re_, [1]), 245 | (im, [1]), 246 | (as_complex, [1]), 247 | (is_complex, [1]), 248 | (cummax, [1]), 249 | (cummin, [1]), 250 | (cumprod, [1]), 251 | (cumsum, [1]), 252 | (droplevels, [1]), 253 | (levels, [1]), 254 | (set_levels, [1, 1]), 255 | (is_factor, [1]), 256 | (is_ordered, [1]), 257 | (nlevels, [1]), 258 | (factor, [1]), 259 | (ordered, [1]), 260 | (cut, [1, 1]), 261 | (diff, [1]), 262 | (expand_grid, [1]), 263 | (outer, [1, 1]), 264 | (rank, [1]), 265 | (is_logical, [1]), 266 | (is_true, [1]), 267 | (is_false, [1]), 268 | (is_na, [1]), 269 | (is_finite, [1]), 270 | (is_infinite, [1]), 271 | (any_na, [1]), 272 | (as_null, [1]), 273 | (is_null, [1]), 274 | (set_seed, [1]), 275 | (rep, [1]), 276 | (c_, [1]), 277 | (c, [1]), 278 | (length, [1]), 279 | (lengths, [1]), 280 | (order, [1]), 281 | (rev, [1]), 282 | (seq, [1]), 283 | (seq_along, [1]), 284 | (seq_len, [1]), 285 | (sort, [1]), 286 | (sample, [1]), 287 | (match, [1, 1]), 288 | (is_element, [1, 1]), 289 | (is_atomic, [1]), 290 | (is_double, [1]), 291 | (is_integer, [1]), 292 | (is_numeric, [1]), 293 | (any_, [1]), 294 | (all_, [1]), 295 | (acos, [1]), 296 | (acosh, [1]), 297 | (asin, [1]), 298 | (asinh, [1]), 299 | (atan, [1]), 300 | (atanh, [1]), 301 | (cos, [1]), 302 | (cosh, [1]), 303 | (cospi, [1]), 304 | (sin, [1]), 305 | (sinh, [1]), 306 | (sinpi, [1]), 307 | (tan, [1]), 308 | (tanh, [1]), 309 | (tanpi, [1]), 310 | (atan2, [1, 1]), 311 | (beta, [1, 1]), 312 | (choose, [1, 1]), 
--------------------------------------------------------------------------------
/tests/test_conflict_names.py:
--------------------------------------------------------------------------------
import sys
import subprocess
from pathlib import Path

import pytest


def _run_conflict_names(module, allow_conflict_names, getat, error):
    here = Path(__file__).parent
    conflict_names = here / "conflict_names.py"
    cmd = [
        sys.executable,
        str(conflict_names),
        "--module",
        module,
    ]
    if error:
        cmd += ["--error", error]
    if allow_conflict_names:
        cmd.append("--allow-conflict-names")
    if getat:
        cmd.append("--getattr")

    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return p.wait(), " ".join(cmd)


def test_from_all_import_allow_conflict_names_true():
    r, cmd = _run_conflict_names("all", True, False, None)
    assert r == 0, cmd


def test_from_all_import_allow_conflict_names_false():
    r, cmd = _run_conflict_names("all", False, False, "ImportError")
    assert r == 0, cmd


def test_all_getattr_allow_conflict_names_true():
    r, cmd = _run_conflict_names("all", True, True, None)
    assert r == 0, cmd


def test_all_getattr_allow_conflict_names_false():
    r, cmd = _run_conflict_names("all", False, True, None)
    assert r == 0, cmd


def test_from_base_import_allow_conflict_names_true():
    r, cmd = _run_conflict_names("base", True, False, None)
    assert r == 0, cmd


def test_from_base_import_allow_conflict_names_false():
    r, cmd = _run_conflict_names("base", False, False, "ImportError")
    assert r == 0, cmd


def test_base_getattr_allow_conflict_names_true():
    r, cmd = _run_conflict_names("base", True, True, None)
    assert r == 0, cmd


def test_base_getattr_allow_conflict_names_false():
    r, cmd = _run_conflict_names("base", False, True, None)
    assert r == 0, cmd


def test_from_dplyr_import_allow_conflict_names_true():
    r, cmd = _run_conflict_names("dplyr", True, False, None)
    assert r == 0, cmd


def test_from_dplyr_import_allow_conflict_names_false():
    r, cmd = _run_conflict_names("dplyr", False, False, "ImportError")
    assert r == 0, cmd


def test_dplyr_getattr_allow_conflict_names_true():
    r, cmd = _run_conflict_names("dplyr", True, True, None)
    assert r == 0, cmd


def test_dplyr_getattr_allow_conflict_names_false():
    r, cmd = _run_conflict_names("dplyr", False, True, None)
    assert r == 0, cmd
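
# Each case above shells out to tests/conflict_names.py and asserts only on
# the subprocess exit code; the constructed command looks roughly like:
#
#   python tests/conflict_names.py --module all --allow-conflict-names
#
# and "--error ImportError" marks runs expected to fail inside the
# subprocess rather than here.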
--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
import pytest
from datar.data import descr_datasets, add_dataset
from datar.core.utils import NotImplementedByCurrentBackendError


def test_descr_datasets():
    x = descr_datasets()
    assert "iris" in x

    x = descr_datasets("iris")
    assert "iris" in x and len(x) == 1


def test_add_dataset():
    add_dataset("test", {"url": ""})
    assert "test" in descr_datasets()


def test_load_dataset():
    with pytest.raises(NotImplementedByCurrentBackendError):
        from datar.data import iris  # noqa: F401


def test_no_such():
    with pytest.raises(NotImplementedByCurrentBackendError):
        from datar.data import nosuch  # noqa: F401
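
# Datasets appear to be resolved lazily (via a module-level __getattr__), so
# the bare `from datar.data import iris` above is enough to trigger the
# backend's load_dataset hook -- and to fail when no backend is loaded.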
--------------------------------------------------------------------------------
/tests/test_dplyr.py:
--------------------------------------------------------------------------------
import pytest

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.dplyr import (
    across, add_count, add_tally, all_of, anti_join, any_of, arrange,
    between, bind_cols, bind_rows, c_across, case_when, coalesce,
    contains, count, cumall, cumany, cume_dist, cummean,
    cur_column, cur_data, cur_data_all, cur_group, cur_group_id,
    cur_group_rows, dense_rank, desc, distinct, ends_with, everything,
    filter_, first, full_join, glimpse, group_by,
    group_by_drop_default, group_cols, group_data, group_indices,
    group_keys, group_map, group_modify, group_rows, group_size,
    group_split, group_trim, group_vars, group_walk,
    if_all, if_any, if_else, inner_join, lag, last, last_col, lead,
    left_join, matches, min_rank, mutate, n, n_distinct, n_groups,
    na_if, near, nest_join, nth, ntile, num_range, order_by,
    percent_rank, pull, recode, recode_factor, relocate, rename,
    rename_with, right_join, row_number, rows_append, rows_delete,
    rows_insert, rows_patch, rows_update, rows_upsert, rowwise,
    select, semi_join, slice_, slice_head, slice_min, slice_sample,
    slice_tail, slice_max, starts_with, summarise, tally, transmute,
    ungroup, union_all, where, with_groups, with_order, pick,
    symdiff, consecutive_id, case_match, cross_join,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (add_count, None, [], None), (add_tally, None, [], None),
    (anti_join, None, [None], None), (arrange, None, [], None),
    (between, None, [1, 2], None), (bind_cols, None, [], None),
    (bind_rows, None, [], None), (case_when, None, [1], None),
    (coalesce, None, [], None), (count, None, [], None),
    (cumall, None, [], None), (cumany, None, [], None),
    (cume_dist, None, [], None), (cummean, None, [], None),
    (cur_column, None, [1], None), (dense_rank, None, [], None),
    (desc, None, [], None), (distinct, None, [], None),
    (filter_, None, [], None), (first, None, [], None),
    (full_join, None, [1], None), (glimpse, None, [], None),
    (group_by, None, [], None), (group_by_drop_default, None, [], None),
    (group_cols, None, [], None), (group_data, None, [], None),
    (group_indices, None, [], None), (group_keys, None, [], None),
    (group_map, None, [1], None), (group_modify, None, [1], None),
    (group_rows, None, [], None), (group_size, None, [], None),
    (group_split, None, [], None), (group_trim, None, [], None),
    (group_vars, None, [], None), (group_walk, None, [1], None),
    (if_else, None, [1, 2], None), (inner_join, None, [1], None),
    (lag, None, [], None), (last, None, [], None),
    (lead, None, [], None), (left_join, None, [1], None),
    (min_rank, None, [], None), (mutate, None, [], None),
    (n_distinct, None, [], None), (n_groups, None, [], None),
    (na_if, None, [1], None), (near, None, [1], None),
    (nest_join, None, [1], None), (nth, None, [1], None),
    (ntile, None, [], None), (num_range, None, [1], None),
    (order_by, None, [1], None), (percent_rank, None, [], None),
    (pull, None, [], None), (recode, None, [], None),
    (recode_factor, None, [], None), (relocate, None, [], None),
    (rename, None, [], None), (rename_with, None, [1], None),
    (right_join, None, [1], None), (row_number, None, [], None),
    (rows_append, None, [None], None), (rows_delete, None, [None], None),
    (rows_insert, None, [None], None), (rows_patch, None, [None], None),
    (rows_update, None, [None], None), (rows_upsert, None, [None], None),
    (rowwise, None, [], None), (select, None, [], None),
    (semi_join, None, [1], None), (slice_, None, [], None),
    (slice_head, None, [], None), (slice_min, None, [1], None),
    (slice_sample, None, [], None), (slice_tail, None, [], None),
    (slice_max, None, [1], None), (summarise, None, [], None),
    (tally, None, [], None), (transmute, None, [], None),
    (ungroup, None, [], None), (union_all, None, [1], None),
    (with_groups, None, [1, 2], None), (with_order, None, [1, 2], None),
    (symdiff, None, [None], None), (consecutive_id, None, [], None),
    (case_match, None, [], None), (cross_join, None, [1], None),
])
def test_verb_not_implemented(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (pick, None, [], None), (across, None, [], None),
    (if_any, None, [], None), (if_all, None, [], None),
    (c_across, None, [], None), (cur_data, None, [], None),
    (n, None, [], None), (cur_data_all, None, [], None),
    (cur_group, None, [], None), (cur_group_id, None, [], None),
    (cur_group_rows, None, [], None), (where, None, [1], None),
    (everything, None, [], None), (last_col, None, [], None),
    (starts_with, None, [1], None), (ends_with, None, [1], None),
    (contains, None, [1], None), (matches, None, [1], None),
    (all_of, None, [1], None), (any_of, None, [1], None),
])
def test_dep_verbs(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        data >> verb(*args, **kwargs)
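
# Two calling conventions are covered: test_verb_not_implemented invokes
# each verb directly as verb(data, ...), while test_dep_verbs pipes data
# into the verb, roughly `data >> select(...)`; without a backend, both
# paths must end in NotImplementedByCurrentBackendError.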
--------------------------------------------------------------------------------
/tests/test_forcats.py:
--------------------------------------------------------------------------------
import pytest  # noqa: F401

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.forcats import (
    fct_anon, fct_c, fct_collapse, fct_count, fct_cross, fct_drop,
    fct_expand, fct_explicit_na, fct_infreq, fct_inorder, fct_inseq,
    fct_lump, fct_lump_lowfreq, fct_lump_min, fct_lump_n,
    fct_lump_prop, fct_match, fct_other, fct_recode, fct_relabel,
    fct_relevel, fct_reorder, fct_reorder2, fct_rev, fct_shift,
    fct_shuffle, fct_unify, fct_unique, first2, last2,
    lvls_expand, lvls_reorder, lvls_revalue, lvls_union,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (fct_anon, None, [], None), (fct_c, None, [], None),
    (fct_collapse, None, [], None), (fct_count, None, [], None),
    (fct_cross, None, [], None), (fct_drop, None, [], None),
    (fct_expand, None, [], None), (fct_explicit_na, None, [], None),
    (fct_infreq, None, [], None), (fct_inorder, None, [], None),
    (fct_inseq, None, [], None), (fct_lump, None, [], None),
    (fct_lump_lowfreq, None, [], None), (fct_lump_min, None, [1], None),
    (fct_lump_n, None, [1], None), (fct_lump_prop, None, [1], None),
    (fct_match, None, [1], None), (fct_other, None, [], None),
    (fct_recode, None, [], None), (fct_relabel, None, [1], None),
    (fct_relevel, None, [], None), (fct_reorder, None, [1], None),
    (fct_reorder2, None, [1], None), (fct_rev, None, [], None),
    (fct_shift, None, [], None), (fct_shuffle, None, [], None),
    (fct_unify, None, [], None), (fct_unique, None, [], None),
    (first2, None, [1], None), (last2, None, [1], None),
    (lvls_expand, None, [1], None), (lvls_reorder, None, [1], None),
    (lvls_revalue, None, [1], None), (lvls_union, None, [], None),
])
def test_default_impl(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)
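
# As with the dplyr verbs above, these forcats functions are dispatch
# stubs: e.g. fct_rev(None) raises NotImplementedByCurrentBackendError
# until a backend registers an implementation.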
--------------------------------------------------------------------------------
/tests/test_names.py:
--------------------------------------------------------------------------------
# https://github.com/r-lib/vctrs/blob/master/tests/testthat/test-names.R
import pytest
from typing import Iterable

import numpy as np
from string import ascii_letters

from datar.core.names import (
    NameNonUniqueError,
    repair_names,
)


@pytest.mark.parametrize(
    "names,expect",
    [
        ([1, 2, 3], ["1", "2", "3"]),
        (["", np.nan], ["", ""]),
        (["", np.nan], ["", ""]),
        (["", "", np.nan], ["", "", ""]),
        (repair_names(["", "", np.nan], repair="minimal"), ["", "", ""]),
    ],
)
def test_minimal(names, expect):
    assert repair_names(names, repair="minimal") == expect


@pytest.mark.parametrize(
    "names,expect",
    [
        ([np.nan, np.nan], ["__0", "__1"]),
        (["x", "x"], ["x__0", "x__1"]),
        (["x", "y"], ["x", "y"]),
        (["", "x", "y", "x"], ["__0", "x__1", "y", "x__3"]),
        ([""], ["__0"]),
        ([np.nan], ["__0"]),
        (
            ["__20", "a__33", "b", "", "a__2__34"],
            ["__0", "a__1", "b", "__3", "a__4"],
        ),
        (["a__1"], ["a"]),
        (["a__2", "a"], ["a__0", "a__1"]),
        (["a__3", "a", "a"], ["a__0", "a__1", "a__2"]),
        (["a__2", "a", "a"], ["a__0", "a__1", "a__2"]),
        (["a__2", "a__2", "a__2"], ["a__0", "a__1", "a__2"]),
        (
            ["__20", "a__1", "b", "", "a__2"],
            ["__0", "a__1", "b", "__3", "a__4"],
        ),
        (
            repair_names(["__20", "a__1", "b", "", "a__2"], repair="unique"),
            ["__0", "a__1", "b", "__3", "a__4"],
        ),
        (
            ["", "x", "", "y", "x", "_2", "__"],
            ["__0", "x__1", "__2", "y", "x__4", "__5", "__6"],
        ),
    ],
)
def test_unique(names, expect):
    assert repair_names(names, repair="unique") == expect


def test_unique_algebraic_y():
    x = ["__20", "a__1", "b", "", "a__2", "d"]
    y = ["", "a__3", "b", "__3", "e"]
    # fix names on each, catenate, fix the whole
    z1 = repair_names(
        repair_names(x, repair="unique") + repair_names(y, repair="unique"),
        repair="unique",
    )
    z2 = repair_names(repair_names(x, repair="unique") + y, repair="unique")
    z3 = repair_names(x + repair_names(y, repair="unique"), repair="unique")
    z4 = repair_names(x + y, repair="unique")
    assert z1 == z2 == z3 == z4


@pytest.mark.parametrize(
    "names,expect",
    [
        (list(ascii_letters), list(ascii_letters)),
        (
            [np.nan, "", "x", "x", "a1:", "_x_y}"],
            ["__0", "__1", "x__2", "x__3", "a1_", "_x_y_"],
        ),
        (
            repair_names(
                [np.nan, "", "x", "x", "a1:", "_x_y}"], repair="universal"
            ),
            ["__0", "__1", "x__2", "x__3", "a1_", "_x_y_"],
        ),
        (["a", "b", "a", "c", "b"], ["a__0", "b__1", "a__2", "c", "b__4"]),
        ([""], ["__0"]),
        ([np.nan], ["__0"]),
        (["__"], ["__0"]),
        (["_"], ["_"]),
        (["_", "_"], ["___0", "___1"]),
        (["", "_"], ["__0", "_"]),
        (["", "", "_"], ["__0", "__1", "_"]),
        (["_", "_", ""], ["___0", "___1", "__2"]),
        (["_", "", "_"], ["___0", "__1", "___2"]),
        (["", "_", ""], ["__0", "_", "__2"]),
        (["__6", "__1__2"], ["__0", "__1"]),
        (["if__2"], ["_if"]),
        (
            ["", "_", np.nan, "if__4", "if", "if__8", "for", "if){]1"],
            [
                "__0", "_", "__2", "_if__3", "_if__4", "_if__5",
                "_for", "if___1",
            ],
        ),
        (["a b", "b c"], ["a_b", "b_c"]),
        (
            ["", "_2", "_3", "__4", "___5", "____6", "_____7", "__"],
            ["__0", "__1", "__2", "__3", "___5", "____6", "_____7", "__7"],
        ),
        (
            repair_names(
                ["", "_2", "_3", "__4", "___5", "____6", "_____7", "__"],
                repair="unique",
            ),
            ["__0", "__1", "__2", "__3", "___5", "____6", "_____7", "__7"],
        ),
        (
            [7, 4, 3, 6, 5, 1, 2, 8],
            ["_7", "_4", "_3", "_6", "_5", "_1", "_2", "_8"],
        ),
        (
            repair_names([7, 4, 3, 6, 5, 1, 2, 8], repair="unique"),
            ["_7", "_4", "_3", "_6", "_5", "_1", "_2", "_8"],
        ),
    ],
)
def test_universal(names, expect):
    assert repair_names(names, repair="universal") == expect


def test_check_unique():
    with pytest.raises(NameNonUniqueError):
        repair_names([np.nan], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names([""], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names(["a", "a"], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names(["__1"], repair="check_unique")
    with pytest.raises(NameNonUniqueError):
        repair_names(["__"], repair="check_unique")
    assert repair_names(["a", "b"], repair="check_unique") == ["a", "b"]


def test_custom_repair():
    def replace(names: Iterable[str]):
        return ["a", "b", "c"]

    out = repair_names([1, 2, 3], repair=replace)
    assert out == ["a", "b", "c"]

    with pytest.raises(ValueError):
        repair_names([1, 2, 3], repair=1)

    out = repair_names(["a", "b", "c"], repair=str.upper)
    assert out == ["A", "B", "C"]

    out = repair_names(["a", "b", "c"], repair=["x", "y", "z"])
    assert out == ["x", "y", "z"]
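
# repair_names at a glance (outputs taken from the cases above):
#
#   repair_names(["x", "x"], repair="unique")     # -> ["x__0", "x__1"]
#   repair_names(["a1:"], repair="universal")     # -> ["a1_"]
#   repair_names(["a", "b"], repair=str.upper)    # -> ["A", "B"]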
--------------------------------------------------------------------------------
/tests/test_options.py:
--------------------------------------------------------------------------------
import pytest
from datar.core.options import (
    options,
    options_context,
    add_option,
    get_option,
)


@pytest.fixture(autouse=True)
def reset_options():
    opts = options()
    add_option("x_y_z", True)
    yield
    options(opts)


def test_options_empty_args_returns_full_options():
    from datar.core.options import OPTIONS
    out = options()
    assert out == OPTIONS


def test_options_with_names_only_selects_options():
    out = options("x_y_z")
    assert len(out) == 1
    assert out["x_y_z"]


def test_options_with_name_value_pairs_returns_old_values_and_updates():
    out = options(x_y_z=False, _return=True)
    assert out == {"x_y_z": True}
    assert not get_option("x.y.z")  # dotted alias for x_y_z


def test_options_with_dict_updates_options():
    out = options({"x_y_z": True}, _return=True)
    assert get_option("x_y_z")
    assert out.x_y_z


def test_options_context():
    assert get_option("x_y_z")
    with options_context(x_y_z=False):
        assert not get_option("x_y_z")

    assert get_option("x_y_z")
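
# options() mirrors R's options(): called with no arguments it returns the
# current option mapping, name=value pairs update options in place, and
# _return=True additionally hands back the previous values of the options
# just changed -- which is why the autouse fixture can snapshot and restore
# the whole mapping around each test.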
--------------------------------------------------------------------------------
/tests/test_plugin.py:
--------------------------------------------------------------------------------
import pytest

import numpy as np
from simplug import MultipleImplsForSingleResultHookWarning
from pipda import Context
from pipda.utils import MultiImplementationsWarning
from datar import f
from datar.core.plugin import plugin
from datar.core.operator import DatarOperator


class TestPlugin1:

    @plugin.impl
    def get_versions():
        return {"abc": "1.2.3"}

    @plugin.impl
    def load_dataset(name, metadata):
        return name * 2

    @plugin.impl
    def misc_api():
        from datar.apis.misc import array_ufunc

        @array_ufunc.register(object, backend="testplugin1")
        def _array_ufunc(x, ufunc, *args, kind, **kwargs):
            return ufunc([i * 3 for i in x], *args, **kwargs)

        return {"other_var": 1}

    @plugin.impl
    def operate(op, x, y=None):
        if op == "add":
            return x + y + x * y
        return None

    @plugin.impl
    def c_getitem(item):
        return item * 2


class TestPlugin2:

    @plugin.impl
    def load_dataset(name, metadata):
        return name * 3

    @plugin.impl
    def c_getitem(item):
        return item * 4

    @plugin.impl
    def operate(op, x, y=None):
        if op == "add":
            return x + y + 2 * x * y
        return None


def setup_function(function):
    plugin.register(TestPlugin1)
    plugin.register(TestPlugin2)
    plugin.get_plugin("testplugin1").disable()
    plugin.get_plugin("testplugin2").disable()


@pytest.fixture
def with_test_plugin1():
    plugin.get_plugin("testplugin1").enable()
    yield
    plugin.get_plugin("testplugin1").disable()


@pytest.fixture
def with_test_plugin2():
    plugin.get_plugin("testplugin2").enable()
    yield
    plugin.get_plugin("testplugin2").disable()


def test_get_versions(with_test_plugin1, capsys):
    from datar import get_versions
    assert get_versions(prnt=False)["abc"] == "1.2.3"

    get_versions()
    assert "datar" in capsys.readouterr().out


def test_misc_api(with_test_plugin1):
    from datar import all, misc
    plugin.hooks.misc_api()
    from importlib import reload
    reload(misc)
    assert misc.other_var == 1

    reload(all)
    from datar.all import other_var
    assert other_var == 1


def test_misc_api_array_ufunc(with_test_plugin1):
    from datar import f
    from datar.apis.misc import array_ufunc

    plugin.hooks.misc_api()

    with pytest.warns(MultiImplementationsWarning):
        out = np.sqrt(f)._pipda_eval([3, 12, 27], Context.EVAL)

    assert out.tolist() == [3, 6, 9]

    with array_ufunc.with_backend("_default"):
        out = np.sqrt(f)._pipda_eval([1, 4, 9], Context.EVAL)

    assert out.tolist() == [1, 2, 3]


def test_load_dataset(with_test_plugin1, with_test_plugin2):
    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        from datar.data import iris

    assert iris == "irisirisiris"

    from datar.data import load_dataset
    assert load_dataset("iris", __backend="testplugin1") == "irisiris"


def test_operate(with_test_plugin1):
    expr = f[0] + f[1]
    assert expr._pipda_eval([3, 2], Context.EVAL) == 11


def test_operate2(with_test_plugin1, with_test_plugin2):
    expr = f[0] + f[1]
    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert expr._pipda_eval([3, 2], Context.EVAL) == 17

    with DatarOperator.with_backend("testplugin1"):
        assert expr._pipda_eval([3, 2], Context.EVAL) == 11

    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert expr._pipda_eval([3, 2], Context.EVAL) == 17


def test_c_getitem(with_test_plugin1):
    from datar.base import c
    assert c[11] == 22


def test_c_getitem2(with_test_plugin1, with_test_plugin2):
    from datar.base import c
    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert c[11] == 44

    with c.with_backend("testplugin1"):
        assert c[11] == 22

    with pytest.warns(MultipleImplsForSingleResultHookWarning):
        assert c[11] == 44
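
# The pattern under test: operate, c_getitem and load_dataset are
# single-result hooks, so simplug warns with
# MultipleImplsForSingleResultHookWarning when two enabled plugins both
# implement them, and with_backend("testplugin1") pins one implementation
# for the duration of the block (e.g. 3 + 2 + 3 * 2 == 11 for TestPlugin1's
# "add", vs 3 + 2 + 2 * 3 * 2 == 17 for TestPlugin2's).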
--------------------------------------------------------------------------------
/tests/test_tibble.py:
--------------------------------------------------------------------------------
import pytest  # noqa: F401

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.tibble import (
    add_column, add_row, as_tibble, column_to_rownames, deframe,
    enframe, has_rownames, remove_rownames, rowid_to_column,
    rownames_to_column, tibble, tibble_, tibble_row, tribble,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (add_column, None, [1], None),
    (add_row, None, [1], None),
    (as_tibble, None, [], None),
    (column_to_rownames, None, [], None),
    (deframe, None, [], None),
    (enframe, None, [], None),
    (has_rownames, None, [], None),
    (remove_rownames, None, [], None),
    (rowid_to_column, None, ["x"], None),
    (rownames_to_column, None, ["x"], None),
    (tibble, None, [], None),
    (tibble_, None, [], None),
    (tibble_row, None, [], None),
    (tribble, None, [], None),
])
def test_default_impl(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)
--------------------------------------------------------------------------------
/tests/test_tidyr.py:
--------------------------------------------------------------------------------
import pytest

from datar.core.utils import NotImplementedByCurrentBackendError
from datar.tidyr import (
    chop, complete, crossing, drop_na, expand, extract, fill,
    full_seq, nest, nesting, pack, pivot_longer, pivot_wider,
    replace_na, separate, separate_rows, unchop, uncount, unite,
    unnest, unpack,
)


@pytest.mark.parametrize("verb, data, args, kwargs", [
    (chop, None, [], None),
    (complete, None, [], None),
    (crossing, None, [], None),
    (drop_na, None, [], None),
    (expand, None, [], None),
    (extract, None, [1, 1], None),
    (fill, None, [], None),
    (full_seq, None, [1], None),
    (nest, None, [], None),
    (nesting, None, [], None),
    (pack, None, [], None),
    (pivot_longer, None, [1], None),
    (pivot_wider, None, [], None),
    (replace_na, None, [], None),
    (separate, None, [1, 1], None),
    (separate_rows, None, [], None),
    (unchop, None, [], None),
    (uncount, None, [1], None),
    (unite, None, [1], None),
    (unnest, None, [], None),
    (unpack, None, [1], None),
])
def test_default_impl(verb, data, args, kwargs):
    kwargs = kwargs or {}
    with pytest.raises(NotImplementedByCurrentBackendError):
        verb(data, *args, **kwargs)
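
# Same default-implementation contract as tibble and dplyr above: each
# tidyr verb raises NotImplementedByCurrentBackendError until a backend
# (e.g. a pandas-based one) supplies the real implementation.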
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
import pytest
from datar.core.utils import arg_match


def test_arg_match():
    with pytest.raises(ValueError, match='abc'):
        arg_match('a', 'a', ['b', 'c'], errmsg='abc')
    with pytest.raises(ValueError, match='must be one of'):
        arg_match('a', 'a', ['b', 'c'])

    assert arg_match('a', 'a', ['a', 'b', 'c']) == 'a'
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[flake8]
ignore = E203, W503, E731
per-file-ignores =
    # imported but unused
    __init__.py: F401, E402
    datar/all.py: F401, E402, F403, F811
    datar/apis/base.py: F401
    datar/apis/dplyr.py: F401
    datar/apis/forcats.py: F401
    datar/apis/tidyr.py: F401
    datar/forcats.py: F401, F403
    datar/tidyr.py: F401, F403
    datar/tibble.py: F401, F403
    datar/base.py: F401, F402, F403, E402
    datar/dplyr.py: F401, F402, F403, E402
    datar/data/metadata.py: E501
    tests/test_conflict_names.py: F401
max-line-length = 81
--------------------------------------------------------------------------------