├── .env.dev
├── .github
│   ├── CODE_OF_CONDUCT.md
│   └── workflows
│       ├── ci.yml
│       ├── code-checks.yml
│       └── cross-compat.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── MAINTAINERS.md
├── MANIFEST.in
├── Makefile
├── README.md
├── README.qmd
├── binder
│   ├── postBuild
│   ├── requirements.txt
│   └── runtime.txt
├── docker-compose.yml
├── docs
│   ├── .gitignore
│   ├── _extensions
│   │   └── machow
│   │       └── interlinks
│   │           ├── .gitignore
│   │           ├── _extension.yml
│   │           └── interlinks.lua
│   ├── _quarto.yml
│   ├── customize-pins-metadata.qmd
│   ├── favicon.ico
│   ├── get_started.qmd
│   ├── index.qmd
│   └── logo.png
├── pins
│   ├── __init__.py
│   ├── _adaptors.py
│   ├── _types.py
│   ├── boards.py
│   ├── cache.py
│   ├── config.py
│   ├── constructors.py
│   ├── data
│   │   ├── __init__.py
│   │   └── mtcars.csv
│   ├── databricks
│   │   ├── __init__.py
│   │   └── fs.py
│   ├── drivers.py
│   ├── errors.py
│   ├── meta.py
│   ├── rsconnect
│   │   ├── __init__.py
│   │   ├── api.py
│   │   ├── fs.py
│   │   └── html
│   │       ├── highlight.js-9.15.9
│   │       │   ├── highlight.js
│   │       │   └── qtcreator_light.css
│   │       ├── index.html
│   │       └── pagedtable-1.1
│   │           ├── pagedtable.css
│   │           └── pagedtable.js
│   ├── tests
│   │   ├── .gitignore
│   │   ├── _snapshots
│   │   │   └── test_board_pin_write_rsc_index_html
│   │   │       ├── data.txt
│   │   │       ├── highlight.js-9.15.9
│   │   │       │   ├── highlight.js
│   │   │       │   └── qtcreator_light.css
│   │   │       ├── index.html
│   │   │       ├── pagedtable-1.1
│   │   │       │   ├── pagedtable.css
│   │   │       │   └── pagedtable.js
│   │   │       └── test_rsc_pin.csv
│   │   ├── conftest.py
│   │   ├── example-bundle
│   │   │   ├── data.txt
│   │   │   ├── data_frame.csv
│   │   │   ├── index.html
│   │   │   └── manifest.json
│   │   ├── helpers.py
│   │   ├── pin-board
│   │   │   ├── _pins.yaml
│   │   │   ├── x
│   │   │   │   └── 20221215T180351Z-c3943
│   │   │   │       ├── data.txt
│   │   │   │       └── x.json
│   │   │   └── y
│   │   │       ├── 20221215T180357Z-9ae7a
│   │   │       │   ├── data.txt
│   │   │       │   └── y.rds
│   │   │       └── 20221215T180400Z-b81d5
│   │   │           ├── data.txt
│   │   │           └── y.json
│   │   ├── pins-compat
│   │   │   ├── df_arrow
│   │   │   │   └── 20220214T163720Z-ad0c1
│   │   │   │       ├── data.txt
│   │   │   │       └── df_arrow.arrow
│   │   │   ├── df_csv
│   │   │   │   ├── 20220214T163718Z-eceac
│   │   │   │   │   ├── data.txt
│   │   │   │   │   └── df_csv.csv
│   │   │   │   └── 20220214T163720Z-9bfad
│   │   │   │       ├── data.txt
│   │   │   │       └── df_csv.csv
│   │   │   ├── df_rds
│   │   │   │   └── 20220214T163720Z-35b15
│   │   │   │       ├── data.txt
│   │   │   │       └── df_rds.rds
│   │   │   └── df_unversioned
│   │   │       └── 20220214T163720Z-35b15
│   │   │           ├── data.txt
│   │   │           └── df_unversioned.rds
│   │   ├── pins-old-types
│   │   │   └── a-table
│   │   │       └── v
│   │   │           ├── data.csv
│   │   │           ├── data.rds
│   │   │           └── data.txt
│   │   ├── test_adaptors.py
│   │   ├── test_boards.py
│   │   ├── test_cache.py
│   │   ├── test_compat.py
│   │   ├── test_compat_old_types.py
│   │   ├── test_config.py
│   │   ├── test_constructors.py
│   │   ├── test_drivers.py
│   │   ├── test_meta.py
│   │   ├── test_rsconnect_api.py
│   │   ├── test_utils.py
│   │   └── test_versions.py
│   ├── utils.py
│   └── versions.py
├── pyproject.toml
├── requirements
│   ├── dev.txt
│   └── minimum.txt
└── script
    ├── ci-compat-check
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── dump_py_pins.py
    │   ├── dump_r_pins.R
    │   ├── validate_py_to_r.R
    │   └── validate_r_to_py.py
    ├── setup-rsconnect
    │   ├── add-users.sh
    │   ├── dump_api_keys.py
    │   ├── rstudio-connect.gcfg
    │   └── users.txt
    ├── stage_example_bundle.py
    ├── stage_r_pins.R
    └── stage_r_pins_old_types.R

/.env.dev:
--------------------------------------------------------------------------------
1 | # This allows the unit tests to run, while not using
2 | # the full <user_name>/<content_name> format.
3 | PINS_ALLOW_RSC_SHORT_NAME=1
4 | PINS_FEATURE_PREVIEW=1
5 |
6 | # Pins optional config ----
7 | #PINS_CACHE_DIR=.pins_cache
8 | #PINS_DATA_DIR=.pins_data
9 |
10 | # AWS S3 backend ----
11 | AWS_ACCESS_KEY_ID=
12 | AWS_SECRET_ACCESS_KEY=
13 | AWS_REGION=us-east-1
14 |
15 | # Azure backend ----
16 | AZURE_STORAGE_ACCOUNT_NAME=cipins
17 | AZURE_STORAGE_ACCOUNT_KEY=
18 |
19 | # GCS backend ----
20 | # Note that this backend uses gcsfs's
21 | # default auth setting, which requires authenticating
22 | # via the gcloud cli.
23 |
24 | # Posit Connect license ----
25 | RSC_LICENSE=
26 |
27 | # Uncomment and change the variables below to specify the buckets (directories)
28 | # in which test boards will be created. E.g. "ci-pins" means boards will be created
29 | # in the ci-pins bucket on s3.
30 | # (Note that the local file backend always uses a temporary directory.)
31 | #
32 | # PINS_TEST_S3__PATH="ci-pins"
33 |
34 | # Databricks backend ----
35 | DATABRICKS_HOST=
36 | DATABRICKS_TOKEN=
37 | DATABRICKS_VOLUME=
38 | --------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, caste, color, religion, or sexual
10 | identity and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 |   and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 |   community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or advances of
31 |   any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email address,
35 |   without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 |   professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at codeofconduct@posit.co.
63 | All complaints will be reviewed and investigated promptly and fairly.
64 |
65 | All community leaders are obligated to respect the privacy and security of the
66 | reporter of any incident.
67 |
68 | ## Enforcement Guidelines
69 |
70 | Community leaders will follow these Community Impact Guidelines in determining
71 | the consequences for any action they deem in violation of this Code of Conduct:
72 |
73 | ### 1. Correction
74 |
75 | **Community Impact**: Use of inappropriate language or other behavior deemed
76 | unprofessional or unwelcome in the community.
77 |
78 | **Consequence**: A private, written warning from community leaders, providing
79 | clarity around the nature of the violation and an explanation of why the
80 | behavior was inappropriate. A public apology may be requested.
81 |
82 | ### 2. Warning
83 |
84 | **Community Impact**: A violation through a single incident or series of
85 | actions.
86 |
87 | **Consequence**: A warning with consequences for continued behavior. No
88 | interaction with the people involved, including unsolicited interaction with
89 | those enforcing the Code of Conduct, for a specified period of time. This
90 | includes avoiding interactions in community spaces as well as external channels
91 | like social media. Violating these terms may lead to a temporary or permanent
92 | ban.
93 |
94 | ### 3. Temporary Ban
95 |
96 | **Community Impact**: A serious violation of community standards, including
97 | sustained inappropriate behavior.
98 |
99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 |
105 | ### 4. Permanent Ban
106 |
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 |
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 |
114 | ## Attribution
115 |
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.1, available at
118 | <https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
119 |
120 | Community Impact Guidelines were inspired by
121 | [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion).
122 |
123 | For answers to common questions about this code of conduct, see the FAQ at
124 | <https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>.
125 | 126 | [homepage]: https://www.contributor-covenant.org 127 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ['main', 'dev-*'] 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | env: 12 | PINS_ALLOW_RSC_SHORT_NAME: 1 13 | PINS_FEATURE_PREVIEW: 1 14 | 15 | jobs: 16 | tests: 17 | name: "Tests" 18 | runs-on: ${{ matrix.os }} 19 | if: ${{ !github.event.pull_request.head.repo.fork }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python: ["3.9", "3.10", "3.11", "3.12", "3.13"] 24 | os: ["ubuntu-latest"] 25 | pytest_opts: ["--workers 4 --tests-per-worker 1"] 26 | requirements: [""] 27 | include: 28 | - os: "ubuntu-latest" 29 | python: "3.9" 30 | requirements: "requirements/minimum.txt" 31 | - os: "macos-latest" 32 | python: "3.10" 33 | # ignore doctests, as they involve calls to github, and all mac machines 34 | # use the same IP address 35 | pytest_opts: "--workers 4 --tests-per-worker 1 -k pins/tests" 36 | - os: "windows-latest" 37 | python: "3.10" 38 | # ignore doctests 39 | pytest_opts: "-k pins/tests" 40 | steps: 41 | - uses: actions/checkout@v4 42 | - uses: actions/setup-python@v4 43 | with: 44 | python-version: ${{ matrix.python }} 45 | - name: Install dependencies 46 | shell: bash 47 | run: | 48 | python -m pip install --upgrade pip 49 | 50 | # optionally install from requirements file 51 | if [ $REQUIREMENTS ]; then 52 | pip install -r $REQUIREMENTS 53 | fi 54 | 55 | python -m pip install -e .[test] 56 | 57 | - name: Set up Cloud SDK 58 | uses: google-github-actions/setup-gcloud@v0 59 | with: 60 | project_id: siuba-tests 61 | service_account_key: ${{ secrets.GCP_SA_KEY }} 62 | export_default_credentials: true 63 | 64 | - name: Run tests 65 | shell: bash 66 | run: | 67 | pytest pins -m 'not fs_rsc and not skip_on_github' $PYTEST_OPTS 68 | env: 69 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 70 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 71 | AWS_REGION: "us-east-1" 72 | AZURE_STORAGE_ACCOUNT_NAME: ${{ secrets.AZURE_STORAGE_ACCOUNT_NAME }} 73 | AZURE_STORAGE_ACCOUNT_KEY: ${{ secrets.AZURE_STORAGE_ACCOUNT_KEY }} 74 | DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} 75 | DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} 76 | PYTEST_OPTS: ${{ matrix.pytest_opts }} 77 | REQUIREMENTS: ${{ matrix.requirements }} 78 | ACTION_OS: ${{ matrix.os }} 79 | # fixes error on macosx virtual machine with pytest-parallel 80 | # https://github.com/browsertron/pytest-parallel/issues/93 81 | no_proxy: "*" 82 | 83 | test-rsconnect: 84 | name: "Test Posit Connect" 85 | runs-on: ubuntu-latest 86 | if: ${{ !github.event.pull_request.head.repo.fork }} 87 | steps: 88 | - uses: actions/checkout@v4 89 | - uses: actions/setup-python@v4 90 | with: 91 | python-version: "3.10" 92 | - name: Install dependencies 93 | run: | 94 | python -m pip install --upgrade pip 95 | python -m pip install -r requirements/dev.txt 96 | python -m pip install -e . 
97 | 98 | - name: run Posit Connect 99 | run: | 100 | docker compose up --build -d 101 | make dev 102 | env: 103 | RSC_LICENSE: ${{ secrets.RSC_LICENSE }} 104 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 105 | 106 | # NOTE: edited to run checks for python package 107 | - name: Run tests 108 | run: | 109 | pytest pins -m 'fs_rsc and not skip_on_github' 110 | 111 | 112 | test-fork: 113 | name: "Test a fork PR (no secrets)" 114 | runs-on: ubuntu-latest 115 | if: ${{ github.event.pull_request.head.repo.fork }} 116 | steps: 117 | - uses: actions/checkout@v4 118 | - uses: actions/setup-python@v4 119 | with: 120 | python-version: "3.10" 121 | - name: Install dependencies 122 | run: | 123 | python -m pip install --upgrade pip 124 | 125 | python -m pip install -e .[test] 126 | - name: Run tests 127 | run: | 128 | # TODO: better way to disable all cloud backend tests? 129 | pytest pins -m 'not fs_rsc and not fs_s3 and not fs_gcs and not fs_abfs and not skip_on_github' 130 | 131 | 132 | build-docs: 133 | name: "Build Docs" 134 | runs-on: ubuntu-latest 135 | steps: 136 | - uses: actions/checkout@v3 137 | - uses: actions/setup-python@v4 138 | with: 139 | python-version: "3.10" 140 | - name: Install dependencies 141 | run: | 142 | python -m pip install --upgrade pip 143 | python -m pip install -r requirements/dev.txt 144 | python -m pip install -e . 145 | python -m ipykernel install --user 146 | 147 | - name: Set up Quarto 148 | uses: quarto-dev/quarto-actions/setup@v2 149 | - name: Build docs 150 | run: | 151 | make docs-build 152 | - name: Save docs artifact 153 | uses: actions/upload-artifact@v4 154 | with: 155 | name: docs-html 156 | path: docs/_site 157 | 158 | preview-docs: 159 | name: "Preview Docs:" 160 | runs-on: ubuntu-latest 161 | needs: ["build-docs"] 162 | if: "${{github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork }}" 163 | 164 | steps: 165 | - uses: actions/download-artifact@v4 166 | with: 167 | name: docs-html 168 | path: docs/_site 169 | 170 | # Determine the release name --- 171 | 172 | - name: Configure pull release name 173 | if: ${{github.event_name == 'pull_request'}} 174 | run: | 175 | echo "RELEASE_NAME=pr-${PR_NUMBER}" >> $GITHUB_ENV 176 | env: 177 | PR_NUMBER: ${{ github.event.number }} 178 | 179 | # create deployment ---- 180 | 181 | - name: Create Github Deployment 182 | uses: bobheadxi/deployments@v0.4.3 183 | id: deployment 184 | with: 185 | step: start 186 | token: ${{ secrets.GITHUB_TOKEN }} 187 | env: ${{ env.RELEASE_NAME }} 188 | ref: ${{ github.head_ref }} 189 | transient: true 190 | logs: 'https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}' 191 | 192 | # push docs ---- 193 | 194 | - name: Netlify docs preview 195 | run: | 196 | npm install -g netlify-cli 197 | # push main branch to production, others to preview -- 198 | netlify deploy --dir=docs/_site --alias="${ALIAS}" 199 | 200 | env: 201 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 202 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 203 | ALIAS: ${{ steps.deployment.outputs.env }} 204 | 205 | # update deployment ---- 206 | 207 | - name: Update Github Deployment 208 | uses: bobheadxi/deployments@v0.4.3 209 | if: ${{ always() }} 210 | with: 211 | step: finish 212 | token: ${{ secrets.GITHUB_TOKEN }} 213 | status: ${{ job.status }} 214 | deployment_id: ${{ steps.deployment.outputs.deployment_id }} 215 | env_url: 'https://${{ steps.deployment.outputs.env }}--pins-python.netlify.app' 216 | logs: 'https://github.com/${{ github.repository 
}}/actions/runs/${{ github.run_id }}' 217 | 218 | publish-docs: 219 | name: "Publish Docs" 220 | runs-on: ubuntu-latest 221 | needs: ["build-docs", "tests", "test-rsconnect"] 222 | if: github.ref == 'refs/heads/main' 223 | steps: 224 | - uses: actions/download-artifact@v4 225 | with: 226 | name: docs-html 227 | path: docs/_site 228 | - uses: peaceiris/actions-gh-pages@v3 229 | with: 230 | github_token: ${{ secrets.GITHUB_TOKEN }} 231 | publish_dir: docs/_site 232 | 233 | release-pypi: 234 | name: "Release to pypi" 235 | runs-on: ubuntu-latest 236 | if: github.event_name == 'release' 237 | needs: [build-docs, tests, test-rsconnect] 238 | steps: 239 | - uses: actions/checkout@v4 240 | - uses: actions/setup-python@v4 241 | with: 242 | python-version: "3.10" 243 | - name: "Build Package" 244 | run: | 245 | python -m pip install build wheel 246 | python -m build --sdist --wheel 247 | 248 | - name: "Deploy to Test PyPI" 249 | uses: pypa/gh-action-pypi-publish@release/v1 250 | with: 251 | user: __token__ 252 | password: ${{ secrets.PYPI_API_TOKEN }} 253 | -------------------------------------------------------------------------------- /.github/workflows/code-checks.yml: -------------------------------------------------------------------------------- 1 | name: Code Checks 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ['main', 'dev-*'] 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | jobs: 12 | pre-commit: 13 | name: "Run pre-commit" 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: actions/setup-python@v4 18 | - uses: pre-commit/action@v3.0.1 19 | 20 | pyright: 21 | name: "Run Pyright" 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: actions/setup-python@v4 26 | with: 27 | python-version: 3.13 # Use the maximum version supported by python-pins 28 | - name: Install dependencies 29 | shell: bash 30 | run: | 31 | python -m pip install --upgrade pip 32 | python -m pip install -e .[check] 33 | - uses: jakebailey/pyright-action@v2 34 | with: 35 | version: 1.1.372 36 | -------------------------------------------------------------------------------- /.github/workflows/cross-compat.yml: -------------------------------------------------------------------------------- 1 | name: Cross compatibility 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | r_pins_tag: 7 | description: "Tag or commit from pins-r (e.g. 
v1.0.3)" 8 | default: "__cran__" 9 | required: true 10 | push: 11 | branches: ['main', 'dev-*'] 12 | pull_request: 13 | release: 14 | types: [published] 15 | 16 | env: 17 | PINS_ALLOW_RSC_SHORT_NAME: 1 18 | PINS_FEATURE_PREVIEW: 1 19 | 20 | jobs: 21 | check-cross-compatibility: 22 | name: "Check cross lib compatibility" 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: Install libcurl on Linux 28 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev 29 | 30 | # r --- 31 | 32 | - uses: r-lib/actions/setup-r@v2 33 | with: 34 | use-public-rspm: true 35 | 36 | - name: Install R dependencies (from CRAN) 37 | run: "install.packages('pins')" 38 | shell: Rscript {0} 39 | if: ${{ github.event.name != 'workflow_dispatch' || inputs.r_pins_tag == '__cran__' }} 40 | 41 | - name: Install R dependencies (from github) 42 | run: | 43 | install.packages("remotes") 44 | remotes::install_github(paste0('rstudio/pins-r@', Sys.getenv('R_PINS_TAG'))) 45 | shell: Rscript {0} 46 | env: 47 | R_PINS_TAG: ${{ inputs.r_pins_tag }} 48 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 49 | if: ${{ github.event.name == 'workflow_dispatch' && inputs.r_pins_tag != '__cran__' }} 50 | 51 | # python --- 52 | 53 | - uses: actions/setup-python@v2 54 | with: 55 | python-version: "3.10" 56 | - name: Install py dependencies 57 | run: | 58 | python -m pip install --upgrade pip 59 | python -m pip install -r requirements/dev.txt 60 | python -m pip install -e . 61 | 62 | # write and test --- 63 | 64 | - name: Run script/ci-compat-check 65 | run: make ci-compat-check 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac specific 2 | .DS_Store 3 | 4 | # Vim swapfiles 5 | *.sw[op] 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # RStudio 138 | .Rproj.user 139 | *.Rproj 140 | 141 | # Quarto 142 | /.quarto/ 143 | _site/ 144 | objects.json 145 | reference/ 146 | src/ 147 | 148 | /.luarc.json 149 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: "(.*\\.csv)|(^pins/tests/_snapshots)" 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.4.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | args: ["--unsafe"] 10 | - id: check-added-large-files 11 | - repo: https://github.com/charliermarsh/ruff-pre-commit 12 | rev: "v0.5.4" # Sync with pyproject.toml 13 | hooks: 14 | - id: ruff 15 | args: ["--fix"] 16 | - id: ruff-format 17 | - repo: https://github.com/codespell-project/codespell 18 | rev: v2.4.1 19 | hooks: 20 | - id: codespell 21 | additional_dependencies: 22 | - tomli 23 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # pins-python 2 | 3 | ## Development 4 | 5 | ### Install pins with dev dependencies 6 | 7 | ```shell 8 | python -m pip install -e .[dev] 9 | ``` 10 | 11 | ### Install pre-commit hooks 12 | 13 | This project uses [pre-commit](https://pre-commit.com/) to check and format each commit. 14 | 15 | You can set it up by running the following code in this repo: 16 | 17 | ``` 18 | python -m pip install pre-commit 19 | pre-commit install 20 | ``` 21 | 22 | ### Setting version number 23 | 24 | This project uses [setuptools_scm](https://github.com/pypa/setuptools_scm) to 25 | automatically track and change version numbers within the `pins` package. 26 | It works by checking the last tagged commit. 27 | 28 | In order to set the version number, create a tag like the following. 29 | 30 | ```shell 31 | git tag v0.0.1 32 | ``` 33 | 34 | In order to see the version number being used for the current commit, run: 35 | 36 | ``` 37 | python -m setuptools_scm 38 | ``` 39 | 40 | ## Test 41 | 42 | Tests can be run using pytest: 43 | 44 | ```shell 45 | pytest pins 46 | 47 | # run all tests except those for Posit Connect 48 | pytest pins -m 'not fs_rsc' 49 | 50 | # run only local filesystem backend tests 51 | pytest pins -m 'fs_file' 52 | 53 | # run all tests except those for S3 and GCS 54 | pytest pins -m 'not fs_s3 and not fs_gcs' 55 | 56 | # run all tests except those using data on GitHub 57 | # n.b. 
doctests cannot have marks https://github.com/pytest-dev/pytest/issues/5794
58 | pytest pins -m 'not skip_on_github' -k 'not pins.boards.BoardManual'
59 | ```
60 |
61 | There are two important details to note for testing:
62 |
63 | * **Backends**. pins can write to backends like s3, azure, and Posit Connect, so you
64 |   will need to set credentials to test against them.
65 | * **Pytest Marks**. You can disable tests over a specific backend through pytest's
66 |   `-m` flag. For example...
67 |   - Skip S3: `pytest pins -m 'not fs_s3'`
68 |   - Test only s3: `pytest pins -m 'fs_s3'`
69 |   - List all marks: `pytest pins --markers`
70 |
71 | ### Configuring backends
72 |
73 | * Copy `.env.dev` to `.env`
74 | * Modify `.env` to fill in environment variables (e.g. AWS_ACCESS_KEY_ID)
75 | * Be careful not to put any sensitive information in `.env.dev`!
76 |
77 | ### Setting up Posit Connect tests
78 |
79 | ```
80 | # Be sure to set RSC_LICENSE in .env
81 | make dev
82 | ```
83 | --------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 pins-python authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | --------------------------------------------------------------------------------
/MAINTAINERS.md:
--------------------------------------------------------------------------------
1 | # Who maintains pins
2 |
3 | The pins-python package is currently maintained by Isabel Zimmerman. [Posit Software, PBC](https://posit.co/products/open-source/) is a copyright holder and funder of this package.
4 |
5 | Several individuals in the community have taken an active role in helping to maintain this package and submit fixes. Those individuals are shown in the git changelog.
6 | --------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | prune .*
2 | prune docs
3 | --------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | SPHINX_BUILDARGS=
2 | # Note that these are keys generated by the docker rsconnect service, so are
3 | # not really secrets.
They are saved to json to make it easy to use rsconnect 4 | # as multiple users from the tests 5 | RSC_API_KEYS=pins/tests/rsconnect_api_keys.json 6 | 7 | dev: pins/tests/rsconnect_api_keys.json 8 | 9 | dev-start: 10 | docker compose up -d 11 | docker compose exec -T rsconnect bash < script/setup-rsconnect/add-users.sh 12 | # curl fails with error 52 without a short sleep.... 13 | sleep 5 14 | curl -s --retry 10 --retry-connrefused http://localhost:3939 15 | 16 | dev-stop: 17 | docker compose down 18 | rm -f $(RSC_API_KEYS) 19 | 20 | $(RSC_API_KEYS): dev-start 21 | python script/setup-rsconnect/dump_api_keys.py $@ 22 | 23 | README.md: 24 | quarto render README.qmd 25 | 26 | test: test-most test-rsc 27 | 28 | test-most: 29 | pytest pins -m "not fs_rsc and not fs_s3" --workers 4 --tests-per-worker 1 -vv 30 | 31 | test-rsc: 32 | pytest pins -m "fs_rsc" 33 | 34 | docs-build: 35 | cd docs && python -m quartodoc build --verbose 36 | cd docs && quarto render 37 | 38 | docs-clean: 39 | rm -rf docs/_build docs/api/api_card 40 | 41 | requirements/dev.txt: pyproject.toml 42 | @# allows you to do this... 43 | @# make requirements | tee > requirements/some_file.txt 44 | @pip-compile pyproject.toml --rebuild --extra doc --extra test --extra check --output-file=- > $@ 45 | 46 | binder/requirements.txt: requirements/dev.txt 47 | cp $< $@ 48 | 49 | ci-compat-check: 50 | # TODO: mark as dummy 51 | $(MAKE) -C script/$@ 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pins 2 | 3 | 4 | ![PyPI - Version](https://img.shields.io/pypi/v/pins.svg) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pins) [![Checked with pyright](https://microsoft.github.io/pyright/img/pyright_badge.svg)](https://microsoft.github.io/pyright/) 5 | 6 | 7 | 8 | The pins package publishes data, models, and other Python objects, 9 | making it easy to share them across projects and with your colleagues. 10 | You can pin objects to a variety of pin *boards*, including folders (to 11 | share on a networked drive or with services like DropBox), Posit 12 | Connect, Amazon S3, and Google Cloud Storage. Pins can be automatically 13 | versioned, making it straightforward to track changes, re-run analyses 14 | on historical data, and undo mistakes. 15 | 16 | See the [documentation](https://rstudio.github.io/pins-python) for 17 | getting started. 18 | 19 | You can use pins from R as well as Python. For example, you can use one 20 | language to read a pin created with the other. Learn more about [pins 21 | for R](https://pins.rstudio.com). 22 | 23 | ## Installation 24 | 25 | You can install the released version of pins from 26 | [PyPI](https://pypi.org/project/pins/): 27 | 28 | ``` shell 29 | python -m pip install pins 30 | ``` 31 | 32 | And the development version from 33 | [GitHub](https://github.com/rstudio/pins-python) with: 34 | 35 | ``` shell 36 | python -m pip install git+https://github.com/rstudio/pins-python 37 | ``` 38 | 39 | ## Usage 40 | 41 | To use the pins package, you must first create a pin board. A good place 42 | to start is `board_folder()`, which stores pins in a directory you 43 | specify. Here I’ll use a special version of `board_folder()` called 44 | `board_temp()` which creates a temporary board that’s automatically 45 | deleted when your Python script or notebook session ends. This is great 46 | for examples, but obviously you shouldn’t use it for real work! 
47 |
48 | ``` python
49 | import pins
50 | from pins.data import mtcars
51 |
52 | board = pins.board_temp()
53 | ```
54 |
55 | You can “pin” (save) data to a board with the `.pin_write()` method. It
56 | requires three arguments: an object, a name, and a pin type:
57 |
58 | ``` python
59 | board.pin_write(mtcars.head(), "mtcars", type="csv")
60 | ```
61 |
62 |     Writing pin:
63 |     Name: 'mtcars'
64 |     Version: 20230523T115348Z-120a5
65 |
66 |     Meta(title='mtcars: a pinned 5 x 11 DataFrame', description=None, created='20230523T115348Z', pin_hash='120a54f7e0818041', file='mtcars.csv', file_size=249, type='csv', api_version=1, version=Version(created=datetime.datetime(2023, 5, 23, 11, 53, 48, 555797), hash='120a54f7e0818041'), tags=None, name='mtcars', user={}, local={})
67 |
68 | Above, we saved the data as a CSV, but depending on what you’re saving
69 | and who else you want to read it, you might use the `type` argument to
70 | instead save it as a `joblib`, `parquet`, or `json` file.
71 |
72 | You can later retrieve the pinned data with `.pin_read()`:
73 |
74 | ``` python
75 | board.pin_read("mtcars")
76 | ```
77 |
78 |          mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  carb
79 |     0  21.0    6  160.0  110  3.90  2.620  16.46   0   1     4     4
80 |     1  21.0    6  160.0  110  3.90  2.875  17.02   0   1     4     4
81 |     2  22.8    4  108.0   93  3.85  2.320  18.61   1   1     4     1
82 |     3  21.4    6  258.0  110  3.08  3.215  19.44   1   0     3     1
83 |     4  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3     2
84 |
85 | A board on your computer is a good place to start, but the real power of
86 | pins comes when you use a board that’s shared with multiple people. To
87 | get started, you can use `board_folder()` with a directory on a shared
88 | drive or in DropBox, or if you use [Posit
89 | Connect](https://posit.co/products/enterprise/connect/) you can use
90 | `board_connect()`:
91 |
92 | ``` python
93 | # Note that this uses one approach to connecting,
94 | # the environment variables CONNECT_SERVER and CONNECT_API_KEY
95 |
96 | board = pins.board_connect()
97 | board.pin_write(tidy_sales_data, "hadley/sales-summary", type="csv")
98 | ```
99 |
100 | Then, someone else (or an automated report) can read and use your pin:
101 |
102 | ``` python
103 | board = board_connect()
104 | board.pin_read("hadley/sales-summary")
105 | ```
106 |
107 | You can easily control who gets to access the data using the Posit
108 | Connect permissions pane.
109 |
110 | The pins package also includes boards that allow you to share data on
111 | services like Amazon’s S3 (`board_s3()`), Google Cloud Storage
112 | (`board_gcs()`), and Azure blob storage (`board_azure()`).
113 |
114 | ## Contributing
115 |
116 | - This project is released with a [Contributor Code of
117 |   Conduct](https://www.contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html).
118 |   By contributing to this project, you agree to abide by its terms.
119 |
120 | - If you think you have encountered a bug, please [submit an
121 |   issue](https://github.com/rstudio/pins-python/issues).
122 | --------------------------------------------------------------------------------
/README.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | format: gfm
3 | ---
4 |
5 | ```{python}
6 | #| include: false
7 | # this keeps the pandas dataframe repr from spitting out scoped style tags
8 | # which don't render on github
9 | import pandas as pd
10 | pd.set_option("display.notebook_repr_html", False)
11 | ```
12 |
13 | # pins
14 |
15 | The pins package publishes data, models, and other Python objects, making it
16 | easy to share them across projects and with your colleagues. You can pin
17 | objects to a variety of pin *boards*, including folders (to share on a
18 | networked drive or with services like DropBox), Posit Connect, Amazon
19 | S3, and Google Cloud Storage.
20 | Pins can be automatically versioned, making it straightforward to track changes,
21 | re-run analyses on historical data, and undo mistakes.
22 |
23 | See the [documentation](https://rstudio.github.io/pins-python) for getting started.
24 |
25 | You can use pins from R as well as Python. For example, you can use one language
26 | to read a pin created with the other. Learn more about
27 | [pins for R](https://pins.rstudio.com).
28 |
29 | ## Installation
30 |
31 | You can install the released version of pins from [PyPI](https://pypi.org/project/pins/):
32 |
33 | ```shell
34 | python -m pip install pins
35 | ```
36 |
37 | And the development version from [GitHub](https://github.com/rstudio/pins-python) with:
38 |
39 | ```shell
40 | python -m pip install git+https://github.com/rstudio/pins-python
41 | ```
42 |
43 | ## Usage
44 |
45 | To use the pins package, you must first create a pin board. A good place
46 | to start is `board_folder()`, which stores pins in a directory you
47 | specify. Here I’ll use a special version of `board_folder()` called
48 | `board_temp()` which creates a temporary board that’s automatically
49 | deleted when your Python script or notebook session ends. This is great for examples, but
50 | obviously you shouldn't use it for real work!
51 |
52 | ```{python}
53 | import pins
54 | from pins.data import mtcars
55 |
56 | board = pins.board_temp()
57 | ```
58 |
59 | You can "pin" (save) data to a board with the `.pin_write()` method. It requires three
60 | arguments: an object, a name, and a pin type:
61 |
62 | ```{python}
63 | board.pin_write(mtcars.head(), "mtcars", type="csv")
64 | ```
65 |
66 | Above, we saved the data as a CSV, but depending on
67 | what you’re saving and who else you want to read it, you might use the
68 | `type` argument to instead save it as a `joblib`, `parquet`, or `json` file.
69 |
70 | You can later retrieve the pinned data with `.pin_read()`:
71 |
72 | ```{python}
73 | board.pin_read("mtcars")
74 | ```
75 |
76 | A board on your computer is a good place to start, but the real power of
77 | pins comes when you use a board that's shared with multiple people.
To 78 | get started, you can use `board_folder()` with a directory on a shared 79 | drive or in DropBox, or if you use [Posit 80 | Connect](https://posit.co/products/enterprise/connect/) you can use 81 | `board_connect()`: 82 | 83 | ```python 84 | # Note that this uses one approach to connecting, 85 | # the environment variables CONNECT_SERVER and CONNECT_API_KEY 86 | 87 | board = pins.board_connect() 88 | board.pin_write(tidy_sales_data, "hadley/sales-summary", type="csv") 89 | ``` 90 | 91 | Then, someone else (or an automated report) can read and use your 92 | pin: 93 | 94 | ```python 95 | board = board_connect() 96 | board.pin_read("hadley/sales-summary") 97 | ``` 98 | 99 | You can easily control who gets to access the data using the Posit 100 | Connect permissions pane. 101 | 102 | The pins package also includes boards that allow you to share data on 103 | services like Amazon’s S3 (`board_s3()`), Google Cloud Storage (`board_gcs()`), 104 | and Azure blob storage (`board_azure()`). 105 | 106 | ## Contributing 107 | 108 | - This project is released with a [Contributor Code of Conduct](https://www.contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 109 | 110 | - If you think you have encountered a bug, please [submit an issue](https://github.com/rstudio/pins-python/issues). 111 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | pip install -e . 4 | -------------------------------------------------------------------------------- /binder/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.8 3 | # To update, run: 4 | # 5 | # pip-compile --extra=dev --output-file=- setup.cfg 6 | # 7 | aiobotocore==2.1.2 8 | # via s3fs 9 | aiohttp==3.8.1 10 | # via 11 | # aiobotocore 12 | # s3fs 13 | aioitertools==0.10.0 14 | # via aiobotocore 15 | aiosignal==1.2.0 16 | # via aiohttp 17 | alabaster==0.7.12 18 | # via sphinx 19 | appdirs==1.4.4 20 | # via pins (setup.cfg) 21 | appnope==0.1.2 22 | # via 23 | # ipykernel 24 | # ipython 25 | asttokens==2.0.5 26 | # via stack-data 27 | async-timeout==4.0.2 28 | # via aiohttp 29 | attrs==21.4.0 30 | # via 31 | # aiohttp 32 | # jsonschema 33 | # markdown-it-py 34 | # pytest 35 | babel==2.9.1 36 | # via sphinx 37 | backcall==0.2.0 38 | # via ipython 39 | beautifulsoup4==4.10.0 40 | # via 41 | # nbconvert 42 | # pydata-sphinx-theme 43 | bleach==4.1.0 44 | # via nbconvert 45 | botocore==1.23.24 46 | # via aiobotocore 47 | certifi==2021.10.8 48 | # via requests 49 | charset-normalizer==2.0.12 50 | # via 51 | # aiohttp 52 | # requests 53 | click==8.0.4 54 | # via pip-tools 55 | debugpy==1.6.0 56 | # via ipykernel 57 | decopatch==1.4.10 58 | # via pytest-cases 59 | decorator==5.1.1 60 | # via ipython 61 | defusedxml==0.7.1 62 | # via nbconvert 63 | docutils==0.17.1 64 | # via 65 | # nbsphinx 66 | # pydata-sphinx-theme 67 | # sphinx 68 | entrypoints==0.4 69 | # via 70 | # jupyter-client 71 | # nbconvert 72 | executing==0.8.3 73 | # via stack-data 74 | frozenlist==1.3.0 75 | # via 76 | # aiohttp 77 | # aiosignal 78 | fsspec==2022.02.0 79 | # via 80 | # pins (setup.cfg) 81 | # s3fs 82 | idna==3.3 83 | # via 84 | # requests 85 | # yarl 86 | imagesize==1.3.0 87 | # via sphinx 88 | importlib-metadata==4.11.3 89 | # via sphinx 90 | 
importlib-resources==5.4.0 91 | # via 92 | # jsonschema 93 | # pins (setup.cfg) 94 | iniconfig==1.1.1 95 | # via pytest 96 | ipykernel==6.9.2 97 | # via pins (setup.cfg) 98 | ipython==8.1.1 99 | # via ipykernel 100 | jedi==0.18.1 101 | # via ipython 102 | jinja2==3.1.0 103 | # via 104 | # nbconvert 105 | # nbsphinx 106 | # pins (setup.cfg) 107 | # sphinx 108 | jmespath==0.10.0 109 | # via botocore 110 | joblib==1.1.0 111 | # via pins (setup.cfg) 112 | jsonschema==4.4.0 113 | # via nbformat 114 | jupyter-client==7.1.2 115 | # via 116 | # ipykernel 117 | # nbclient 118 | jupyter-core==4.9.2 119 | # via 120 | # jupyter-client 121 | # nbconvert 122 | # nbformat 123 | jupyterlab-pygments==0.1.2 124 | # via nbconvert 125 | jupytext==1.13.7 126 | # via pins (setup.cfg) 127 | makefun==1.13.1 128 | # via 129 | # decopatch 130 | # pytest-cases 131 | markdown-it-py==1.1.0 132 | # via 133 | # jupytext 134 | # mdit-py-plugins 135 | markupsafe==2.1.1 136 | # via jinja2 137 | matplotlib-inline==0.1.3 138 | # via 139 | # ipykernel 140 | # ipython 141 | mdit-py-plugins==0.3.0 142 | # via jupytext 143 | mistune==0.8.4 144 | # via nbconvert 145 | multidict==6.0.2 146 | # via 147 | # aiohttp 148 | # yarl 149 | nbclient==0.5.13 150 | # via nbconvert 151 | nbconvert==6.4.4 152 | # via nbsphinx 153 | nbformat==5.2.0 154 | # via 155 | # jupytext 156 | # nbclient 157 | # nbconvert 158 | # nbsphinx 159 | nbsphinx==0.8.8 160 | # via pins (setup.cfg) 161 | nest-asyncio==1.5.4 162 | # via 163 | # ipykernel 164 | # jupyter-client 165 | # nbclient 166 | numpy==1.22.3 167 | # via 168 | # pandas 169 | # siuba 170 | packaging==21.3 171 | # via 172 | # bleach 173 | # pytest 174 | # sphinx 175 | pandas==1.4.1 176 | # via 177 | # pins (setup.cfg) 178 | # siuba 179 | pandocfilters==1.5.0 180 | # via nbconvert 181 | parso==0.8.3 182 | # via jedi 183 | pep517==0.12.0 184 | # via pip-tools 185 | pexpect==4.8.0 186 | # via ipython 187 | pickleshare==0.7.5 188 | # via ipython 189 | pip-tools==6.5.1 190 | # via pins (setup.cfg) 191 | pluggy==1.0.0 192 | # via pytest 193 | prompt-toolkit==3.0.28 194 | # via ipython 195 | psutil==5.9.0 196 | # via ipykernel 197 | ptyprocess==0.7.0 198 | # via pexpect 199 | pure-eval==0.2.2 200 | # via stack-data 201 | py==1.11.0 202 | # via pytest 203 | pydata-sphinx-theme==0.8.0 204 | # via pins (setup.cfg) 205 | pygments==2.11.2 206 | # via 207 | # ipython 208 | # jupyterlab-pygments 209 | # nbconvert 210 | # sphinx 211 | pyparsing==3.0.7 212 | # via packaging 213 | pyrsistent==0.18.1 214 | # via jsonschema 215 | pytest==7.1.1 216 | # via 217 | # pins (setup.cfg) 218 | # pytest-dotenv 219 | pytest-cases==3.6.11 220 | # via pins (setup.cfg) 221 | pytest-dotenv==0.5.2 222 | # via pins (setup.cfg) 223 | python-dateutil==2.8.2 224 | # via 225 | # botocore 226 | # jupyter-client 227 | # pandas 228 | python-dotenv==0.20.0 229 | # via pytest-dotenv 230 | pytz==2022.1 231 | # via 232 | # babel 233 | # pandas 234 | pyyaml==6.0 235 | # via 236 | # jupytext 237 | # pins (setup.cfg) 238 | # siuba 239 | pyzmq==22.3.0 240 | # via jupyter-client 241 | requests==2.27.1 242 | # via sphinx 243 | s3fs==2022.2.0 244 | # via pins (setup.cfg) 245 | siuba==0.1.2 246 | # via pins (setup.cfg) 247 | six==1.16.0 248 | # via 249 | # bleach 250 | # python-dateutil 251 | snowballstemmer==2.2.0 252 | # via sphinx 253 | soupsieve==2.3.1 254 | # via beautifulsoup4 255 | sphinx==4.4.0 256 | # via 257 | # nbsphinx 258 | # pins (setup.cfg) 259 | # pydata-sphinx-theme 260 | sphinxcontrib-applehelp==1.0.2 261 | # via sphinx 262 | 
sphinxcontrib-devhelp==1.0.2 263 | # via sphinx 264 | sphinxcontrib-htmlhelp==2.0.0 265 | # via sphinx 266 | sphinxcontrib-jsmath==1.0.1 267 | # via sphinx 268 | sphinxcontrib-qthelp==1.0.3 269 | # via sphinx 270 | sphinxcontrib-serializinghtml==1.1.5 271 | # via sphinx 272 | sqlalchemy==1.4.32 273 | # via siuba 274 | stack-data==0.2.0 275 | # via ipython 276 | testpath==0.6.0 277 | # via nbconvert 278 | toml==0.10.2 279 | # via jupytext 280 | tomli==2.0.1 281 | # via 282 | # pep517 283 | # pytest 284 | tornado==6.1 285 | # via 286 | # ipykernel 287 | # jupyter-client 288 | traitlets==5.1.1 289 | # via 290 | # ipykernel 291 | # ipython 292 | # jupyter-client 293 | # jupyter-core 294 | # matplotlib-inline 295 | # nbclient 296 | # nbconvert 297 | # nbformat 298 | # nbsphinx 299 | typing-extensions==4.1.1 300 | # via aioitertools 301 | urllib3==1.26.9 302 | # via 303 | # botocore 304 | # requests 305 | wcwidth==0.2.5 306 | # via prompt-toolkit 307 | webencodings==0.5.1 308 | # via bleach 309 | wheel==0.37.1 310 | # via pip-tools 311 | wrapt==1.14.0 312 | # via aiobotocore 313 | xxhash==3.0.0 314 | # via pins (setup.cfg) 315 | yarl==1.7.2 316 | # via aiohttp 317 | zipp==3.7.0 318 | # via 319 | # importlib-metadata 320 | # importlib-resources 321 | 322 | # The following packages are considered to be unsafe in a requirements file: 323 | # pip 324 | # setuptools 325 | -------------------------------------------------------------------------------- /binder/runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.9 2 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | 5 | rsconnect: 6 | image: rstudio/rstudio-connect:2021.12.1 7 | restart: always 8 | ports: 9 | - 3939:3939 10 | volumes: 11 | - $PWD/script/setup-rsconnect/users.txt:/etc/users.txt 12 | - $PWD/script/setup-rsconnect/rstudio-connect.gcfg:/etc/rstudio-connect/rstudio-connect.gcfg 13 | # by default, mysql rounds to 4 decimals, but tests require more precision 14 | privileged: true 15 | environment: 16 | RSTUDIO_CONNECT_HASTE: "enabled" 17 | RSC_LICENSE: ${RSC_LICENSE} 18 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /docs/_extensions/machow/interlinks/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.pdf 3 | *_files/ 4 | -------------------------------------------------------------------------------- /docs/_extensions/machow/interlinks/_extension.yml: -------------------------------------------------------------------------------- 1 | title: Interlinks 2 | author: Michael Chow 3 | version: 1.0.0 4 | quarto-required: ">=1.2.0" 5 | contributes: 6 | filters: 7 | - interlinks.lua 8 | -------------------------------------------------------------------------------- /docs/_extensions/machow/interlinks/interlinks.lua: -------------------------------------------------------------------------------- 1 | local function read_json(filename) 2 | local file = io.open(filename, "r") 3 | if file == nil then 4 | return nil 5 | end 6 | local str = file:read("a") 7 | file:close() 8 | return quarto.json.decode(str) 9 | end 10 | 11 | local inventory 
= {} 12 | 13 | function lookup(search_object) 14 | 15 | local results = {} 16 | for ii, inventory in ipairs(inventory) do 17 | for jj, item in ipairs(inventory.items) do 18 | -- e.g. :external+:::`` 19 | if item.inv_name and item.inv_name ~= search_object.inv_name then 20 | goto continue 21 | end 22 | 23 | if item.name ~= search_object.name then 24 | goto continue 25 | end 26 | 27 | if search_object.role and item.role ~= search_object.role then 28 | goto continue 29 | end 30 | 31 | if search_object.domain and item.domain ~= search_object.domain then 32 | goto continue 33 | else 34 | table.insert(results, item) 35 | 36 | goto continue 37 | end 38 | 39 | ::continue:: 40 | end 41 | end 42 | 43 | if #results == 1 then 44 | return results[1] 45 | end 46 | if #results > 1 then 47 | print("Found multiple matches for " .. search_object.name) 48 | quarto.utils.dump(results) 49 | return nil 50 | end 51 | if #results == 0 then 52 | print("Found no matches for object:") 53 | quarto.utils.dump(search_object) 54 | end 55 | 56 | return nil 57 | end 58 | 59 | function mysplit (inputstr, sep) 60 | if sep == nil then 61 | sep = "%s" 62 | end 63 | local t={} 64 | for str in string.gmatch(inputstr, "([^"..sep.."]+)") do 65 | table.insert(t, str) 66 | end 67 | return t 68 | end 69 | 70 | local function normalize_role(role) 71 | if role == "func" then 72 | return "function" 73 | end 74 | return role 75 | end 76 | 77 | local function build_search_object(str) 78 | local starts_with_colon = str:sub(1, 1) == ":" 79 | local search = {} 80 | if starts_with_colon then 81 | local t = mysplit(str, ":") 82 | if #t == 2 then 83 | -- e.g. :py:func:`my_func` 84 | search.role = normalize_role(t[1]) 85 | search.name = t[2]:match("%%60(.*)%%60") 86 | elseif #t == 3 then 87 | -- e.g. :py:func:`my_func` 88 | search.domain = t[1] 89 | search.role = normalize_role(t[2]) 90 | search.name = t[3]:match("%%60(.*)%%60") 91 | elseif #t == 4 then 92 | -- e.g. :ext+inv:py:func:`my_func` 93 | search.external = true 94 | 95 | search.inv_name = t[1]:match("external%+(.*)") 96 | search.domain = t[2] 97 | search.role = normalize_role(t[3]) 98 | search.name = t[4]:match("%%60(.*)%%60") 99 | else 100 | print("couldn't parse this link: " .. str) 101 | return {} 102 | end 103 | else 104 | search.name = str:match("%%60(.*)%%60") 105 | end 106 | 107 | if search.name == nil then 108 | print("couldn't parse this link: " .. str) 109 | return {} 110 | end 111 | 112 | if search.name:sub(1, 1) == "~" then 113 | search.shortened = true 114 | search.name = search.name:sub(2, -1) 115 | end 116 | return search 117 | end 118 | 119 | function report_broken_link(link, search_object, replacement) 120 | -- TODO: how to unescape html elements like [? 
121 | return pandoc.Code(pandoc.utils.stringify(link.content)) 122 | end 123 | 124 | function Link(link) 125 | -- do not process regular links ---- 126 | if not link.target:match("%%60") then 127 | return link 128 | end 129 | 130 | -- lookup item ---- 131 | local search = build_search_object(link.target) 132 | local item = lookup(search) 133 | 134 | -- determine replacement, used if no link text specified ---- 135 | local original_text = pandoc.utils.stringify(link.content) 136 | local replacement = search.name 137 | if search.shortened then 138 | local t = mysplit(search.name, ".") 139 | replacement = t[#t] 140 | end 141 | 142 | -- set link text ---- 143 | if original_text == "" and replacement ~= nil then 144 | link.content = pandoc.Code(replacement) 145 | end 146 | 147 | -- report broken links ---- 148 | if item == nil then 149 | return report_broken_link(link, search) 150 | end 151 | link.target = item.uri:gsub("%$$", search.name) 152 | 153 | 154 | return link 155 | end 156 | 157 | function fixup_json(json, prefix) 158 | for _, item in ipairs(json.items) do 159 | item.uri = prefix .. item.uri 160 | end 161 | table.insert(inventory, json) 162 | end 163 | 164 | return { 165 | { 166 | Meta = function(meta) 167 | local json 168 | local prefix 169 | for k, v in pairs(meta.interlinks.sources) do 170 | json = read_json(quarto.project.offset .. "/_inv/" .. k .. "_objects.json") 171 | prefix = pandoc.utils.stringify(v.url) 172 | fixup_json(json, prefix) 173 | end 174 | json = read_json(quarto.project.offset .. "/objects.json") 175 | if json ~= nil then 176 | fixup_json(json, "/") 177 | end 178 | end 179 | }, 180 | { 181 | Link = Link 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /docs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | output-dir: _site 4 | 5 | website: 6 | title: "pins for Python" 7 | description: "Pin, Discover, and Share Resources" 8 | page-navigation: true 9 | favicon: "favicon.ico" 10 | page-footer: 11 | center: | 12 | Proudly supported by 13 | [![](https://www.rstudio.com/assets/img/posit-logo-fullcolor-TM.svg){fig-alt="Posit PBC" width=65px}](https://posit.co/) 14 | navbar: 15 | background: light 16 | pinned: true 17 | logo: logo.png 18 | left: 19 | - text: "Get started" 20 | file: get_started.qmd 21 | - text: "Reference" 22 | file: reference/index.qmd 23 | - text: Learn more 24 | menu: 25 | - text: "Create consistent metadata for pins" 26 | file: customize-pins-metadata.qmd 27 | - text: "pins for R" 28 | href: https://pins.rstudio.com 29 | target: _blank 30 | right: 31 | - icon: github 32 | href: https://github.com/rstudio/pins-python 33 | aria-label: Pins python GitHub 34 | sidebar: 35 | style: "floating" 36 | collapse-level: 1 37 | contents: 38 | - section: Boards 39 | contents: 40 | - text: "`board_folder`" 41 | href: reference/board_folder.qmd 42 | - text: "`board_local`" 43 | href: reference/board_local.qmd 44 | - text: "`board_temp`" 45 | href: reference/board_temp.qmd 46 | - text: "`board_s3`" 47 | href: reference/board_s3.qmd 48 | - text: "`board_gcs`" 49 | href: reference/board_gcs.qmd 50 | - text: "`board_azure`" 51 | href: reference/board_azure.qmd 52 | - text: "`board_databricks`" 53 | href: reference/board_databricks.qmd 54 | - text: "`board_connect`" 55 | href: reference/board_connect.qmd 56 | - text: "`board_url`" 57 | href: reference/board_url.qmd 58 | - text: "`board`" 59 | href: reference/board.qmd 60 | - section: Pins 
61 | contents: 62 | - text: "`pin_read`" 63 | href: reference/pin_read.qmd 64 | - text: "`pin_write`" 65 | href: reference/pin_write.qmd 66 | - text: "`pin_meta`" 67 | href: reference/pin_meta.qmd 68 | - text: "`pin_download`" 69 | href: reference/pin_download.qmd 70 | - text: "`pin_upload`" 71 | href: reference/pin_upload.qmd 72 | - text: "`pin_versions`" 73 | href: reference/pin_versions.qmd 74 | - text: "`pin_list`" 75 | href: reference/pin_list.qmd 76 | - text: "`pin_exists`" 77 | href: reference/pin_exists.qmd 78 | - text: "`pin_version_delete`" 79 | href: reference/pin_version_delete.qmd 80 | - text: "`pin_versions_prune`" 81 | href: reference/pin_versions_prune.qmd 82 | - text: "`pin_delete`" 83 | href: reference/pin_delete.qmd 84 | - text: "`pin_search`" 85 | href: reference/pin_search.qmd 86 | 87 | format: 88 | html: 89 | sidebar: false 90 | 91 | quartodoc: 92 | title: Reference 93 | package: pins 94 | sections: 95 | - title: Board constructors 96 | desc: "Functions to create a board object" 97 | contents: 98 | - board_folder 99 | - board_local 100 | - board_temp 101 | - board_s3 102 | - board_gcs 103 | - board_azure 104 | - board_databricks 105 | - board_connect 106 | - board_url 107 | - board 108 | - title: Pin methods 109 | desc: "Methods to handle pins on your board" 110 | package: pins.boards.BaseBoard 111 | contents: 112 | - pin_read 113 | - pin_write 114 | - pin_meta 115 | - pin_download 116 | - pin_upload 117 | - pin_versions 118 | - pin_list 119 | - pin_exists 120 | - pin_version_delete 121 | - pin_versions_prune 122 | - pin_delete 123 | - pin_search 124 | 125 | filters: 126 | - interlinks 127 | 128 | interlinks: 129 | sources: {} 130 | -------------------------------------------------------------------------------- /docs/customize-pins-metadata.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Create consistent metadata for pins 3 | jupyter: python3 4 | --- 5 | 6 | The `metadata` argument in pins is flexible and can hold any kind of metadata that you can formulate as a `dict` (convertible to JSON). 7 | In some situations, you may want to read and write with _consistent_ customized metadata; 8 | you can create functions to wrap [](`~pins.boards.BaseBoard.pin_write`) and [](`~pins.boards.BaseBoard.pin_read`) for your particular use case. 9 | 10 | We'll begin by creating a temporary board for demonstration: 11 | 12 | ```{python setup} 13 | import pins 14 | import pandas as pd 15 | 16 | from pprint import pprint 17 | 18 | board = pins.board_temp() 19 | ``` 20 | 21 | 22 | # A function to store pandas Categoricals 23 | 24 | Say you want to store a pandas Categorical object as JSON together with the _categories_ of the categorical in the metadata. 25 | 26 | For example, here is a simple categorical and its categories: 27 | 28 | ```{python} 29 | some_cat = pd.Categorical(["a", "a", "b"]) 30 | 31 | some_cat.categories 32 | ``` 33 | 34 | Notice that the categories attribute is just the unique values in the categorical. 35 | 36 | We can write a function wrapping [](`~pins.boards.BaseBoard.pin_write`) that holds the categories in metadata, so we can easily re-create the categorical with them. 
37 | 
38 | ```{python}
39 | def pin_write_cat_json(
40 |     board,
41 |     x: pd.Categorical,
42 |     name,
43 |     **kwargs
44 | ):
45 |     metadata = {"categories": x.categories.to_list()}
46 |     json_data = x.to_list()
47 |     board.pin_write(json_data, name = name, type = "json", metadata = metadata, **kwargs)
48 | ```
49 | 
50 | We can use this new function to write a pin as JSON with our specific metadata:
51 | 
52 | ```{python}
53 | some_cat = pd.Categorical(["a", "a", "b", "c"])
54 | pin_write_cat_json(board, some_cat, name = "some-cat")
55 | ```
56 | 
57 | ## A function to read categoricals
58 | 
59 | It's possible to read this pin using the regular [](`~pins.boards.BaseBoard.pin_read`) function, but the object we get is no longer a categorical!
60 | 
61 | ```{python}
62 | board.pin_read("some-cat")
63 | ```
64 | 
65 | However, notice that if we use [](`~pins.boards.BaseBoard.pin_meta`), the information we stored on categories is in the `.user` field.
66 | 
67 | ```{python}
68 | pprint(
69 |     board.pin_meta("some-cat")
70 | )
71 | ```
72 | 
73 | This enables us to write a special function for reading, to reconstruct the categorical using the categories stashed in metadata:
74 | 
75 | ```{python}
76 | def pin_read_cat_json(board, name, version=None, hash=None, **kwargs):
77 |     data = board.pin_read(name = name, version = version, hash = hash, **kwargs)
78 |     meta = board.pin_meta(name = name, version = version, **kwargs)
79 |     return pd.Categorical(data, categories=meta.user["categories"])
80 | 
81 | pin_read_cat_json(board, "some-cat")
82 | ```
83 | 
84 | For an example of how this approach is used in a real project, look at how the vetiver package wraps these functions to [write](https://github.com/rstudio/vetiver-python/blob/main/vetiver/pin_read_write.py) and [read](https://github.com/rstudio/vetiver-python/blob/main/vetiver/vetiver_model.py) model binaries as pins.
85 | 
--------------------------------------------------------------------------------
/docs/favicon.ico: https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/docs/favicon.ico
--------------------------------------------------------------------------------
/docs/get_started.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Get started with pins
3 | jupyter: python3
4 | ---
5 | 
6 | ```{python}
7 | # | include: false
8 | import time
9 | import pandas as pd
10 | pd.options.display.max_rows = 25
11 | ```
12 | 
13 | The pins package helps you publish data sets, models, and other Python objects, making it easy to share them across projects and with your colleagues.
14 | You can pin objects to a variety of "boards", including local folders (to share on a networked drive or with Dropbox), Posit Connect, Amazon S3,
15 | Google Cloud Storage, Azure, and more.
16 | This vignette will introduce you to the basics of pins.
17 | 
18 | ```{python}
19 | from pins import board, board_local, board_folder, board_temp, board_url
20 | ```
21 | 
22 | ## Getting started
23 | 
24 | Every pin lives in a pin *board*, so you must start by creating a pin board.
25 | In this vignette I'll use a temporary board which is automatically deleted when your Python session is over:
26 | 
27 | ```{python}
28 | board = board_temp()
29 | ```
30 | 
31 | In real life, you'd pick a board depending on how you want to share the data.
32 | Here are a few options:
33 | 
34 | 
35 | ```python
36 | board = board_local()  # share data across Python sessions on the same computer
37 | board = board_folder("~/Dropbox")  # share data with others using Dropbox
38 | board = board_folder("Z:\\my-team\\pins")  # share data using a shared network drive
39 | board = board("file", "Z:\\my-team\\pins")  # share data using a shared network drive with caching
40 | board = board_connect()  # share data with Posit Connect
41 | ```
42 | 
43 | 
44 | ## Reading and writing data
45 | 
46 | Once you have a pin board, you can write data to it with the [](`~pins.boards.BaseBoard.pin_write`) method:
47 | 
48 | ```{python}
49 | from pins.data import mtcars
50 | 
51 | meta = board.pin_write(mtcars, "mtcars", type="csv")
52 | ```
53 | 
54 | The first argument is the object to save (usually a data frame, but it can be any Python object), and the second argument gives the "name" of the pin.
55 | The name is basically equivalent to a file name; you'll use it when you later want to read the data from the pin.
56 | The only rule for a pin name is that it can't contain slashes.
57 | 
58 | After you've pinned an object, you can read it back with [](`~pins.boards.BaseBoard.pin_read`):
59 | 
60 | ```{python}
61 | board.pin_read("mtcars")
62 | ```
63 | 
64 | You don't need to supply the file type when reading data from a pin because pins automatically stores the file type in the [metadata](#metadata).
65 | 
66 | ::: {.callout-note}
67 | If you are using the Posit Connect board [](`~pins.board_connect`), then you must specify your pin name as
68 | `"user_name/content_name"`. For example, `"hadley/sales-report"`.
69 | :::
70 | 
71 | ## How and what to store as a pin
72 | 
73 | Above, we saved the data as a CSV, but you can choose another option depending on your goals:
74 | 
75 | - `type = "csv"` uses `to_csv()` from pandas to create a CSV file. CSVs are plain text and can be read easily by many applications, but they only support simple columns (e.g. numbers, strings), can take up a lot of disk space, and can be slow to read.
76 | - `type = "parquet"` uses `to_parquet()` from pandas to create a Parquet file. [Parquet](https://parquet.apache.org/) is a modern, language-independent, column-oriented file format for efficient data storage and retrieval. Parquet is an excellent choice for storing tabular data.
77 | - `type = "arrow"` uses `to_feather()` from pandas to create an Arrow/Feather file.
78 | - `type = "joblib"` uses `joblib.dump()` to create a binary Python data file, such as for storing a trained model. See the [joblib docs](https://joblib.readthedocs.io/en/latest/) for more information.
79 | - `type = "json"` uses `json.dump()` to create a JSON file. Pretty much every programming language can read JSON files, but they only work well for nested lists.
80 | 
81 | Note that when the data lives elsewhere, pins takes care of downloading and caching so that it's only re-downloaded when needed.
82 | That said, most boards transmit pins over HTTP, and this is going to be slow and possibly unreliable for very large pins.
83 | As a general rule of thumb, we don't recommend using pins with files over 500 MB.
84 | If you find yourself routinely pinning data larger than this, you might need to reconsider your data engineering pipeline.
85 | 
86 | Storing your data/object as a pin works well when you write from a single source or process. It is _not_ appropriate when multiple sources or processes need to write to the same pin; since the pins package reads and writes files, it cannot manage concurrent writes. It is also not appropriate for high-frequency writes (multiple times per second).
87 | 
88 | - **Good** use for pins: an ETL pipeline that stores a model or summarized dataset once a day
89 | - **Bad** use for pins: a Shiny app that collects data from users, who may be using the app at the same time
90 | 
91 | ## Metadata
92 | 
93 | 
94 | Every pin is accompanied by some metadata that you can access with [](`~pins.boards.BaseBoard.pin_meta`):
95 | 
96 | ```{python}
97 | board.pin_meta("mtcars")
98 | ```
99 | 
100 | This shows you the metadata that’s generated by default. This includes:
101 | 
102 | * `title`, a brief textual description of the dataset.
103 | * an optional `description`, where you can provide more details.
104 | * the date-time when the pin was `created`.
105 | * the `file_size`, in bytes, of the underlying files.
106 | * a unique `pin_hash` that you can supply to [](`~pins.boards.BaseBoard.pin_read`) to ensure that you’re reading exactly the data that you expect.
107 | 
108 | When creating the pin, you can override the default description or provide additional metadata that is stored with the data:
109 | 
110 | ```{python}
111 | board.pin_write(
112 |     mtcars,
113 |     name="mtcars2",
114 |     type="csv",
115 |     description = "Data extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973–74 models).",
116 |     metadata = {
117 |         "source": "Henderson and Velleman (1981), Building multiple regression models interactively. Biometrics, 37, 391–411."
118 |     }
119 | )
120 | ```
121 | 
122 | ```{python}
123 | board.pin_meta("mtcars2")
124 | ```
125 | 
126 | While we’ll do our best to keep the automatically generated metadata consistent over time, I’d recommend manually capturing anything you really care about in metadata.
127 | 
128 | 
129 | ## Versioning
130 | 
131 | By default, calls to [](`~pins.boards.BaseBoard.pin_write`) will usually create a new version:
132 | 
133 | ```{python}
134 | board2 = board_temp()
135 | board2.pin_write([1,2,3,4,5], name = "x", type = "json")
136 | board2.pin_write([1,2,3], name = "x", type = "json")
137 | board2.pin_write([1,2], name = "x", type = "json")
138 | board2.pin_versions("x")
139 | ```
140 | 
141 | The only exception is if the data is identical to the most recent version (compared via file hash):
142 | 
143 | ```{python}
144 | board2.pin_write([1], name = "x", type = "json")
145 | time.sleep(1.1) # later, let's try and write a new version of the same data...
146 | board2.pin_write([1], name = "x", type = "json")
147 | board2.pin_versions("x")
148 | ```
149 | 
150 | 
151 | However, you can opt out of this behaviour with `force_identical_write=True`:
152 | ```{python}
153 | time.sleep(1.1) # try again...
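# force_identical_write=True stores a new version even though the contents
# hash to the same value as the most recent version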
154 | board2.pin_write([1], name = "x", type = "json", force_identical_write=True) 155 | board2.pin_versions("x") 156 | ``` 157 | 158 | By default, [](`~pins.boards.BaseBoard.pin_read`) will return the most recent version: 159 | 160 | ```{python} 161 | board2.pin_read("x") 162 | ``` 163 | 164 | But you can request an older version by supplying the `version` argument: 165 | 166 | ```{python} 167 | version = board2.pin_versions("x").version[1] 168 | board2.pin_read("x", version = version) 169 | ``` 170 | 171 | ## Storing models 172 | 173 | ::: {.callout-warning} 174 | The examples in this section use joblib to read and write data. Joblib uses the pickle format, and **pickle files are not secure**. Only read pickle files you trust. In order to read pickle files, set the `allow_pickle_read=True` argument. [Learn more about pickling](https://docs.python.org/3/library/pickle.html). 175 | ::: 176 | 177 | You can write a pin with `type="joblib"` to store arbitrary python objects, including fitted models from packages like [scikit-learn](https://scikit-learn.org/). 178 | 179 | For example, suppose you wanted to store a custom `namedtuple` object. 180 | 181 | ```{python} 182 | from collections import namedtuple 183 | 184 | board3 = board_temp(allow_pickle_read=True) 185 | 186 | Coords = namedtuple("Coords", ["x", "y"]) 187 | coords = Coords(1, 2) 188 | 189 | coords 190 | ``` 191 | 192 | Using `type="joblib"` lets you store and read back the custom `coords` object. 193 | 194 | ```{python} 195 | board3.pin_write(coords, "my_coords", type="joblib") 196 | 197 | board3.pin_read("my_coords") 198 | ``` 199 | 200 | 201 | ## Caching 202 | 203 | The primary purpose of pins is to make it easy to share data. 204 | But pins is also designed to help you spend as little time as possible downloading data. 205 | [](`~pins.boards.BaseBoard.pin_read`) and [](`~pins.boards.BaseBoard.pin_download`) automatically cache remote pins: they maintain a local copy of the data (so it's fast) but always check that it's up-to-date (so your analysis doesn't use stale data). 206 | 207 | Wouldn't it be nice if you could take advantage of this feature for any dataset on the internet? 208 | That's the idea behind [](`~pins.board_url`); you can assemble your own board from datasets, wherever they live on the internet. 209 | For example, this code creates a board containing a single pin, `penguins`, that refers to some fun data I found on GitHub: 210 | 211 | ```{python} 212 | my_data = board_url("", { 213 | "penguins": "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins_raw.csv" 214 | }) 215 | ``` 216 | 217 | You can read this data by combining [](`~pins.boards.BaseBoard.pin_download`) with `read_csv` from pandas: 218 | 219 | ```{python} 220 | fname = my_data.pin_download("penguins") 221 | 222 | fname 223 | 224 | ``` 225 | 226 | ```{python} 227 | import pandas as pd 228 | 229 | pd.read_csv(fname[0]).head() 230 | ``` 231 | 232 | ```{python} 233 | my_data.pin_download("penguins") 234 | ``` 235 | -------------------------------------------------------------------------------- /docs/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: python3 3 | --- 4 | 5 | # pins 6 | 7 | The pins package publishes data, models, and other Python objects, making it 8 | easy to share them across projects and with your colleagues. 
You can pin
9 | objects to a variety of pin *boards*, including folders (to share on a
10 | networked drive or with services like Dropbox), Posit Connect, Amazon
11 | S3, and Google Cloud Storage.
12 | Pins can be automatically versioned, making it straightforward to track changes,
13 | re-run analyses on historical data, and undo mistakes.
14 | 
15 | You can use pins from R as well as Python. For example, you can use one language
16 | to read a pin created with the other. Learn more about
17 | [pins for R](https://pins.rstudio.com).
18 | 
19 | ## Installation
20 | 
21 | You can install the released version of pins from [PyPI](https://pypi.org/project/pins/):
22 | 
23 | ```bash
24 | python -m pip install pins
25 | ```
26 | 
27 | And the development version from [GitHub](https://github.com/rstudio/pins-python) with:
28 | 
29 | ```bash
30 | python -m pip install git+https://github.com/rstudio/pins-python
31 | ```
32 | 
33 | ## Usage
34 | 
35 | To use the pins package, you must first create a pin board. A good place
36 | to start is [](`~pins.board_folder`), which stores pins in a directory you
37 | specify. Here we'll use a special version of [](`~pins.board_folder`) called
38 | [](`~pins.board_temp`), which creates a temporary board that’s automatically
39 | deleted when your Python script or notebook session ends. This is great for examples, but
40 | obviously you shouldn't use it for real work!
41 | 
42 | ```{python}
43 | import pins
44 | from pins.data import mtcars
45 | 
46 | board = pins.board_temp()
47 | ```
48 | 
49 | You can "pin" (save) data to a board with the [](`~pins.boards.BaseBoard.pin_write`) method. It requires three
50 | arguments: an object, a name, and a pin type:
51 | 
52 | ```{python}
53 | board.pin_write(mtcars.head(), "mtcars", type="csv")
54 | ```
55 | 
56 | Above, we saved the data as a CSV, but depending on
57 | what you’re saving and who else you want to read it, you might use the
58 | `type` argument to instead save it as a `joblib`, `parquet`, or `json` file.
59 | 
60 | You can later retrieve the pinned data with [](`~pins.boards.BaseBoard.pin_read`):
61 | 
62 | ```{python}
63 | board.pin_read("mtcars")
64 | ```
65 | 
66 | A board on your computer is a good place to start, but the real power of
67 | pins comes when you use a board that’s shared with multiple people. To
68 | get started, you can use [](`~pins.board_folder`) with a directory on a shared
69 | drive or in Dropbox, or if you use [Posit
70 | Connect](https://posit.co/products/enterprise/connect/) you can use
71 | [](`~pins.board_connect`):
72 | 
73 | ```python
74 | # Note that this uses one approach to connecting,
75 | # the environment variables CONNECT_SERVER and CONNECT_API_KEY
76 | 
77 | board = pins.board_connect()
78 | board.pin_write(tidy_sales_data, "hadley/sales-summary", type="csv")
79 | ```
80 | 
81 | Then, someone else (or an automated report) can read and use your
82 | pin:
83 | 
84 | ```python
85 | board = pins.board_connect()
86 | board.pin_read("hadley/sales-summary")
87 | ```
88 | 
89 | You can easily control who gets to access the data using the Posit
90 | Connect permissions pane.
91 | 
92 | The pins package also includes boards that allow you to share data on
93 | services like Amazon’s S3 ([](`~pins.board_s3`)), Google Cloud Storage ([](`~pins.board_gcs`)),
94 | and Azure blob storage ([](`~pins.board_azure`)).
95 | 
96 | ## Contributing
97 | 
98 | - This project is released with a [Contributor Code of Conduct](https://www.contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html).
By contributing to this project, you agree to abide by its terms. 99 | 100 | - If you think you have encountered a bug, please [submit an issue](https://github.com/rstudio/pins-python/issues). 101 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/docs/logo.png -------------------------------------------------------------------------------- /pins/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | # Set version ---- 4 | from importlib_metadata import version as _v 5 | 6 | __version__ = _v("pins") 7 | 8 | del _v 9 | 10 | 11 | # Imports ---- 12 | from .cache import cache_prune, cache_info 13 | from .constructors import ( 14 | board_folder, 15 | board_temp, 16 | board_local, 17 | board_github, 18 | board_urls, # DEPRECATED 19 | board_url, 20 | board_connect, 21 | board_rsconnect, 22 | board_azure, 23 | board_s3, 24 | board_gcs, 25 | board_databricks, 26 | board, 27 | ) 28 | from .boards import board_deparse 29 | -------------------------------------------------------------------------------- /pins/_adaptors.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from abc import abstractmethod 5 | from typing import TYPE_CHECKING, Any, ClassVar, overload 6 | 7 | from databackend import AbstractBackend 8 | from typing_extensions import TypeAlias 9 | 10 | if TYPE_CHECKING: 11 | import pandas as pd 12 | 13 | PandasDataFrame: TypeAlias = pd.DataFrame 14 | DataFrame: TypeAlias = PandasDataFrame 15 | 16 | 17 | class AbstractPandasFrame(AbstractBackend): 18 | _backends = [("pandas", "DataFrame")] 19 | 20 | 21 | AbstractDF: TypeAlias = AbstractPandasFrame 22 | 23 | 24 | class Adaptor: 25 | def __init__(self, data: Any) -> None: 26 | self._d = data 27 | 28 | def write_json(self, file: str) -> None: 29 | with open(file, "w") as f: 30 | f.write(self.to_json()) 31 | 32 | def to_json(self) -> str: 33 | import json 34 | 35 | return json.dumps(self._d) 36 | 37 | def write_joblib(self, file: str) -> None: 38 | import joblib 39 | 40 | joblib.dump(self._d, file) 41 | 42 | def write_csv(self, file: str) -> None: 43 | msg = f"Writing to CSV is not supported for {type(self._d)}" 44 | raise NotImplementedError(msg) 45 | 46 | def write_parquet(self, file: str) -> None: 47 | msg = f"Writing to Parquet is not supported for {type(self._d)}" 48 | raise NotImplementedError(msg) 49 | 50 | def write_feather(self, file: str) -> None: 51 | msg = f"Writing to Feather is not supported for {type(self._d)}" 52 | raise NotImplementedError(msg) 53 | 54 | @property 55 | def data_preview(self) -> str: 56 | # note that the R library uses jsonlite::toJSON 57 | import json 58 | 59 | # TODO(compat): set display none in index.html 60 | return json.dumps({}) 61 | 62 | def default_title(self, name: str) -> str: 63 | # TODO(compat): title says CSV rather than data.frame 64 | # see https://github.com/machow/pins-python/issues/5 65 | return f"{name}: a pinned {self._obj_name}" 66 | 67 | @property 68 | def _obj_name(self) -> str: 69 | return f"{type(self._d).__qualname__} object" 70 | 71 | 72 | class DFAdaptor(Adaptor): 73 | _d: ClassVar[DataFrame] 74 | 75 | def __init__(self, data: DataFrame) -> None: 76 | super().__init__(data) 77 | 78 | @property 79 | def 
df_type(self) -> str: 80 | # Consider over-riding this for specialized dataframes 81 | return "DataFrame" 82 | 83 | @property 84 | @abstractmethod 85 | def columns(self) -> list[Any]: ... 86 | 87 | @property 88 | @abstractmethod 89 | def shape(self) -> tuple[int, int]: ... 90 | 91 | @abstractmethod 92 | def head(self, n: int) -> DFAdaptor: ... 93 | 94 | @property 95 | def data_preview(self) -> str: 96 | # TODO(compat) is 100 hard-coded? 97 | # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library 98 | data: list[dict[Any, Any]] = json.loads(self.head(100).to_json()) 99 | columns = [ 100 | {"name": [col], "label": [col], "align": ["left"], "type": [""]} 101 | for col in self.columns 102 | ] 103 | 104 | # this reproduces R pins behavior, by omitting entries that would be null 105 | data_no_nulls = [{k: v for k, v in row.items() if v is not None} for row in data] 106 | 107 | return json.dumps({"data": data_no_nulls, "columns": columns}) 108 | 109 | @property 110 | def _obj_name(self) -> str: 111 | row, col = self.shape 112 | return f"{row} x {col} {self.df_type}" 113 | 114 | 115 | class PandasAdaptor(DFAdaptor): 116 | _d: ClassVar[PandasDataFrame] 117 | 118 | def __init__(self, data: AbstractPandasFrame) -> None: 119 | super().__init__(data) 120 | 121 | @property 122 | def columns(self) -> list[Any]: 123 | return self._d.columns.tolist() 124 | 125 | @property 126 | def shape(self) -> tuple[int, int]: 127 | return self._d.shape 128 | 129 | def head(self, n: int) -> PandasAdaptor: 130 | return PandasAdaptor(self._d.head(n)) 131 | 132 | def to_json(self) -> str: 133 | return self._d.to_json(orient="records") 134 | 135 | def write_csv(self, file: str) -> None: 136 | self._d.to_csv(file, index=False) 137 | 138 | def write_parquet(self, file: str) -> None: 139 | self._d.to_parquet(file) 140 | 141 | def write_feather(self, file: str) -> None: 142 | self._d.to_feather(file) 143 | 144 | 145 | @overload 146 | def create_adaptor(obj: DataFrame) -> DFAdaptor: ... 147 | @overload 148 | def create_adaptor(obj: Any) -> Adaptor: ... 149 | def create_adaptor(obj: Any | DataFrame) -> Adaptor | DFAdaptor: 150 | if isinstance(obj, AbstractPandasFrame): 151 | return PandasAdaptor(obj) 152 | elif isinstance(obj, Adaptor): 153 | return obj 154 | else: 155 | return Adaptor(obj) 156 | -------------------------------------------------------------------------------- /pins/_types.py: -------------------------------------------------------------------------------- 1 | from io import IOBase 2 | from typing import Union 3 | 4 | StrOrFile = Union[str, IOBase] 5 | -------------------------------------------------------------------------------- /pins/cache.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import os 5 | import shutil 6 | import time 7 | import urllib.parse 8 | from collections.abc import Iterator 9 | from pathlib import Path 10 | 11 | import humanize 12 | from fsspec import register_implementation 13 | from fsspec.implementations.cached import SimpleCacheFileSystem 14 | 15 | from .config import get_cache_dir 16 | from .utils import hash_name, inform 17 | 18 | _log = logging.getLogger(__name__) 19 | 20 | 21 | # used if needed to preserve board path structure in the cache 22 | PLACEHOLDER_VERSION = "v" 23 | PLACEHOLDER_FILE = "file" 24 | 25 | 26 | def touch_access_time(path, access_time: float | None = None, strict=True): 27 | """Update access time of file. 
28 | 
29 |     Returns the new access time.
30 |     """
31 | 
32 |     if access_time is None:
33 |         access_time = time.time()
34 | 
35 |     p = Path(path)
36 | 
37 |     if not p.exists() and not strict:
38 |         p.touch()
39 | 
40 |     stat = p.stat()
41 |     os.utime(path, (access_time, stat.st_mtime))
42 | 
43 |     return access_time
44 | 
45 | 
46 | def protocol_to_string(protocol):
47 |     if isinstance(protocol, str):
48 |         return protocol
49 | 
50 |     return protocol[0]
51 | 
52 | 
53 | def prefix_cache(fs, board_base_path):
54 |     if isinstance(fs, str):
55 |         proto_name = fs
56 |     else:
57 |         proto_name = protocol_to_string(fs.protocol)
58 |     base_hash = hash_name(board_base_path, False)
59 | 
60 |     return f"{proto_name}_{base_hash}"
61 | 
62 | 
63 | class HashMapper:
64 |     def __init__(self, hash_prefix):
65 |         self.hash_prefix = hash_prefix
66 | 
67 |     def __call__(self, path: str) -> str:
68 |         if self.hash_prefix is not None:
69 |             # optionally make the name relative to a parent path
70 |             # using the hash of parent path as a prefix, to flatten a bit
71 |             _hash = Path(path).relative_to(Path(self.hash_prefix))
72 |             return str(_hash)
73 | 
74 |         else:
75 |             raise NotImplementedError()
76 | 
77 | 
78 | class PinsAccessTimeCacheMapper:
79 |     def __init__(self, hash_prefix):
80 |         self.hash_prefix = hash_prefix
81 | 
82 |     def __call__(self, path):
83 |         # hash the full path, and put anything after the final / at the end,
84 |         # just to make it easier to browse.
85 |         # this has the side benefit of keeping the original file name visible in the cache.
86 |         base_name = hash_name(path, False)
87 |         suffix = Path(path).name
88 |         return f"{base_name}_{suffix}"
89 | 
90 | 
91 | class PinsRscCacheMapper:
92 |     """Modifies the PinsCache to allow hash_prefix to be an RSC server url.
93 | 
94 |     Note that this class also modifies the first / in a path to be a +, so that
95 |     pin contents will not be put into subdirectories, e.g. for michael/mtcars/data.txt.
96 |     """
97 | 
98 |     def __init__(self, hash_prefix):
99 |         self.hash_prefix = hash_prefix
100 | 
101 |     def __call__(self, path):
102 |         # the main change in this function is that, for same_name, it returns
103 |         # the full path.
104 |         # change pin path of form <user>/<content> to <user>+<content>
105 |         _hash = path.replace("/", "+", 1)
106 |         return _hash
107 | 
108 | 
109 | class PinsCache(SimpleCacheFileSystem):
110 |     protocol = "pinscache"
111 | 
112 |     def __init__(self, *args, hash_prefix=None, mapper=HashMapper, **kwargs):
113 |         super().__init__(*args, **kwargs)
114 |         self.hash_prefix = hash_prefix
115 |         self._mapper = mapper(hash_prefix)
116 | 
117 |     def hash_name(self, path, *args, **kwargs):
118 |         return self._mapper(path)
119 | 
120 |     def _open(self, path, *args, **kwargs):
121 |         # For some reason, the open method of SimpleCacheFileSystem doesn't
122 |         # call _make_local_details, so we need to patch it in here.
123 |         # Note that methods like .cat() do call it. Other Caches don't have this issue.
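        # normalize the path, then make sure the cache entry and its parent
        # directories exist before delegating to the parent implementation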
124 |         path = self._strip_protocol(path)
125 |         self._make_local_details(path)
126 | 
127 |         return super()._open(path, *args, **kwargs)
128 | 
129 |     def _make_local_details(self, path):
130 |         # modifies method to create any parent directories needed by the cached file
131 |         # note that this is called in ._open(), at the point it's known the file
132 |         # will be cached
133 |         fn = super()._make_local_details(path)
134 |         _log.info(f"cache file: {fn}")
135 |         Path(fn).parent.mkdir(parents=True, exist_ok=True)
136 | 
137 |         return fn
138 | 
139 |     # same as upstream, brought in to preserve backwards compatibility
140 |     def _check_file(self, path):
141 |         self._check_cache()
142 |         sha = self._mapper(path)
143 |         for storage in self.storage:
144 |             fn = os.path.join(storage, sha)
145 |             if os.path.exists(fn):
146 |                 return fn
147 | 
148 | 
149 | class PinsUrlCache(PinsCache):
150 |     protocol = "pinsurlcache"
151 | 
152 |     def hash_name(self, path, same_name):
153 |         # strip final arg from path
154 |         # note that R pins uses fs::path_file, and I'm not sure exactly how it
155 |         # behaves for the many forms url paths can take.
156 |         # e.g. path_file(.../extdata/) -> extdata
157 |         # e.g. path_file(.../extdata?123) -> extdata?123
158 |         path_parts = urllib.parse.urlparse(path)[2]
159 | 
160 |         # strip off final whitespace and / (if it exists)
161 |         # TODO(compat): python pins currently not keeping query part of url
162 |         final_part = path_parts.rstrip().rstrip("/").rsplit("/", 1)[-1]
163 | 
164 |         # TODO: what happens in R pins if no final part?
165 |         if final_part == "":
166 |             final_part = PLACEHOLDER_FILE
167 | 
168 |         # hash url
169 |         prefix = hash_name(path, False)
170 | 
171 |         # note that we include an extra version folder, so it conforms with
172 |         # pin board path form: <pin>/<version>/<file>
173 |         proto_name = protocol_to_string(self.fs.protocol)
174 |         full_prefix = "_".join([proto_name, prefix])
175 |         return str(Path(full_prefix) / PLACEHOLDER_VERSION / final_part)
176 | 
177 | 
178 | class PinsAccessTimeCache(SimpleCacheFileSystem):
179 |     name = "pinsaccesstimecache"
180 | 
181 |     def __init__(
182 |         self, *args, hash_prefix=None, mapper=PinsAccessTimeCacheMapper, **kwargs
183 |     ):
184 |         super().__init__(*args, **kwargs)
185 |         self.hash_prefix = hash_prefix
186 |         self._mapper = mapper(hash_prefix)
187 | 
188 |     def hash_name(self, path, *args, **kwargs):
189 |         return self._mapper(path)
190 | 
191 |     def _open(self, path, mode="rb", **kwargs):
192 |         f = super()._open(path, mode=mode, **kwargs)
193 |         fn = self._check_file(path)
194 | 
195 |         if fn is None:
196 |             raise ValueError(f"Cached file should exist for path, but none found: {path}")
197 | 
198 |         touch_access_time(fn)
199 | 
200 |         return f
201 | 
202 |     # same as upstream, brought in to preserve backwards compatibility
203 |     def _check_file(self, path):
204 |         self._check_cache()
205 |         sha = self._mapper(path)
206 |         for storage in self.storage:
207 |             fn = os.path.join(storage, sha)
208 |             if os.path.exists(fn):
209 |                 return fn
210 | 
211 | 
212 | class CachePruner:
213 |     """Prunes the cache directory, across multiple boards.
214 | 
215 |     Note
216 |     ----
217 | 
218 |     `pins` assumes that all boards cache using these rules:
219 | 
220 |     * path structure: `<pin_name>/<version>/`.
221 |     * each version has a data.txt file in it.
222 |     """
223 | 
224 |     meta_path = "data.txt"
225 | 
226 |     def __init__(self, cache_dir: str | Path):
227 |         self.cache_dir = Path(cache_dir)
228 | 
229 |     def versions(self) -> Iterator[Path]:
230 |         for p_version in self.cache_dir.glob("*/*"):
231 |             if p_version.is_dir() and (p_version / self.meta_path).exists():
232 |                 yield p_version
233 | 
234 |     def should_prune_version(self, days, path: str | Path):
235 |         path = Path(path)
236 | 
237 |         expiry_time_sec = days * 60 * 60 * 24
238 |         prune_before = time.time() - expiry_time_sec
239 | 
240 |         p_meta = path / self.meta_path
241 | 
242 |         if not p_meta.exists():
243 |             raise FileNotFoundError(f"No metadata file: {p_meta.absolute()}")
244 | 
245 |         access_time = p_meta.stat().st_atime
246 |         return access_time < prune_before
247 | 
248 |     def old_versions(self, days):
249 |         return [p for p in self.versions() if self.should_prune_version(days, p)]
250 | 
251 |     def prune(self, days=30):
252 |         to_prune = self.old_versions(days)
253 |         size = sum(map(disk_usage, to_prune))
254 | 
255 |         # TODO: clean this up, general approach to prompting
256 |         confirmed = prompt_cache_prune(to_prune, size)
257 |         if confirmed:
258 |             for path in to_prune:
259 |                 delete_version(path)
260 |         else:
261 |             _log.info("Skipping cache deletion")
262 | 
263 | 
264 | def delete_version(path: str | Path):
265 |     path = Path(path)
266 |     shutil.rmtree(str(path.absolute()))
267 | 
268 | 
269 | def disk_usage(path):
270 |     return sum(p.stat().st_size for p in path.glob("**/*") if p.is_file() or p.is_dir())
271 | 
272 | 
273 | def prompt_cache_prune(to_prune, size) -> bool:
274 |     _log.info(f"Pruning items: {to_prune}")
275 |     human_size = humanize.naturalsize(size, binary=True)
276 |     resp = input(
277 |         f"Delete {len(to_prune)} pin versions, freeing {human_size}?"
278 |         "\n1: Yes"
279 |         "\n2: No"
280 |         "\n\nSelection: "
281 |     )
282 |     return resp == "1"
283 | 
284 | 
285 | def cache_info():
286 |     cache_root = get_cache_dir()
287 | 
288 |     cache_boards = list(Path(cache_root).glob("*"))
289 | 
290 |     print(f"Cache info: {cache_root}")
291 |     for p_board in cache_boards:
292 |         du = disk_usage(p_board)
293 |         human_size = humanize.naturalsize(du, binary=True)
294 |         rel_path = p_board.relative_to(cache_root)
295 |         print(f"* {rel_path}: {human_size}")
296 | 
297 | 
298 | def cache_prune(days=30, cache_root=None, prompt=True):
299 |     if cache_root is None:
300 |         cache_root = get_cache_dir()
301 | 
302 |     final_delete = []
303 |     for p_board in Path(cache_root).glob("*"):
304 |         pruner = CachePruner(p_board)
305 |         final_delete.extend(pruner.old_versions(days))
306 | 
307 |     size = sum(map(disk_usage, final_delete))
308 | 
309 |     if not final_delete:
310 |         inform(_log, "No stale pins found")
311 |         return
312 | 
313 |     if prompt:
314 |         confirmed = prompt_cache_prune(final_delete, size)
315 |     else:
316 |         confirmed = True
317 | 
318 |     if confirmed:
319 |         inform(_log, "Deleting pins from cache.")
320 |         for p in final_delete:
321 |             delete_version(p)
322 |     else:
323 |         inform(_log, "Skipping deletion of pins from cache.")
324 | 
325 | 
326 | # TODO: swap to use entrypoint
327 | register_implementation("pinscache", PinsCache)
328 | 
--------------------------------------------------------------------------------
/pins/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from types import SimpleNamespace
3 | 
4 | import appdirs
5 | 
6 | PINS_NAME = "pins-py"
7 | PINS_ENV_DATA_DIR = "PINS_DATA_DIR"
8 | PINS_ENV_CACHE_DIR = "PINS_CACHE_DIR"
9 | PINS_ENV_INSECURE_READ = "PINS_ALLOW_PICKLE_READ"
10 | 
PINS_ENV_ALLOW_RSC_SHORT_NAME = "PINS_ALLOW_RSC_SHORT_NAME" 11 | PINS_ENV_FEATURE_PREVIEW = "PINS_FEATURE_PREVIEW" 12 | 13 | pins_options = SimpleNamespace(quiet=False) 14 | 15 | 16 | def _interpret_int(env_var_name): 17 | env_var = os.environ.get(env_var_name, "0") 18 | try: 19 | env_int = int(env_var) 20 | except ValueError: 21 | raise ValueError( 22 | f"{env_var_name} must be '0' or '1', but was set to " f"{repr(env_var)}." 23 | ) 24 | 25 | flag = bool(env_int) 26 | return flag 27 | 28 | 29 | def get_data_dir(): 30 | return os.environ.get(PINS_ENV_DATA_DIR, appdirs.user_data_dir(PINS_NAME)) 31 | 32 | 33 | def get_cache_dir(): 34 | return os.environ.get(PINS_ENV_CACHE_DIR, appdirs.user_cache_dir(PINS_NAME)) 35 | 36 | 37 | def get_allow_pickle_read(flag): 38 | if flag is None: 39 | return _interpret_int(PINS_ENV_INSECURE_READ) 40 | 41 | return flag 42 | 43 | 44 | def get_allow_rsc_short_name(): 45 | return _interpret_int(PINS_ENV_ALLOW_RSC_SHORT_NAME) 46 | 47 | 48 | def get_feature_preview(): 49 | return _interpret_int(PINS_ENV_FEATURE_PREVIEW) 50 | -------------------------------------------------------------------------------- /pins/data/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib_resources import files as _files 2 | 3 | sources = { 4 | "mtcars": _files("pins") / "data/mtcars.csv", 5 | } 6 | 7 | 8 | def __dir__(): 9 | return list(sources) 10 | 11 | 12 | def __getattr__(k): 13 | import pandas as pd 14 | 15 | f_path = sources.get("mtcars") 16 | 17 | return pd.read_csv(f_path) 18 | -------------------------------------------------------------------------------- /pins/data/mtcars.csv: -------------------------------------------------------------------------------- 1 | mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb 2 | 21,6,160,110,3.9,2.62,16.46,0,1,4,4 3 | 21,6,160,110,3.9,2.875,17.02,0,1,4,4 4 | 22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 5 | 21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 6 | 18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 7 | 18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 8 | 14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 9 | 24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 10 | 22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 11 | 19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 12 | 17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 13 | 16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 14 | 17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 15 | 15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 16 | 10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 17 | 10.4,8,460,215,3,5.424,17.82,0,0,3,4 18 | 14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 19 | 32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 20 | 30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 21 | 33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 22 | 21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 23 | 15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 24 | 15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 25 | 13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 26 | 19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 27 | 27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 28 | 26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 29 | 30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 30 | 15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 31 | 19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 32 | 15,8,301,335,3.54,3.57,14.6,0,1,5,8 33 | 21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 34 | -------------------------------------------------------------------------------- /pins/databricks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/databricks/__init__.py 
-------------------------------------------------------------------------------- /pins/databricks/fs.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from io import BytesIO 3 | from pathlib import Path, PurePath 4 | 5 | from fsspec import AbstractFileSystem 6 | 7 | from pins.errors import PinsError 8 | 9 | 10 | class DatabricksFs(AbstractFileSystem): 11 | protocol = "dbc" 12 | 13 | def ls(self, path, detail=False, **kwargs): 14 | return self._databricks_ls(path, detail) 15 | 16 | def exists(self, path: str, **kwargs): 17 | return self._databricks_exists(path) 18 | 19 | def open(self, path: str, mode: str = "rb", *args, **kwargs): 20 | if mode != "rb": 21 | raise NotImplementedError 22 | return self._databricks_open(path) 23 | 24 | def get(self, rpath, lpath, recursive=False, **kwargs): 25 | self._databricks_get(rpath, lpath, recursive, **kwargs) 26 | 27 | def mkdir(self, path, create_parents=True, **kwargs): 28 | if not create_parents: 29 | raise NotImplementedError 30 | self._databricks_mkdir(path) 31 | 32 | def put( 33 | self, 34 | lpath, 35 | rpath, 36 | recursive=True, 37 | maxdepth=None, 38 | **kwargs, 39 | ): 40 | if not recursive: 41 | raise NotImplementedError 42 | if maxdepth is not None: 43 | raise NotImplementedError 44 | self._databricks_put(lpath, rpath) 45 | 46 | def rm(self, path, recursive=True, maxdepth=None) -> None: 47 | if not recursive: 48 | raise NotImplementedError 49 | if maxdepth is not None: 50 | raise NotImplementedError 51 | if self._databricks_exists(path): 52 | self._databricks_rm_dir(path) 53 | 54 | @staticmethod 55 | def _databricks_put(lpath, rpath): 56 | from databricks.sdk import WorkspaceClient 57 | 58 | w = WorkspaceClient() 59 | path = Path(lpath).absolute() 60 | orig_path = path 61 | 62 | def _upload_files(path): 63 | contents = Path(path) 64 | for item in contents.iterdir(): 65 | abs_path = PurePath(path).joinpath(item) 66 | is_file = Path(abs_path).is_file() 67 | if is_file: 68 | rel_path = abs_path.relative_to(orig_path) 69 | db_path = PurePath(rpath).joinpath(rel_path) 70 | file = open(abs_path, "rb") 71 | w.files.upload(str(db_path), BytesIO(file.read()), overwrite=True) 72 | else: 73 | _upload_files(abs_path) 74 | 75 | _upload_files(path) 76 | 77 | def _databricks_get(self, board, rpath, lpath, recursive=False, **kwargs): 78 | from databricks.sdk import WorkspaceClient 79 | 80 | w = WorkspaceClient() 81 | file_type = self._databricks_is_type(rpath) 82 | if file_type == "file": 83 | board.fs.get(rpath, lpath, **kwargs) 84 | return 85 | 86 | def _get_files(path, recursive, **kwargs): 87 | raw_contents = w.files.list_directory_contents(path) 88 | contents = list(raw_contents) 89 | details = list(map(self._databricks_content_details, contents)) 90 | for item in details: 91 | item_path = item.get("path") 92 | if item.get("is_directory"): 93 | if recursive: 94 | _get_files(item_path, recursive=recursive, **kwargs) 95 | else: 96 | rel_path = PurePath(item_path).relative_to(rpath) 97 | target_path = PurePath(lpath).joinpath(rel_path) 98 | board.fs.get(item_path, str(target_path)) 99 | 100 | _get_files(rpath, recursive, **kwargs) 101 | 102 | def _databricks_open(self, path): 103 | from databricks.sdk import WorkspaceClient 104 | 105 | if not self._databricks_exists(path): 106 | raise PinsError(f"File or directory does not exist at path: {path}") 107 | w = WorkspaceClient() 108 | resp = w.files.download(path) 109 | f = BytesIO() 110 | shutil.copyfileobj(resp.contents, f) 111 | f.seek(0) 112 | 
return f 113 | 114 | def _databricks_exists(self, path: str): 115 | if self._databricks_is_type(path) == "nothing": 116 | return False 117 | else: 118 | return True 119 | 120 | @staticmethod 121 | def _databricks_is_type(path: str): 122 | from databricks.sdk import WorkspaceClient 123 | from databricks.sdk.errors import NotFound 124 | 125 | w = WorkspaceClient() 126 | try: 127 | w.files.get_metadata(path) 128 | except NotFound: 129 | try: 130 | w.files.get_directory_metadata(path) 131 | except NotFound: 132 | return "nothing" 133 | else: 134 | return "directory" 135 | else: 136 | return "file" 137 | 138 | def _databricks_ls(self, path, detail): 139 | from databricks.sdk import WorkspaceClient 140 | 141 | if not self._databricks_exists(path): 142 | raise PinsError(f"File or directory does not exist at path: {path}") 143 | w = WorkspaceClient() 144 | if self._databricks_is_type(path) == "file": 145 | if detail: 146 | return [dict(name=path, size=None, type="file")] 147 | else: 148 | return path 149 | 150 | contents_raw = w.files.list_directory_contents(path) 151 | contents = list(contents_raw) 152 | items = [] 153 | for item in contents: 154 | item = self._databricks_content_details(item) 155 | item_path = item.get("path") 156 | item_path = item_path.rstrip("/") 157 | if detail: 158 | if item.get("is_directory"): 159 | item_type = "directory" 160 | else: 161 | item_type = "file" 162 | items.append(dict(name=item_path, size=None, type=item_type)) 163 | else: 164 | items.append(item_path) 165 | return items 166 | 167 | def _databricks_rm_dir(self, path): 168 | from databricks.sdk import WorkspaceClient 169 | 170 | w = WorkspaceClient() 171 | raw_contents = w.files.list_directory_contents(path) 172 | contents = list(raw_contents) 173 | details = list(map(self._databricks_content_details, contents)) 174 | for item in details: 175 | item_path = item.get("path") 176 | if item.get("is_directory"): 177 | self._databricks_rm_dir(item_path) 178 | else: 179 | w.files.delete(item_path) 180 | w.files.delete_directory(path) 181 | 182 | @staticmethod 183 | def _databricks_mkdir(path): 184 | from databricks.sdk import WorkspaceClient 185 | 186 | w = WorkspaceClient() 187 | w.files.create_directory(path) 188 | 189 | @staticmethod 190 | def _databricks_content_details(item): 191 | details = { 192 | "path": item.path, 193 | "name": item.name, 194 | "is_directory": item.is_directory, 195 | } 196 | return details 197 | -------------------------------------------------------------------------------- /pins/drivers.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from pathlib import Path 3 | from typing import Any 4 | 5 | from pins._adaptors import Adaptor, create_adaptor 6 | 7 | from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read 8 | from .errors import PinsInsecureReadError 9 | from .meta import Meta 10 | 11 | # TODO: move IFileSystem out of boards, to fix circular import 12 | # from .boards import IFileSystem 13 | 14 | 15 | UNSAFE_TYPES = frozenset(["joblib"]) 16 | REQUIRES_SINGLE_FILE = frozenset(["csv", "joblib"]) 17 | 18 | 19 | def load_path(filename: str, path_to_version, pin_type=None): 20 | # file path creation ------------------------------------------------------ 21 | if pin_type == "table": 22 | # this type contains an rds and csv files named data.{ext}, so we match 23 | # R pins behavior and hardcode the name 24 | filename = "data.csv" 25 | 26 | if path_to_version is not None: 27 | if 
isinstance(path_to_version, str): 28 | path_to_version = path_to_version.rstrip("/") 29 | path_to_file = f"{path_to_version}/{filename}" 30 | else: 31 | # BoardUrl doesn't have versions, and the file is the full url 32 | path_to_file = filename 33 | 34 | return path_to_file 35 | 36 | 37 | def load_file(filename: str, fs, path_to_version, pin_type): 38 | return fs.open(load_path(filename, path_to_version, pin_type)) 39 | 40 | 41 | def load_data( 42 | meta: Meta, 43 | fs, 44 | path_to_version: "str | None" = None, 45 | allow_pickle_read: "bool | None" = None, 46 | ): 47 | """Return loaded data, based on meta type. 48 | Parameters 49 | ---------- 50 | meta: Meta 51 | Information about the stored data (e.g. its type). 52 | fs: IFileSystem 53 | An abstract filesystem with a method to .open() files. 54 | path_to_version: 55 | A filepath used as the parent directory the data to-be-loaded lives in. 56 | """ 57 | 58 | # TODO: extandable loading with deferred importing 59 | if meta.type in UNSAFE_TYPES and not get_allow_pickle_read(allow_pickle_read): 60 | raise PinsInsecureReadError( 61 | f"Reading pin type {meta.type} involves reading a pickle file, so is NOT secure." 62 | f"Set the allow_pickle_read=True when creating the board, or the " 63 | f"{PINS_ENV_INSECURE_READ}=1 environment variable.\n" 64 | "See:\n" 65 | " * https://docs.python.org/3/library/pickle.html \n" 66 | " * https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations" 67 | ) 68 | 69 | with load_file(meta.file, fs, path_to_version, meta.type) as f: 70 | if meta.type == "csv": 71 | import pandas as pd 72 | 73 | return pd.read_csv(f) 74 | 75 | elif meta.type == "arrow": 76 | import pandas as pd 77 | 78 | return pd.read_feather(f) 79 | 80 | elif meta.type == "feather": 81 | import pandas as pd 82 | 83 | return pd.read_feather(f) 84 | 85 | elif meta.type == "parquet": 86 | import pandas as pd 87 | 88 | return pd.read_parquet(f) 89 | 90 | elif meta.type == "table": 91 | import pandas as pd 92 | 93 | return pd.read_csv(f) 94 | 95 | elif meta.type == "joblib": 96 | import joblib 97 | 98 | return joblib.load(f) 99 | 100 | elif meta.type == "json": 101 | import json 102 | 103 | return json.load(f) 104 | 105 | elif meta.type == "file": 106 | raise NotImplementedError( 107 | "Methods like `.pin_read()` are not able to read 'file' type pins." 108 | " Use `.pin_download()` to download the file." 109 | ) 110 | 111 | elif meta.type == "rds": 112 | try: 113 | import rdata # pyright: ignore[reportMissingImports] 114 | 115 | return rdata.read_rds(f) 116 | except ModuleNotFoundError: 117 | raise ModuleNotFoundError( 118 | "Install the 'rdata' package to attempt to convert 'rds' files into Python objects." 119 | ) 120 | 121 | raise NotImplementedError(f"No driver for type {meta.type}") 122 | 123 | 124 | def save_data( 125 | obj: "Adaptor | Any", fname, pin_type=None, apply_suffix: bool = True 126 | ) -> "str | Sequence[str]": 127 | # TODO: extensible saving with deferred importing 128 | # TODO: how to encode arguments to saving / loading drivers? 129 | # e.g. pandas index options 130 | # TODO: would be useful to have singledispatch func for a "default saver" 131 | # as argument to board, and then type dispatchers for explicit cases 132 | # of saving / loading objects different ways. 
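    # current flow: normalize obj to an Adaptor, build the target file name
    # with an appropriate suffix, then dispatch on pin_type below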
133 | 134 | if isinstance(obj, Adaptor): 135 | adaptor, obj = obj, obj._d 136 | else: 137 | adaptor = create_adaptor(obj) 138 | 139 | if apply_suffix: 140 | if pin_type == "file": 141 | suffix = "".join(Path(obj).suffixes) 142 | else: 143 | suffix = f".{pin_type}" 144 | else: 145 | suffix = "" 146 | 147 | if isinstance(fname, list): 148 | final_name = fname 149 | else: 150 | final_name = f"{fname}{suffix}" 151 | 152 | if pin_type == "csv": 153 | adaptor.write_csv(final_name) 154 | elif pin_type == "arrow": 155 | # NOTE: R pins accepts the type arrow, and saves it as feather. 156 | # we allow reading this type, but raise an error for writing. 157 | adaptor.write_feather(final_name) 158 | elif pin_type == "feather": 159 | msg = ( 160 | 'Saving data as type "feather" no longer supported. Use type "arrow" instead.' 161 | ) 162 | raise NotImplementedError(msg) 163 | elif pin_type == "parquet": 164 | adaptor.write_parquet(final_name) 165 | elif pin_type == "joblib": 166 | adaptor.write_joblib(final_name) 167 | elif pin_type == "json": 168 | adaptor.write_json(final_name) 169 | elif pin_type == "file": 170 | import contextlib 171 | import shutil 172 | 173 | if isinstance(obj, list): 174 | for file, final in zip(obj, final_name): 175 | with contextlib.suppress(shutil.SameFileError): 176 | shutil.copyfile(str(file), final) 177 | return obj 178 | # ignore the case where the source is the same as the target 179 | else: 180 | with contextlib.suppress(shutil.SameFileError): 181 | shutil.copyfile(str(obj), final_name) 182 | 183 | else: 184 | raise NotImplementedError(f"Cannot save type: {pin_type}") 185 | 186 | return final_name 187 | 188 | 189 | def default_title(obj: Any, name: str) -> str: 190 | # Kept for backward compatibility only. 191 | return create_adaptor(obj).default_title(name) 192 | -------------------------------------------------------------------------------- /pins/errors.py: -------------------------------------------------------------------------------- 1 | class PinsError(Exception): 2 | pass 3 | 4 | 5 | class PinsVersionError(PinsError): 6 | pass 7 | 8 | 9 | class PinsInsecureReadError(PinsError): 10 | pass 11 | -------------------------------------------------------------------------------- /pins/meta.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping, Sequence 4 | from dataclasses import InitVar, asdict, dataclass, field, fields 5 | from pathlib import Path 6 | from typing import Any, ClassVar 7 | 8 | import yaml 9 | 10 | from ._types import IOBase, StrOrFile 11 | from .versions import Version, VersionRaw, guess_version 12 | 13 | META_FILENAME = "data.txt" 14 | DEFAULT_API_VERSION = 1 15 | 16 | 17 | @dataclass 18 | class MetaRaw: 19 | """Absolute minimum metadata for a pin. 20 | 21 | Parameters 22 | ---------- 23 | file: 24 | All relevant files contained in the pin. Note that these be absolute paths 25 | to fetch from the target filesystem. 26 | type: 27 | The type of pin data stored. This is used to determine how to read / write it. 28 | """ 29 | 30 | file: str | Sequence[str] | None 31 | type: str 32 | name: str 33 | 34 | 35 | @dataclass 36 | class Meta: 37 | """Represent metadata for a pin version. 38 | 39 | Parameters 40 | ---------- 41 | title: 42 | A title for the pin. 43 | description: 44 | A detailed description of the pin contents. 45 | tags: 46 | Optional tags applied to the pin. 47 | created: 48 | Datetime the pin was created (TODO: document format). 
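        A compact UTC timestamp, e.g. ``20200113T235859Z`` (the format that
        appears in each version's data.txt).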
49 |     pin_hash:
50 |         A hash of the pin.
51 |     file:
52 |         All relevant files in the pin. Should be relative to this pin's folder.
53 |     file_size:
54 |         The total size of the files in the pin.
55 |     type:
56 |         The type of pin data stored. This is used to determine how to read / write it.
57 |     api_version:
58 |         The internal version of the metadata format.
59 |     name:
60 |         TODO - where is this in R pins?
61 |     user:
62 |         A dictionary of additional metadata that may be specified by the user.
63 |     local:
64 |         A dictionary of additional metadata that may be added by the board, depending
65 |         on the backend used. E.g. Posit Connect content id, url, etc.
66 | 
67 |     """
68 | 
69 |     _excluded: ClassVar[set[str]] = {"name", "version", "local"}
70 | 
71 |     title: str | None
72 |     description: str | None
73 | 
74 |     # TODO(defer): different from R pins, which has a local field
75 |     created: str
76 |     pin_hash: str
77 | 
78 |     file: str | Sequence[str]
79 |     file_size: int
80 |     type: str
81 | 
82 |     api_version: int
83 | 
84 |     # In the metadata yaml, the created field uses a custom format, so
85 |     # we need a version object in order to render it. You can think of
86 |     # the version here as "the thing that was used to create version_name,
87 |     # pin_hash, created, etc."
88 |     version: VersionRaw
89 | 
90 |     tags: list[str] | None = None
91 |     name: str | None = None
92 |     user: Mapping = field(default_factory=dict)
93 |     local: Mapping = field(default_factory=dict)
94 | 
95 |     unknown_fields: InitVar[dict | None] = None
96 | 
97 |     def __post_init__(self, unknown_fields: dict | None):
98 |         unknown_fields = {} if unknown_fields is None else unknown_fields
99 | 
100 |         self._unknown_fields = unknown_fields
101 | 
102 |     def __getattr__(self, k):
103 |         try:
104 |             return self._unknown_fields[k]
105 |         except KeyError:
106 |             raise AttributeError(f"Metadata field not found: {k}")
107 | 
108 |     def to_dict(self) -> dict[str, Any]:
109 |         data = asdict(self)
110 | 
111 |         return data
112 | 
113 |     def to_pin_dict(self):
114 |         d = self.to_dict()
115 | 
116 |         for k in self._excluded:
117 |             del d[k]
118 | 
119 |         # TODO: once tag writing is implemented, delete this line
120 |         del d["tags"]
121 | 
122 |         return d
123 | 
124 |     @classmethod
125 |     def from_pin_dict(cls, data, pin_name, version, local=None) -> Meta:
126 |         # TODO: re-arrange Meta argument positions to reflect what's been
127 |         # learned about default arguments. e.g.
title was not used at some 128 | # point in api_version 1 129 | all_field_names = {entry.name for entry in fields(Meta)} 130 | 131 | keep_fields = all_field_names - cls._excluded 132 | 133 | extra = {"title": None} if "title" not in data else {} 134 | local = {} if local is None else local 135 | 136 | meta_data = {k: v for k, v in data.items() if k in keep_fields} 137 | unknown = {k: v for k, v in data.items() if k not in keep_fields} 138 | 139 | return cls( 140 | **meta_data, 141 | **extra, 142 | name=pin_name, 143 | version=version, 144 | local=local, 145 | unknown_fields=unknown, 146 | ) 147 | 148 | def to_pin_yaml(self, f: IOBase | None = None) -> str | None: 149 | data = self.to_pin_dict() 150 | 151 | return yaml.dump(data, f) 152 | 153 | 154 | @dataclass 155 | class MetaV0: 156 | file: str | Sequence[str] 157 | type: str 158 | description: str | None 159 | 160 | name: str 161 | 162 | version: VersionRaw 163 | 164 | # holds raw data.txt contents 165 | original_fields: dict = field(default_factory=dict) 166 | user: dict = field(default_factory=dict, init=False) 167 | local: Mapping = field(default_factory=dict) 168 | 169 | title: ClassVar[None] = None 170 | created: ClassVar[None] = None 171 | pin_hash: ClassVar[None] = None 172 | file_size: ClassVar[None] = None 173 | api_version: ClassVar[None] = None 174 | 175 | def to_dict(self): 176 | return asdict(self) 177 | 178 | @classmethod 179 | def from_pin_dict(cls, data, pin_name, version, local=None) -> MetaV0: 180 | # could infer from dataclasses.fields(), but seems excessive. 181 | req_fields = {"type", "description"} 182 | 183 | # Note that we need to .get(), since fields may not be in metadata 184 | req_inputs = {k: data.get(k) for k in req_fields} 185 | req_inputs["file"] = data["path"] 186 | 187 | local = {} if local is None else local 188 | return cls( 189 | **req_inputs, 190 | name=pin_name, 191 | original_fields=data, 192 | version=version, 193 | local=local, 194 | ) 195 | 196 | def to_pin_dict(self): 197 | raise NotImplementedError("v0 pins metadata are read only.") 198 | 199 | def to_pin_yaml(self, *args, **kwargs): 200 | self.to_pin_dict() 201 | 202 | 203 | class MetaFactory: 204 | """Responsible for creating and loading (e.g. from yaml) of meta objects.""" 205 | 206 | def get_meta_name(self, *args, **kwargs) -> str: 207 | return META_FILENAME 208 | 209 | def get_version_for_meta(self, api_version) -> Version: 210 | if api_version != 1: 211 | raise NotImplementedError(f"Unsupported api_version: {api_version}") 212 | 213 | return Version 214 | 215 | def create( 216 | self, 217 | base_folder: str | Path, 218 | files: Sequence[StrOrFile], 219 | type, 220 | # TODO: when files is a string name should be okay as None 221 | name, 222 | title, 223 | description=None, 224 | created=None, 225 | user=None, 226 | ) -> Meta: 227 | if title is None: 228 | raise NotImplementedError("title arguments required") 229 | if isinstance(files, str): 230 | from pathlib import Path 231 | 232 | version = Version.from_files([files], created) 233 | p_file = Path(files) 234 | file_size = p_file.stat().st_size 235 | file_name = str(Path(files).relative_to(Path(base_folder))) 236 | 237 | elif isinstance(files, IOBase): 238 | # TODO: in theory can calculate size from a file object, but let's 239 | # wait until it's clear how calculating file size fits into pins 240 | # e.g. in combination with folders, etc.. 
241 | 242 | # from os import fstat 243 | # 244 | # version = Version.from_files([files], created) 245 | # files_size = fstat(files.fileno()).st_size 246 | 247 | raise NotImplementedError("Cannot create from file object.") 248 | else: 249 | if isinstance(files, (list, tuple)): 250 | from pathlib import Path 251 | 252 | file_name = [Path(f).name for f in files] 253 | file_size = [Path(f).stat().st_size for f in files] 254 | version = Version.from_files(files, created) 255 | 256 | return Meta( 257 | title=title, 258 | description=description, 259 | file=file_name, # TODO: FINISH 260 | file_size=file_size, 261 | pin_hash=version.hash, 262 | created=version.render_created(), 263 | type=type, 264 | api_version=DEFAULT_API_VERSION, 265 | name=name, 266 | user=user if user is not None else {}, 267 | version=version, 268 | ) 269 | 270 | def create_raw(self, files: Sequence[StrOrFile], type: str, name: str) -> MetaRaw: 271 | return MetaRaw(files, type, name) 272 | 273 | def read_pin_yaml( 274 | self, 275 | f: IOBase, 276 | pin_name: str, 277 | version: str | VersionRaw, 278 | local=None, 279 | ) -> Meta: 280 | if isinstance(version, str): 281 | version_obj = guess_version(version) 282 | else: 283 | version_obj = version 284 | 285 | data = yaml.safe_load(f) 286 | 287 | api_version = data.get("api_version", 0) 288 | if api_version >= 2: 289 | raise NotImplementedError( 290 | f"api_version {api_version} by this version of the pins library" 291 | ) 292 | elif api_version == 0: 293 | cls_meta = MetaV0 294 | else: 295 | cls_meta = Meta 296 | 297 | return cls_meta.from_pin_dict(data, pin_name, version=version_obj, local=local) 298 | -------------------------------------------------------------------------------- /pins/rsconnect/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/rsconnect/__init__.py -------------------------------------------------------------------------------- /pins/rsconnect/html/highlight.js-9.15.9/qtcreator_light.css: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Qt Creator light color scheme 4 | 5 | */ 6 | 7 | 8 | .hljs { 9 | display: block; 10 | overflow-x: auto; 11 | background: #ffffff; 12 | } 13 | 14 | .hljs, 15 | .hljs-subst, 16 | .hljs-tag, 17 | .hljs-title { 18 | color: #000000; 19 | } 20 | 21 | .hljs-strong, 22 | .hljs-emphasis { 23 | color: #000000; 24 | } 25 | 26 | .hljs-bullet, 27 | .hljs-quote, 28 | .hljs-number, 29 | .hljs-regexp, 30 | .hljs-literal { 31 | color: #000080; 32 | } 33 | 34 | .hljs-code 35 | .hljs-selector-class { 36 | color: #800080; 37 | } 38 | 39 | .hljs-emphasis, 40 | .hljs-stronge, 41 | .hljs-type { 42 | font-style: italic; 43 | } 44 | 45 | .hljs-keyword, 46 | .hljs-selector-tag, 47 | .hljs-function, 48 | .hljs-section, 49 | .hljs-symbol, 50 | .hljs-name { 51 | color: #808000; 52 | } 53 | 54 | .hljs-attribute { 55 | color: #800000; 56 | } 57 | 58 | .hljs-variable, 59 | .hljs-params, 60 | .hljs-class .hljs-title { 61 | color: #0055AF; 62 | } 63 | 64 | .hljs-string, 65 | .hljs-selector-id, 66 | .hljs-selector-attr, 67 | .hljs-selector-pseudo, 68 | .hljs-type, 69 | .hljs-built_in, 70 | .hljs-builtin-name, 71 | .hljs-template-tag, 72 | .hljs-template-variable, 73 | .hljs-addition, 74 | .hljs-link { 75 | color: #008000; 76 | } 77 | 78 | .hljs-comment, 79 | .hljs-meta, 80 | .hljs-deletion { 81 | color: #008000; 82 | } 83 | 
-------------------------------------------------------------------------------- /pins/rsconnect/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 29 | 30 | 31 | 32 |
33 |

{{pin_name}}

34 | {% if pin_metadata %} 35 |

36 | {% if date %}Last updated from Python: {{ date }} •{% endif %} 37 | Format: {{ pin_metadata.type }} • 38 | API: v{{ pin_metadata.api_version }} 39 |

40 |

{{ pin_metadata.description }}

41 |

Download data: {{ pin_files }}

42 |
43 | Raw metadata 44 |
{{ pin_metadata.to_pin_yaml() }}
45 |
46 | {% endif %} 47 |
48 | 49 |
50 |

Python Code

51 | 52 |
from pins import board_connect
53 | from dotenv import load_dotenv
54 | load_dotenv()
55 | 
56 | board = {{board_deparse}}
57 | board.pin_read("{{pin_name}}")
58 | 59 | 64 |
65 | 66 |
67 |

R Code

68 |
library(pins)
69 | 
70 | board <- board_connect(auth = "envvar")
71 | pin_read(board, "{{pin_name}}")
72 |
73 | 74 |
75 |

Preview (up to 100 rows)

76 |
77 | 80 |
81 |
82 | 83 | 84 | -------------------------------------------------------------------------------- /pins/rsconnect/html/pagedtable-1.1/pagedtable.css: -------------------------------------------------------------------------------- 1 | .pagedtable { 2 | overflow: auto; 3 | padding-left: 8px; 4 | padding-right: 8px; 5 | } 6 | 7 | .pagedtable table { 8 | width: 100%; 9 | max-width: 100%; 10 | margin: 0; 11 | border-bottom: 1px solid #dddddd; 12 | font-weight: 100; 13 | line-height: 24px; 14 | } 15 | 16 | .pagedtable td, .pagedtable th { 17 | padding: 2px 4px 3px 4px; 18 | } 19 | 20 | .pagedtable th { 21 | border: none; 22 | border-bottom: 1px solid #dddddd; 23 | 24 | min-width: 45px; 25 | font-weight: normal; 26 | } 27 | 28 | .pagedtable-empty th { 29 | display: none; 30 | } 31 | 32 | .pagedtable td { 33 | white-space: nowrap; 34 | overflow: hidden; 35 | text-overflow: ellipsis; 36 | } 37 | 38 | .pagedtable .even { 39 | background-color: #fafafa; 40 | } 41 | 42 | .pagedtable-padding-col { 43 | display: none; 44 | } 45 | 46 | .pagedtable a { 47 | -webkit-touch-callout: none; 48 | -webkit-user-select: none; 49 | -khtml-user-select: none; 50 | -moz-user-select: none; 51 | -ms-user-select: none; 52 | user-select: none; 53 | } 54 | 55 | .pagedtable-index-nav { 56 | cursor: pointer; 57 | padding: 0 5px 0 5px; 58 | float: right; 59 | border: 0; 60 | } 61 | 62 | .pagedtable-index-nav-disabled { 63 | cursor: default; 64 | text-decoration: none; 65 | color: #999; 66 | } 67 | 68 | a.pagedtable-index-nav-disabled:hover { 69 | text-decoration: none; 70 | color: #999; 71 | } 72 | 73 | .pagedtable-indexes { 74 | cursor: pointer; 75 | float: right; 76 | border: 0; 77 | } 78 | 79 | .pagedtable-index-current { 80 | cursor: default; 81 | text-decoration: none; 82 | color: #333; 83 | border: 0; 84 | } 85 | 86 | a.pagedtable-index-current:hover { 87 | text-decoration: none; 88 | color: #333; 89 | } 90 | 91 | .pagedtable-index { 92 | width: 30px; 93 | display: inline-block; 94 | text-align: center; 95 | border: 0; 96 | } 97 | 98 | .pagedtable-index-separator-left { 99 | display: inline-block; 100 | color: #333; 101 | font-size: 9px; 102 | padding: 0 0 0 0; 103 | cursor: default; 104 | } 105 | 106 | .pagedtable-index-separator-right { 107 | display: inline-block; 108 | color: #333; 109 | font-size: 9px; 110 | padding: 0 4px 0 0; 111 | cursor: default; 112 | } 113 | 114 | .pagedtable-footer { 115 | padding-top: 6px; 116 | padding-bottom: 5px; 117 | } 118 | 119 | .pagedtable-not-empty .pagedtable-footer { 120 | } 121 | 122 | .pagedtable-info { 123 | overflow: hidden; 124 | color: #999; 125 | white-space: nowrap; 126 | text-overflow: ellipsis; 127 | } 128 | 129 | .pagedtable-header-name { 130 | overflow: hidden; 131 | text-overflow: ellipsis; 132 | } 133 | 134 | .pagedtable-header-type { 135 | color: #999; 136 | height: 0px; 137 | } 138 | 139 | .pagedtable-na-cell { 140 | font-style: italic; 141 | opacity: 0.3; 142 | } 143 | -------------------------------------------------------------------------------- /pins/tests/.gitignore: -------------------------------------------------------------------------------- 1 | rsconnect_api_keys.json 2 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/data.txt: -------------------------------------------------------------------------------- 1 | api_version: 1 2 | created: 20200113T235859Z 3 | description: some description 4 | file: test_rsc_pin.csv 5 | file_size: 19 6 | pin_hash: 
a6cf5331bf3de6c6 7 | title: some pin 8 | type: csv 9 | user: {} 10 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/highlight.js-9.15.9/qtcreator_light.css: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Qt Creator light color scheme 4 | 5 | */ 6 | 7 | 8 | .hljs { 9 | display: block; 10 | overflow-x: auto; 11 | background: #ffffff; 12 | } 13 | 14 | .hljs, 15 | .hljs-subst, 16 | .hljs-tag, 17 | .hljs-title { 18 | color: #000000; 19 | } 20 | 21 | .hljs-strong, 22 | .hljs-emphasis { 23 | color: #000000; 24 | } 25 | 26 | .hljs-bullet, 27 | .hljs-quote, 28 | .hljs-number, 29 | .hljs-regexp, 30 | .hljs-literal { 31 | color: #000080; 32 | } 33 | 34 | .hljs-code 35 | .hljs-selector-class { 36 | color: #800080; 37 | } 38 | 39 | .hljs-emphasis, 40 | .hljs-stronge, 41 | .hljs-type { 42 | font-style: italic; 43 | } 44 | 45 | .hljs-keyword, 46 | .hljs-selector-tag, 47 | .hljs-function, 48 | .hljs-section, 49 | .hljs-symbol, 50 | .hljs-name { 51 | color: #808000; 52 | } 53 | 54 | .hljs-attribute { 55 | color: #800000; 56 | } 57 | 58 | .hljs-variable, 59 | .hljs-params, 60 | .hljs-class .hljs-title { 61 | color: #0055AF; 62 | } 63 | 64 | .hljs-string, 65 | .hljs-selector-id, 66 | .hljs-selector-attr, 67 | .hljs-selector-pseudo, 68 | .hljs-type, 69 | .hljs-built_in, 70 | .hljs-builtin-name, 71 | .hljs-template-tag, 72 | .hljs-template-variable, 73 | .hljs-addition, 74 | .hljs-link { 75 | color: #008000; 76 | } 77 | 78 | .hljs-comment, 79 | .hljs-meta, 80 | .hljs-deletion { 81 | color: #008000; 82 | } 83 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 29 | 30 | 31 | 32 |
33 |

derek/test_rsc_pin

34 | 35 |

36 | Last updated from Python: 2020-01-13 23:58:59 • 37 | Format: csv • 38 | API: v1 39 |

40 |

some description

41 |

Download data: test_rsc_pin.csv

42 |
43 | Raw metadata 44 |
api_version: 1
45 | created: 20200113T235859Z
46 | description: some description
47 | file: test_rsc_pin.csv
48 | file_size: 19
49 | pin_hash: a6cf5331bf3de6c6
50 | title: some pin
51 | type: csv
52 | user: {}
53 | 
54 |
55 | 56 |
57 | 58 |
59 |

Python Code

60 | 61 |
from pins import board_connect
62 | from dotenv import load_dotenv
63 | load_dotenv()
64 | 
65 | board = board_connect(server_url='http://localhost:3939')
66 | board.pin_read("derek/test_rsc_pin")
67 | 68 | 73 |
74 | 75 |
76 |

R Code

77 |
library(pins)
78 | 
79 | board <- board_connect(auth = "envvar")
80 | pin_read(board, "derek/test_rsc_pin")
81 |
82 | 83 |
84 |

Preview (up to 100 rows)

85 |
86 | 89 |
90 |
91 | 92 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/pagedtable-1.1/pagedtable.css: -------------------------------------------------------------------------------- 1 | .pagedtable { 2 | overflow: auto; 3 | padding-left: 8px; 4 | padding-right: 8px; 5 | } 6 | 7 | .pagedtable table { 8 | width: 100%; 9 | max-width: 100%; 10 | margin: 0; 11 | border-bottom: 1px solid #dddddd; 12 | font-weight: 100; 13 | line-height: 24px; 14 | } 15 | 16 | .pagedtable td, .pagedtable th { 17 | padding: 2px 4px 3px 4px; 18 | } 19 | 20 | .pagedtable th { 21 | border: none; 22 | border-bottom: 1px solid #dddddd; 23 | 24 | min-width: 45px; 25 | font-weight: normal; 26 | } 27 | 28 | .pagedtable-empty th { 29 | display: none; 30 | } 31 | 32 | .pagedtable td { 33 | white-space: nowrap; 34 | overflow: hidden; 35 | text-overflow: ellipsis; 36 | } 37 | 38 | .pagedtable .even { 39 | background-color: #fafafa; 40 | } 41 | 42 | .pagedtable-padding-col { 43 | display: none; 44 | } 45 | 46 | .pagedtable a { 47 | -webkit-touch-callout: none; 48 | -webkit-user-select: none; 49 | -khtml-user-select: none; 50 | -moz-user-select: none; 51 | -ms-user-select: none; 52 | user-select: none; 53 | } 54 | 55 | .pagedtable-index-nav { 56 | cursor: pointer; 57 | padding: 0 5px 0 5px; 58 | float: right; 59 | border: 0; 60 | } 61 | 62 | .pagedtable-index-nav-disabled { 63 | cursor: default; 64 | text-decoration: none; 65 | color: #999; 66 | } 67 | 68 | a.pagedtable-index-nav-disabled:hover { 69 | text-decoration: none; 70 | color: #999; 71 | } 72 | 73 | .pagedtable-indexes { 74 | cursor: pointer; 75 | float: right; 76 | border: 0; 77 | } 78 | 79 | .pagedtable-index-current { 80 | cursor: default; 81 | text-decoration: none; 82 | color: #333; 83 | border: 0; 84 | } 85 | 86 | a.pagedtable-index-current:hover { 87 | text-decoration: none; 88 | color: #333; 89 | } 90 | 91 | .pagedtable-index { 92 | width: 30px; 93 | display: inline-block; 94 | text-align: center; 95 | border: 0; 96 | } 97 | 98 | .pagedtable-index-separator-left { 99 | display: inline-block; 100 | color: #333; 101 | font-size: 9px; 102 | padding: 0 0 0 0; 103 | cursor: default; 104 | } 105 | 106 | .pagedtable-index-separator-right { 107 | display: inline-block; 108 | color: #333; 109 | font-size: 9px; 110 | padding: 0 4px 0 0; 111 | cursor: default; 112 | } 113 | 114 | .pagedtable-footer { 115 | padding-top: 6px; 116 | padding-bottom: 5px; 117 | } 118 | 119 | .pagedtable-not-empty .pagedtable-footer { 120 | } 121 | 122 | .pagedtable-info { 123 | overflow: hidden; 124 | color: #999; 125 | white-space: nowrap; 126 | text-overflow: ellipsis; 127 | } 128 | 129 | .pagedtable-header-name { 130 | overflow: hidden; 131 | text-overflow: ellipsis; 132 | } 133 | 134 | .pagedtable-header-type { 135 | color: #999; 136 | height: 0px; 137 | } 138 | 139 | .pagedtable-na-cell { 140 | font-style: italic; 141 | opacity: 0.3; 142 | } 143 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/test_rsc_pin.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 1.0,a 3 | 2.0,b 4 | ,c 5 | -------------------------------------------------------------------------------- /pins/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | from 
importlib_resources import files 7 | from pytest import mark as m 8 | 9 | from pins.tests.helpers import ( 10 | BoardBuilder, 11 | DbcBoardBuilder, 12 | RscBoardBuilder, 13 | Snapshot, 14 | rm_env, 15 | ) 16 | 17 | EXAMPLE_REL_PATH = "pins/tests/pins-compat" 18 | PATH_TO_EXAMPLE_BOARD = files("pins") / "tests/pins-compat" 19 | PATH_TO_EXAMPLE_BOARD_DBC = "/Volumes/workshops/my-board/my-volume/test" 20 | PATH_TO_EXAMPLE_VERSION = PATH_TO_EXAMPLE_BOARD / "df_csv/20220214T163720Z-9bfad/" 21 | EXAMPLE_PIN_NAME = "df_csv" 22 | 23 | PATH_TO_MANIFEST_BOARD = files("pins") / "tests/pin-board" 24 | 25 | # parameters that can be used more than once per session 26 | params_safe = [ 27 | pytest.param(lambda: BoardBuilder("file"), id="file", marks=m.fs_file), 28 | pytest.param(lambda: BoardBuilder("s3"), id="s3", marks=m.fs_s3), 29 | pytest.param(lambda: BoardBuilder("gcs"), id="gcs", marks=m.fs_gcs), 30 | pytest.param(lambda: BoardBuilder("abfs"), id="abfs", marks=m.fs_abfs), 31 | pytest.param(lambda: DbcBoardBuilder("dbc"), id="dbc", marks=m.fs_dbc), 32 | ] 33 | 34 | # rsc should only be used once, because users are created at docker setup time 35 | param_rsc = pytest.param(lambda: RscBoardBuilder("rsc"), id="rsc", marks=m.fs_rsc) 36 | 37 | params_backend = [*params_safe, param_rsc] 38 | 39 | 40 | @pytest.fixture(params=params_backend, scope="session") 41 | def backend(request): 42 | backend = request.param() 43 | yield backend 44 | backend.teardown() 45 | 46 | 47 | @pytest.fixture(scope="session") 48 | def http_example_board_path(): 49 | # backend = BoardBuilder("s3") 50 | # yield backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD.absolute())).board 51 | # backend.teardown() 52 | # TODO: could put it in a publicly available bucket folder 53 | return ( 54 | "https://raw.githubusercontent.com/machow/pins-python/main/pins/tests/pins-compat" 55 | ) 56 | 57 | 58 | @pytest.fixture 59 | def snapshot(request): 60 | p_snap = files("pins") / "tests/_snapshots" / request.node.originalname 61 | snap = Snapshot(p_snap, request.config.getoption("--snapshot-update")) 62 | 63 | return snap 64 | 65 | 66 | @pytest.fixture 67 | def df(): 68 | import pandas as pd 69 | 70 | return pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}) 71 | 72 | 73 | @pytest.fixture 74 | def tmp_cache(): 75 | with rm_env("PINS_CACHE_DIR"): 76 | with tempfile.TemporaryDirectory() as tmp_dir: 77 | os.environ["PINS_CACHE_DIR"] = str(tmp_dir) 78 | yield Path(tmp_dir) 79 | 80 | 81 | @pytest.fixture 82 | def tmp_data_dir(): 83 | with rm_env("PINS_DATA_DIR"): 84 | with tempfile.TemporaryDirectory() as tmp_dir: 85 | os.environ["PINS_DATA_DIR"] = str(tmp_dir) 86 | yield Path(tmp_dir) 87 | 88 | 89 | def pytest_addoption(parser): 90 | parser.addoption("--snapshot-update", action="store_true") 91 | -------------------------------------------------------------------------------- /pins/tests/example-bundle/data.txt: -------------------------------------------------------------------------------- 1 | api_version: 1 2 | created: 20220304T153828Z 3 | description: null 4 | file: data_frame.csv 5 | file_size: 15 6 | name: data_frame.csv 7 | pin_hash: c65b0e9785abaa60 8 | title: some title 9 | type: csv 10 | user: {} 11 | -------------------------------------------------------------------------------- /pins/tests/example-bundle/data_frame.csv: -------------------------------------------------------------------------------- 1 | ,x 2 | 0,1 3 | 1,2 4 | 2,3 5 | --------------------------------------------------------------------------------
/pins/tests/example-bundle/index.html: -------------------------------------------------------------------------------- 1 | yo 2 | -------------------------------------------------------------------------------- /pins/tests/example-bundle/manifest.json: -------------------------------------------------------------------------------- 1 | {"version": 1, "local": "en_US", "platform": "3.5.1", "metadata": {"appmode": "static", "primary_rmd": null, "primary_html": "index.html", "content_category": "pin", "has_parameters": false}, "packages": null, "files": ["index.html", "manifest.json", "data_frame.csv", "data.txt"], "users": null} 2 | -------------------------------------------------------------------------------- /pins/tests/pin-board/_pins.yaml: -------------------------------------------------------------------------------- 1 | x: 2 | - x/20221215T180351Z-c3943/ 3 | 'y': 4 | - y/20221215T180357Z-9ae7a/ 5 | - y/20221215T180400Z-b81d5/ 6 | -------------------------------------------------------------------------------- /pins/tests/pin-board/x/20221215T180351Z-c3943/data.txt: -------------------------------------------------------------------------------- 1 | file: x.json 2 | file_size: 23 3 | pin_hash: c3943ca5a9aab2df 4 | type: json 5 | title: 'x: a pinned integer vector' 6 | description: ~ 7 | tags: ~ 8 | created: 20221215T180351Z 9 | api_version: 1.0 10 | -------------------------------------------------------------------------------- /pins/tests/pin-board/x/20221215T180351Z-c3943/x.json: -------------------------------------------------------------------------------- 1 | [1,2,3,4,5,6,7,8,9,10] 2 | -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180357Z-9ae7a/data.txt: -------------------------------------------------------------------------------- 1 | file: y.rds 2 | file_size: 61 3 | pin_hash: 9ae7a970010c84e0 4 | type: rds 5 | title: 'y: a pinned integer vector' 6 | description: ~ 7 | tags: ~ 8 | created: 20221215T180357Z 9 | api_version: 1.0 10 | -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180357Z-9ae7a/y.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pin-board/y/20221215T180357Z-9ae7a/y.rds -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180400Z-b81d5/data.txt: -------------------------------------------------------------------------------- 1 | file: y.json 2 | file_size: 53 3 | pin_hash: b81d5bea9e760608 4 | type: json 5 | title: 'y: a pinned integer vector' 6 | description: ~ 7 | tags: ~ 8 | created: 20221215T180400Z 9 | api_version: 1.0 10 | -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180400Z-b81d5/y.json: -------------------------------------------------------------------------------- 1 | [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] 2 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_arrow/20220214T163720Z-ad0c1/data.txt: -------------------------------------------------------------------------------- 1 | file: df_arrow.arrow 2 | file_size: 1282 3 | pin_hash: ad0c1a5a64ad7ca7 4 | type: arrow 5 | title: 'df_arrow: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 
1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_arrow/20220214T163720Z-ad0c1/df_arrow.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-compat/df_arrow/20220214T163720Z-ad0c1/df_arrow.arrow -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163718Z-eceac/data.txt: -------------------------------------------------------------------------------- 1 | file: df_csv.csv 2 | file_size: 20 3 | pin_hash: eceac651f7d06360 4 | type: csv 5 | title: 'df_csv: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163718Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163718Z-eceac/df_csv.csv: -------------------------------------------------------------------------------- 1 | "x","y" 2 | 1,"a" 3 | 2,"b" 4 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163720Z-9bfad/data.txt: -------------------------------------------------------------------------------- 1 | file: df_csv.csv 2 | file_size: 28 3 | pin_hash: 9bfad6d1a322a904 4 | type: csv 5 | title: 'df_csv: a pinned 2 x 3 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163720Z-9bfad/df_csv.csv: -------------------------------------------------------------------------------- 1 | "x","y","z" 2 | 1,"a",3 3 | 2,"b",4 4 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_rds/20220214T163720Z-35b15/data.txt: -------------------------------------------------------------------------------- 1 | file: df_rds.rds 2 | file_size: 116 3 | pin_hash: 35b1570263448755 4 | type: rds 5 | title: 'df_rds: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_rds/20220214T163720Z-35b15/df_rds.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-compat/df_rds/20220214T163720Z-35b15/df_rds.rds -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_unversioned/20220214T163720Z-35b15/data.txt: -------------------------------------------------------------------------------- 1 | file: df_unversioned.rds 2 | file_size: 116 3 | pin_hash: 35b1570263448755 4 | type: rds 5 | title: 'df_unversioned: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_unversioned/20220214T163720Z-35b15/df_unversioned.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-compat/df_unversioned/20220214T163720Z-35b15/df_unversioned.rds 
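# A hedged sketch of reading these pins-compat fixtures back with a local
# folder board — essentially what test_compat.py exercises for the "file"
# backend. The relative path assumes the repository root as the working
# directory.
from pins import board_folder

board = board_folder("pins/tests/pins-compat")

# df_csv has the two versions shown above: ...-eceac (2 x 2) and ...-9bfad (2 x 3)
versions = board.pin_versions("df_csv", as_df=False)
assert [v.version for v in versions] == [
    "20220214T163718Z-eceac",
    "20220214T163720Z-9bfad",
]

# pin_read returns the latest version as a pandas DataFrame
df = board.pin_read("df_csv")
assert df.shape == (2, 3)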
-------------------------------------------------------------------------------- /pins/tests/pins-old-types/a-table/v/data.csv: -------------------------------------------------------------------------------- 1 | "a","b" 2 | 1,"x" 3 | 2,"y" 4 | -------------------------------------------------------------------------------- /pins/tests/pins-old-types/a-table/v/data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-old-types/a-table/v/data.rds -------------------------------------------------------------------------------- /pins/tests/pins-old-types/a-table/v/data.txt: -------------------------------------------------------------------------------- 1 | path: 2 | - data.csv 3 | - data.rds 4 | type: table 5 | rows: 2 6 | cols: 2 7 | columns: 8 | a: integer 9 | b: character 10 | -------------------------------------------------------------------------------- /pins/tests/test_adaptors.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import joblib 4 | import pandas as pd 5 | import pytest 6 | from pandas.testing import assert_frame_equal 7 | 8 | from pins._adaptors import ( 9 | AbstractPandasFrame, 10 | Adaptor, 11 | DFAdaptor, 12 | PandasAdaptor, 13 | create_adaptor, 14 | ) 15 | 16 | 17 | class TestCreateAdaptor: 18 | def test_pandas(self): 19 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 20 | adaptor = create_adaptor(df) 21 | assert isinstance(adaptor, Adaptor) 22 | assert isinstance(adaptor, PandasAdaptor) 23 | 24 | def test_non_df(self): 25 | adaptor = create_adaptor(42) 26 | assert isinstance(adaptor, Adaptor) 27 | assert not isinstance(adaptor, PandasAdaptor) 28 | assert not isinstance(adaptor, DFAdaptor) 29 | 30 | def test_already_adaptor(self): 31 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 32 | adaptor = create_adaptor(df) 33 | assert isinstance(adaptor, PandasAdaptor) 34 | assert create_adaptor(adaptor) is adaptor 35 | 36 | 37 | class TestAdaptor: 38 | def test_write_json(self, tmp_path: Path): 39 | data = {"a": 1, "b": 2} 40 | adaptor = Adaptor(data) 41 | file = tmp_path / "file.json" 42 | adaptor.write_json(file) 43 | assert file.read_text() == '{"a": 1, "b": 2}' 44 | 45 | def test_write_joblib(self, tmp_path: Path): 46 | data = {"a": 1, "b": 2} 47 | adaptor = Adaptor(data) 48 | file = tmp_path / "file.joblib" 49 | adaptor.write_joblib(file) 50 | 51 | # Dump independently and check contents 52 | expected_file = tmp_path / "expected.joblib" 53 | joblib.dump(data, expected_file) 54 | assert expected_file.read_bytes() == file.read_bytes() 55 | 56 | def test_write_csv(self): 57 | with pytest.raises(NotImplementedError): 58 | adaptor = Adaptor(42) 59 | adaptor.write_csv("file.csv") 60 | 61 | def test_write_parquet(self): 62 | with pytest.raises(NotImplementedError): 63 | adaptor = Adaptor(42) 64 | adaptor.write_parquet("file.parquet") 65 | 66 | def test_write_feather(self): 67 | with pytest.raises(NotImplementedError): 68 | adaptor = Adaptor(42) 69 | adaptor.write_feather("file.feather") 70 | 71 | class TestDataPreview: 72 | def test_int(self): 73 | adaptor = Adaptor(42) 74 | assert adaptor.data_preview == "{}" 75 | 76 | def test_dict(self): 77 | data = {"a": 1, "b": 2} 78 | adaptor = Adaptor(data) 79 | assert adaptor.data_preview == "{}" 80 | 81 | def test_default_title(self): 82 | adaptor = Adaptor(42) 83 | assert adaptor.default_title("my_data") == 
"my_data: a pinned int object" 84 | 85 | 86 | class TestPandasAdaptor: 87 | def test_df_type(self): 88 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 89 | adaptor = PandasAdaptor(df) 90 | assert adaptor.df_type == "DataFrame" 91 | 92 | def test_columns(self): 93 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 94 | adaptor = PandasAdaptor(df) 95 | assert isinstance(adaptor, DFAdaptor) 96 | assert isinstance(adaptor, PandasAdaptor) 97 | assert adaptor.columns == ["a", "b"] 98 | 99 | def test_shape(self): 100 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 101 | adaptor = PandasAdaptor(df) 102 | assert isinstance(adaptor, DFAdaptor) 103 | assert isinstance(adaptor, PandasAdaptor) 104 | assert adaptor.shape == (3, 2) 105 | 106 | def test_head(self): 107 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 108 | adaptor = PandasAdaptor(df) 109 | head1_df = pd.DataFrame({"a": [1], "b": [4]}) 110 | expected = create_adaptor(head1_df) 111 | assert isinstance(adaptor, DFAdaptor) 112 | assert isinstance(adaptor.head(1), DFAdaptor) 113 | assert isinstance(adaptor.head(1), PandasAdaptor) 114 | assert_frame_equal(adaptor.head(1)._d, expected._d) 115 | 116 | def test_to_json(self): 117 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 118 | adaptor = PandasAdaptor(df) 119 | assert isinstance(adaptor, DFAdaptor) 120 | assert adaptor.to_json() == """[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]""" 121 | 122 | def test_write_csv(self, tmp_path: Path): 123 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 124 | adaptor = PandasAdaptor(df) 125 | file = tmp_path / "file.csv" 126 | adaptor.write_csv(file) 127 | assert file.read_text() == "a,b\n1,4\n2,5\n3,6\n" 128 | 129 | def test_write_parquet(self, tmp_path: Path): 130 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 131 | adaptor = PandasAdaptor(df) 132 | file = tmp_path / "file.parquet" 133 | adaptor.write_parquet(file) 134 | assert_frame_equal(pd.read_parquet(file), df) 135 | 136 | def test_write_feather(self, tmp_path: Path): 137 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 138 | adaptor = PandasAdaptor(df) 139 | file = tmp_path / "file.feather" 140 | adaptor.write_feather(file) 141 | assert_frame_equal(pd.read_feather(file), df) 142 | 143 | def test_data_preview(self): 144 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 145 | adaptor = PandasAdaptor(df) 146 | expected = ( 147 | '{"data": [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}], ' 148 | '"columns": [{"name": ["a"], "label": ["a"], "align": ["left"], "type": [""]}, ' 149 | '{"name": ["b"], "label": ["b"], "align": ["left"], "type": [""]}]}' 150 | ) 151 | assert adaptor.data_preview == expected 152 | 153 | def test_default_title(self): 154 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 155 | adaptor = PandasAdaptor(df) 156 | assert adaptor.default_title("my_df") == "my_df: a pinned 3 x 2 DataFrame" 157 | 158 | 159 | class TestAbstractBackends: 160 | class TestAbstractPandasFrame: 161 | def test_isinstance(self): 162 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 163 | assert isinstance(df, AbstractPandasFrame) 164 | 165 | def test_not_isinstance(self): 166 | assert not isinstance(42, AbstractPandasFrame) 167 | -------------------------------------------------------------------------------- /pins/tests/test_cache.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pathlib import Path 3 | 4 | import pytest 5 | from fsspec import filesystem 6 | 7 | from pins.cache import ( 8 | 
CachePruner, 9 | PinsCache, 10 | PinsUrlCache, 11 | cache_prune, 12 | touch_access_time, 13 | ) 14 | 15 | # NOTE: windows time.time() implementation appears to have 16 millisecond precision, so 16 | # we need to add a small delay, in order to avoid prune checks appearing to happen at the 17 | # exact same moment something earlier was created / accessed. 18 | # see: https://stackoverflow.com/a/1938096/1144523 19 | 20 | 21 | # Utilities =================================================================== 22 | 23 | 24 | def _sleep(): 25 | # time-based issues keep arising erratically in windows checks, so try to shoot 26 | # well past 27 | time.sleep(0.3) 28 | 29 | 30 | @pytest.fixture 31 | def some_file(tmp_path): 32 | p = tmp_path / "some_file.txt" 33 | p.touch() 34 | return p 35 | 36 | 37 | def test_touch_access_time_manual(some_file): 38 | some_file.stat().st_atime 39 | 40 | access_time = time.time() - 60 * 60 * 24 41 | touch_access_time(some_file, access_time) 42 | 43 | assert some_file.stat().st_atime == access_time 44 | 45 | 46 | def test_touch_access_time_auto(some_file): 47 | orig_access = some_file.stat().st_atime 48 | 49 | _sleep() 50 | new_time = touch_access_time(some_file) 51 | 52 | assert some_file.stat().st_atime == new_time 53 | assert orig_access < new_time 54 | 55 | 56 | # Cache Classes =============================================================== 57 | 58 | # Boards w/ default cache ===================================================== 59 | 60 | 61 | def test_pins_cache_hash_name_preserves(): 62 | cache = PinsCache(fs=filesystem("file"), hash_prefix="") 63 | assert cache.hash_name("a/b/c.txt") == Path("a/b/c.txt") 64 | 65 | 66 | def test_pins_cache_url_hash_name(): 67 | cache = PinsUrlCache(fs=filesystem("file")) 68 | hashed = cache.hash_name("http://example.com/a.txt", True) 69 | 70 | p_hash = Path(hashed) 71 | 72 | # should have form // 73 | assert p_hash.name == "a.txt" 74 | 75 | # count parent dirs, excluding root (e.g. "." 
or "/") 76 | n_parents = len(p_hash.parents) - 1 77 | assert n_parents == 2 78 | 79 | 80 | @pytest.mark.skip("TODO") 81 | def test_pins_cache_open(): 82 | # check that opening works and creates the cached file 83 | pass 84 | 85 | 86 | # Cache pruning =============================================================== 87 | 88 | 89 | @pytest.fixture 90 | def a_cache(tmp_path): 91 | return tmp_path / "board_cache" 92 | 93 | 94 | def create_metadata(p, access_time): 95 | p.mkdir(parents=True, exist_ok=True) 96 | meta = p / "data.txt" 97 | meta.touch() 98 | touch_access_time(meta, access_time) 99 | 100 | 101 | @pytest.fixture 102 | def pin1_v1(a_cache): # current 103 | v1 = a_cache / "a_pin" / "version_1" 104 | create_metadata(v1, time.time()) 105 | 106 | return v1 107 | 108 | 109 | @pytest.fixture 110 | def pin1_v2(a_cache): 111 | v2 = a_cache / "a_pin" / "version_2" 112 | create_metadata(v2, time.time() - 60 * 60 * 24) # one day ago 113 | 114 | return v2 115 | 116 | 117 | @pytest.fixture 118 | def pin2_v3(a_cache): 119 | v3 = a_cache / "other_pin" / "version_3" 120 | create_metadata(v3, time.time() - 60 * 60 * 48) # two days ago 121 | 122 | return v3 123 | 124 | 125 | def test_cache_pruner_old_versions_none(a_cache, pin1_v1): 126 | _sleep() 127 | 128 | pruner = CachePruner(a_cache) 129 | 130 | old = pruner.old_versions(days=1) 131 | 132 | assert len(old) == 0 133 | 134 | 135 | def test_cache_pruner_old_versions_days0(a_cache, pin1_v1): 136 | _sleep() 137 | 138 | pruner = CachePruner(a_cache) 139 | old = pruner.old_versions(days=0) 140 | 141 | assert len(old) == 1 142 | assert old[0] == pin1_v1 143 | 144 | 145 | def test_cache_pruner_old_versions_some(a_cache, pin1_v1, pin1_v2): 146 | _sleep() 147 | 148 | # create: tmp_dir/pin1/version1 149 | 150 | pruner = CachePruner(a_cache) 151 | 152 | old = pruner.old_versions(days=1) 153 | 154 | assert len(old) == 1 155 | assert old[0] == pin1_v2 156 | 157 | 158 | def test_cache_pruner_old_versions_multi_pins(a_cache, pin1_v2, pin2_v3): 159 | _sleep() 160 | 161 | pruner = CachePruner(a_cache) 162 | old = pruner.old_versions(days=1) 163 | 164 | assert len(old) == 2 165 | assert set(old) == {pin1_v2, pin2_v3} 166 | 167 | 168 | def test_cache_prune_prompt(a_cache, pin1_v1, pin2_v3, monkeypatch): 169 | _sleep() 170 | 171 | cache_prune(days=1, cache_root=a_cache.parent, prompt=False) 172 | 173 | versions = list(a_cache.glob("*/*")) 174 | 175 | # pin2_v3 deleted 176 | assert len(versions) == 1 177 | -------------------------------------------------------------------------------- /pins/tests/test_compat.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | 5 | from pins.errors import PinsError 6 | from pins.tests.conftest import ( 7 | PATH_TO_EXAMPLE_BOARD, 8 | PATH_TO_EXAMPLE_BOARD_DBC, 9 | PATH_TO_MANIFEST_BOARD, 10 | ) 11 | from pins.tests.helpers import skip_if_dbc, xfail_fs 12 | 13 | NOT_A_PIN = "not_a_pin_abcdefg" 14 | PIN_CSV = "df_csv" 15 | 16 | # set up board ---- 17 | 18 | 19 | @pytest.fixture(scope="session") 20 | def board(backend): 21 | board = backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD.absolute())) 22 | if board.fs.protocol == "dbc": 23 | board = backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD_DBC)) 24 | yield board 25 | 26 | backend.teardown_board(board) 27 | 28 | 29 | @pytest.fixture(scope="session") 30 | def board_manifest(backend): 31 | # skip on rsconnect, since it can't add a manifest and the pin names 32 | # are too short for use to upload (rsc requires 
names > 3 characters) 33 | if backend.fs_name in ["rsc", "dbc"]: 34 | pytest.skip() 35 | 36 | board = backend.create_tmp_board(str(PATH_TO_MANIFEST_BOARD.absolute())) 37 | 38 | yield board 39 | 40 | backend.teardown_board(board) 41 | 42 | 43 | # pin_list -------------------------------------------------------------------- 44 | 45 | 46 | def test_compat_pin_list(board): 47 | src_sorted = sorted(board.pin_list()) 48 | dst_sorted = ["df_arrow", "df_csv", "df_rds", "df_unversioned"] 49 | 50 | if board.fs.protocol == "rsc": 51 | # rsc backend uses / for full name 52 | dst_sorted = [f"{board.user_name}/{content}" for content in dst_sorted] 53 | if board.fs.protocol == "dbc": 54 | # TODO: update to match when not read-only 55 | dst_sorted = [ 56 | "cool_pin", 57 | "cool_pin2", 58 | "cool_pin3", 59 | "data", 60 | "df_csv", 61 | "reviews", 62 | "reviews2", 63 | "reviews3", 64 | ] 65 | 66 | assert src_sorted == dst_sorted 67 | 68 | 69 | # pin_versions ---------------------------------------------------------------- 70 | 71 | 72 | def test_compat_pin_versions(board): 73 | if board.fs.protocol == "rsc": 74 | pytest.skip("RSC uses bundle ids as pin versions") 75 | versions = board.pin_versions("df_csv", as_df=False) 76 | v_strings = list(v.version for v in versions) 77 | # TODO: update when dbc is not read-only 78 | if board.fs.protocol == "dbc": 79 | assert v_strings == ["20250410T083026Z-a173c"] 80 | else: 81 | assert v_strings == ["20220214T163718Z-eceac", "20220214T163720Z-9bfad"] 82 | 83 | 84 | @pytest.mark.skip("Used to diagnose os listdir ordering") 85 | def test_compat_os_listdir(): 86 | import os 87 | 88 | res = os.listdir(PATH_TO_EXAMPLE_BOARD / "df_csv") 89 | dst = ["20220214T163718Z-eceac", "20220214T163720Z-9bfad"] 90 | 91 | assert res == dst 92 | 93 | 94 | # pin_exists -------------------------------------------------------------------- 95 | 96 | 97 | def test_compat_pin_exists_succeed(board): 98 | assert board.pin_exists(PIN_CSV) 99 | 100 | 101 | def test_compat_pin_exists_fails(board): 102 | assert board.pin_exists(NOT_A_PIN) is False 103 | 104 | 105 | # pin_meta -------------------------------------------------------------------- 106 | 107 | 108 | def test_compat_pin_meta(board): 109 | # Note that this fetches the latest of 2 versions 110 | meta = board.pin_meta(PIN_CSV) 111 | 112 | if board.fs.protocol == "rsc": 113 | # TODO: afaik the bundle id is largely non-deterministic, so not possible 114 | # to test, but should think a bit more about it. 
115 | assert meta.name == "derek/df_csv" 116 | # TODO: update when dbc boards are not read-only 117 | elif board.fs.protocol == "dbc": 118 | assert meta.title == "df_csv: a pinned 3 x 2 DataFrame" 119 | assert meta.description is None 120 | assert meta.created == "20250410T083026Z" 121 | assert meta.file == "df_csv.csv" 122 | assert meta.file_size == 16 123 | assert meta.pin_hash == "a173cd6a53908980" 124 | assert meta.type == "csv" 125 | return 126 | else: 127 | assert meta.version.version == "20220214T163720Z-9bfad" 128 | assert meta.version.created == datetime.datetime(2022, 2, 14, 16, 37, 20) 129 | assert meta.version.hash == "9bfad" 130 | 131 | assert meta.name == "df_csv" 132 | 133 | assert meta.title == "df_csv: a pinned 2 x 3 data frame" 134 | assert meta.description is None 135 | assert meta.created == "20220214T163720Z" 136 | assert meta.file == "df_csv.csv" 137 | assert meta.file_size == 28 138 | assert meta.pin_hash == "9bfad6d1a322a904" 139 | assert meta.type == "csv" 140 | 141 | # TODO(question): coding api_version as a yaml float intentional? 142 | assert meta.api_version == 1.0 143 | assert meta.user == {} 144 | 145 | 146 | def test_compat_pin_meta_pin_missing(board): 147 | with pytest.raises(PinsError) as exc_info: 148 | board.pin_meta(NOT_A_PIN) 149 | 150 | assert f"{NOT_A_PIN} does not exist" in exc_info.value.args[0] 151 | 152 | 153 | @xfail_fs("rsc") 154 | def test_compat_pin_meta_version_arg(board): 155 | # note that in RSConnect the version is the bundle id 156 | # TODO: update when dbc is not read-only 157 | if board.fs.protocol == "dbc": 158 | meta = board.pin_meta(PIN_CSV, "20250410T083026Z-a173c") 159 | assert meta.version.version == "20250410T083026Z-a173c" 160 | assert meta.version.hash == "a173c" 161 | else: 162 | meta = board.pin_meta(PIN_CSV, "20220214T163718Z-eceac") 163 | assert meta.version.version == "20220214T163718Z-eceac" 164 | assert meta.version.hash == "eceac" 165 | 166 | 167 | def test_compat_pin_meta_version_arg_error(board): 168 | bad_version = "123" 169 | with pytest.raises(PinsError) as exc_info: 170 | board.pin_meta(PIN_CSV, bad_version) 171 | 172 | msg = exc_info.value.args[0] 173 | assert PIN_CSV in msg 174 | assert bad_version in msg 175 | 176 | 177 | # pin_read ---- 178 | 179 | 180 | def test_compat_pin_read(board): 181 | import pandas as pd 182 | 183 | p_data = PATH_TO_EXAMPLE_BOARD / "df_csv" / "20220214T163720Z-9bfad" / "df_csv.csv" 184 | 185 | src_df = board.pin_read("df_csv") 186 | 187 | # TODO: update when dbc boards are not read-only 188 | if board.fs.protocol == "dbc": 189 | dst_df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) 190 | else: 191 | dst_df = pd.read_csv(p_data) 192 | 193 | assert isinstance(src_df, pd.DataFrame) 194 | assert src_df.equals(dst_df) 195 | 196 | 197 | @skip_if_dbc 198 | def test_compat_pin_read_supported_rds(board): 199 | pytest.importorskip("rdata") 200 | import pandas as pd 201 | 202 | src_df = board.pin_read("df_rds") 203 | 204 | assert isinstance(src_df, pd.DataFrame) 205 | 206 | 207 | # pin_write ---- 208 | 209 | # manifest ----- 210 | 211 | 212 | def test_board_pin_write_manifest_name_error(board_manifest): 213 | if board_manifest.fs.protocol == "rsc": 214 | pytest.skip() 215 | 216 | with pytest.raises(ValueError) as exc_info: 217 | board_manifest.pin_write([1], "_pins.yaml", type="json") 218 | 219 | assert "name '_pins.yaml' is reserved for internal use." 
in exc_info.value.args[0] 220 | 221 | 222 | def test_board_manifest_pin_list_no_internal_name(board_manifest): 223 | assert set(board_manifest.pin_list()) == {"x", "y"} 224 | 225 | 226 | def test_board_manifest_pin_exist_internal_name_errors(board_manifest): 227 | with pytest.raises(ValueError) as exc_info: 228 | board_manifest.pin_exists("_pins.yaml") 229 | 230 | assert "reserved for internal use." in exc_info.value.args[0] 231 | 232 | 233 | def test_board_manifest_pin_read_internal_errors(board_manifest): 234 | with pytest.raises(ValueError) as exc_info: 235 | board_manifest.pin_read("_pins.yaml") 236 | 237 | assert "reserved for internal use." in exc_info.value.args[0] 238 | 239 | 240 | def test_board_manifest_pin_search(board_manifest): 241 | res = board_manifest.pin_search("x", as_df=False) 242 | 243 | assert len(res) == 1 244 | assert res[0].name == "x" 245 | -------------------------------------------------------------------------------- /pins/tests/test_compat_old_types.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from importlib_resources import files 3 | 4 | from pins import board_folder 5 | 6 | OLD_BOARD = files("pins") / "tests" / "pins-old-types" 7 | DST_DF = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}) 8 | 9 | 10 | def test_compat_old_types_load_table(): 11 | board = board_folder(OLD_BOARD) 12 | src_df = board.pin_read("a-table") 13 | 14 | assert isinstance(src_df, pd.DataFrame) 15 | assert src_df.equals(DST_DF) 16 | -------------------------------------------------------------------------------- /pins/tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from pins import config 6 | from pins.tests.helpers import rm_env 7 | 8 | 9 | @pytest.fixture 10 | def env_unset(): 11 | with rm_env( 12 | config.PINS_ENV_DATA_DIR, 13 | config.PINS_ENV_CACHE_DIR, 14 | config.PINS_ENV_INSECURE_READ, 15 | ): 16 | yield 17 | 18 | 19 | def test_allow_pickle_read_no_env(env_unset): 20 | assert config.get_allow_pickle_read(True) is True 21 | assert config.get_allow_pickle_read(False) is False 22 | 23 | 24 | def test_allow_pickle_read_env_1(env_unset): 25 | os.environ[config.PINS_ENV_INSECURE_READ] = "1" 26 | 27 | assert config.get_allow_pickle_read(True) is True 28 | assert config.get_allow_pickle_read(False) is False 29 | assert config.get_allow_pickle_read(None) is True 30 | 31 | 32 | def test_allow_pickle_read_env_0(env_unset): 33 | os.environ[config.PINS_ENV_INSECURE_READ] = "0" 34 | 35 | assert config.get_allow_pickle_read(True) is True 36 | assert config.get_allow_pickle_read(False) is False 37 | assert config.get_allow_pickle_read(None) is False 38 | -------------------------------------------------------------------------------- /pins/tests/test_constructors.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | import pytest 6 | from pandas.testing import assert_frame_equal 7 | 8 | from pins import constructors as c 9 | from pins.tests.conftest import ( 10 | EXAMPLE_REL_PATH, 11 | PATH_TO_EXAMPLE_BOARD, 12 | PATH_TO_EXAMPLE_VERSION, 13 | ) 14 | from pins.tests.helpers import rm_env, skip_if_dbc 15 | 16 | 17 | @pytest.fixture 18 | def df_csv(): 19 | return pd.read_csv(PATH_TO_EXAMPLE_VERSION / "df_csv.csv") 20 | 21 | 22 | def check_dir_writable(p_dir): 23 | assert p_dir.parent.exists() 24 | assert 
os.access(p_dir.parent.absolute(), os.W_OK) 25 | 26 | 27 | def check_cache_file_path(p_file, p_cache): 28 | rel_path = p_file.relative_to(p_cache) 29 | 30 | # parents has every entry you'd get if you called .parents all the way to some root. 31 | # for a relative path, the root is likely ".", so we subtract 1 to get the number 32 | # of parent directories. 33 | # note this essentially counts slashes, in an inter-OS friendly way. 34 | n_parents = len(rel_path.parents) - 1 35 | assert n_parents == 2 36 | 37 | 38 | def construct_from_board(board): 39 | prot = board.fs.protocol 40 | fs_name = prot if isinstance(prot, str) else prot[0] 41 | 42 | if fs_name in ["file", ("file", "local")]: 43 | board = c.board_folder(board.board) 44 | elif fs_name == "dbc": 45 | board = c.board_databricks(board.board) 46 | elif fs_name == "rsc": 47 | board = c.board_rsconnect( 48 | server_url=board.fs.api.server_url, api_key=board.fs.api.api_key 49 | ) 50 | elif fs_name == "abfs": 51 | board = c.board_azure(board.board) 52 | elif fs_name == "gs": 53 | board = c.board_gcs(board.board) 54 | else: 55 | board = getattr(c, f"board_{fs_name}")(board.board) 56 | 57 | return board 58 | 59 | 60 | # End-to-end constructor tests 61 | 62 | 63 | # there are two facets of boards: reading and writing. 64 | # copied from test_compat 65 | @pytest.mark.skip_on_github 66 | def test_constructor_board_url_data(tmp_cache, http_example_board_path, df_csv): 67 | board = c.board_url( 68 | http_example_board_path, 69 | # could derive from example version path 70 | pin_paths={"df_csv": "df_csv/20220214T163720Z-9bfad/"}, 71 | ) 72 | 73 | df = board.pin_read("df_csv") 74 | 75 | # check data ---- 76 | assert_frame_equal(df, df_csv) 77 | 78 | 79 | @pytest.mark.xfail 80 | @pytest.mark.skip_on_github 81 | def test_constructor_board_url_cache( 82 | tmp_cache, http_example_board_path, df_csv, tmp_path 83 | ): 84 | # TODO: downloading a pin does not put files in the same directory, since 85 | # in this case we are hashing on the full url. 86 | 87 | board = c.board_url( 88 | http_example_board_path, 89 | # could derive from example version path 90 | pin_paths={"df_csv": "df_csv/20220214T163718Z-eceac/"}, 91 | ) 92 | 93 | board.pin_read("df_csv") 94 | 95 | # cannot write or view pin versions 96 | 97 | with pytest.raises(NotImplementedError): 98 | board.pin_write(df_csv) 99 | with pytest.raises(NotImplementedError): 100 | board.pin_versions("df_csv") 101 | with pytest.raises(NotImplementedError): 102 | board.pin_version_delete(name="df_csv", version="20220214T163718Z") 103 | with pytest.raises(NotImplementedError): 104 | df = pd.DataFrame({"x": [1, 2, 3]}) 105 | path = tmp_path / "data.csv" 106 | df.to_csv(path, index=False) 107 | board.pin_upload(path, "cool_pin") 108 | 109 | # check cache ---- 110 | http_dirs = list(tmp_cache.glob("http_*")) 111 | 112 | assert len(http_dirs) == 1 113 | 114 | # there are two files in the flat cache (metadata, and the csv) 115 | parent = http_dirs[0] 116 | res = list(parent.rglob("*")) 117 | assert len(res) == 2 118 | 119 | # validate that it creates an empty metadata file 120 | assert len([x for x in res if str(x).endswith("df_csv.csv")]) == 1 121 | assert len([x for x in res if str(x).endswith("data.txt")]) == 1 122 | 123 | assert len(list(parent.glob("**/*"))) == 2 124 | 125 | 126 | @pytest.mark.skip_on_github 127 | def test_constructor_board_url_file(tmp_cache, http_example_board_path): 128 | # TODO: downloading a pin does not put files in the same directory, since 129 | # in this case we are hashing on the full url.
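# (concretely: board_url caches each downloaded file under a single "http_*"
# directory keyed by the full url, rather than under per-pin, per-version
# subdirectories — see the cache assertions below)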
130 | 131 | board = c.board_url( 132 | http_example_board_path, 133 | # could derive from example version path 134 | pin_paths={"df_csv": "df_csv/20220214T163718Z-eceac/df_csv.csv"}, 135 | ) 136 | 137 | board.pin_download("df_csv") 138 | 139 | # check cache ---- 140 | http_dirs = list(tmp_cache.glob("http_*")) 141 | 142 | assert len(http_dirs) == 1 143 | 144 | # there are two files in the flat cache (metadata, and the csv) 145 | parent = http_dirs[0] 146 | res = list(parent.rglob("*")) 147 | assert len(res) == 1 148 | 149 | assert str(res[0]).endswith("df_csv.csv") 150 | 151 | new_board = eval(c.board_deparse(board), c.__dict__) 152 | assert new_board.pin_list() == board.pin_list() 153 | 154 | 155 | @pytest.mark.skip_on_github 156 | def test_constructor_board_github(tmp_cache, http_example_board_path, df_csv): 157 | board = c.board_github("rstudio", "pins-python", EXAMPLE_REL_PATH) # noqa 158 | 159 | df = board.pin_read("df_csv") 160 | assert_frame_equal(df, df_csv) 161 | 162 | cache_options = list(tmp_cache.glob("github_*")) 163 | assert len(cache_options) == 1 164 | cache_dir = cache_options[0] 165 | 166 | res = list(cache_dir.rglob("**/*.csv")) 167 | assert len(res) == 1 168 | 169 | check_cache_file_path(res[0], cache_dir) 170 | 171 | 172 | @pytest.fixture(scope="function") 173 | def board(backend): 174 | # TODO: copied from test_compat.py 175 | 176 | board = backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD.absolute())) 177 | yield board 178 | backend.teardown_board(board) 179 | 180 | 181 | @skip_if_dbc # passes, but skipping since this cannot clean itself up properly 182 | def test_constructor_boards(board, df_csv, tmp_cache): 183 | # TODO: would be nice to have fixtures for each board constructor 184 | # doesn't need to copy over pins-compat content 185 | 186 | # create board from constructor ------------------------------------------- 187 | board = construct_from_board(board) 188 | 189 | # read a pin and check its contents --------------------------------------- 190 | 191 | df = board.pin_read("df_csv") 192 | 193 | # check data 194 | # TODO: update when dbc boards are not read-only 195 | if board.fs.protocol == "dbc": 196 | pass 197 | else: 198 | assert_frame_equal(df, df_csv) 199 | 200 | # check the cache structure ----------------------------------------------- 201 | 202 | # check cache 203 | if board.fs.protocol in ["file", ("file", "local")]: 204 | # no caching for local file boards 205 | pass 206 | else: 207 | # check path structure ---- 208 | 209 | options = list(tmp_cache.glob("*")) 210 | assert len(options) == 1 211 | 212 | cache_dir = options[0] 213 | res = list(cache_dir.rglob("*/*.csv")) 214 | assert len(res) == 1 215 | 216 | check_cache_file_path(res[0], cache_dir) 217 | 218 | # check cache touch on access time ---- 219 | 220 | meta = board.pin_meta("df_csv") 221 | p_cache_meta = ( 222 | Path(board._get_cache_path(meta.name, meta.version.version)) / "data.txt" 223 | ) 224 | orig_access = p_cache_meta.stat().st_atime 225 | 226 | board.pin_meta("df_csv") 227 | 228 | new_access = p_cache_meta.stat().st_atime 229 | 230 | assert orig_access < new_access 231 | 232 | 233 | @pytest.fixture(scope="function") 234 | def board2(backend): 235 | board2 = backend.create_tmp_board() 236 | yield board2 237 | backend.teardown_board(board2) 238 | 239 | 240 | @skip_if_dbc 241 | def test_constructor_boards_multi_user(board2, df_csv, tmp_cache): 242 | prot = board2.fs.protocol 243 | fs_name = prot if isinstance(prot, str) else prot[0] 244 | 245 | if fs_name == "rsc": 246 | # TODO: RSConnect 
writes pin names like /, so would need to 247 | # modify test 248 | pytest.skip() 249 | elif fs_name == "abfs": 250 | fs_name = "azure" 251 | 252 | first = construct_from_board(board2) 253 | 254 | first.pin_write(df_csv, "df_csv", type="csv") 255 | assert first.pin_list() == ["df_csv"] 256 | 257 | second = construct_from_board(board2) 258 | second.pin_write(df_csv, "another_df_csv", type="csv") 259 | 260 | assert sorted(second.pin_list()) == sorted(["df_csv", "another_df_csv"]) 261 | 262 | 263 | # Board particulars =========================================================== 264 | 265 | 266 | @pytest.mark.skip_on_github 267 | def test_board_constructor_local_default_writable(): 268 | with rm_env("PINS_DATA_DIR"): 269 | board = c.board_local() 270 | p_board = Path(board.board) 271 | 272 | check_dir_writable(p_board) 273 | assert p_board.name == "pins-py" 274 | 275 | 276 | def test_board_constructor_temp_writable(): 277 | with rm_env("PINS_DATA_DIR"): 278 | board = c.board_temp() 279 | p_board = Path(board.board) 280 | 281 | check_dir_writable(p_board) 282 | assert len(list(p_board.glob("*"))) == 0 283 | 284 | 285 | def test_board_constructor_folder(tmp_path: Path, df): 286 | board = c.board_folder(str(tmp_path)) 287 | board.pin_write(df, "some_df", type="csv") 288 | 289 | assert (tmp_path / "some_df").exists() 290 | df2 = board.pin_read("some_df") 291 | 292 | assert df.equals(df2) 293 | 294 | 295 | # Deparsing =================================================================== 296 | 297 | 298 | def test_board_deparse(board): 299 | prot = board.fs.protocol 300 | 301 | with rm_env("CONNECT_API_KEY"): 302 | if prot == "rsc": 303 | os.environ["CONNECT_API_KEY"] = board.fs.api.api_key 304 | 305 | new_board = eval(c.board_deparse(board), c.__dict__) 306 | new_board.pin_list() 307 | -------------------------------------------------------------------------------- /pins/tests/test_drivers.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | 5 | import fsspec 6 | import pandas as pd 7 | import pytest 8 | 9 | from pins._adaptors import create_adaptor 10 | from pins.config import PINS_ENV_INSECURE_READ 11 | from pins.drivers import default_title, load_data, load_path, save_data 12 | from pins.errors import PinsInsecureReadError 13 | from pins.meta import MetaRaw 14 | from pins.tests.helpers import rm_env 15 | 16 | 17 | @pytest.fixture 18 | def some_joblib(tmp_path: Path): 19 | import joblib 20 | 21 | p_obj = tmp_path / "some.joblib" 22 | joblib.dump({"a": 1}, p_obj) 23 | 24 | return p_obj 25 | 26 | 27 | # default title --------------------------------------------------------------- 28 | 29 | 30 | class ExC: 31 | class D: 32 | pass 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "obj, dst_title", 37 | [ 38 | (pd.DataFrame({"x": [1, 2]}), "somename: a pinned 2 x 1 DataFrame"), 39 | (pd.DataFrame({"x": [1], "y": [2]}), "somename: a pinned 1 x 2 DataFrame"), 40 | (ExC(), "somename: a pinned ExC object"), 41 | (ExC().D(), "somename: a pinned ExC.D object"), 42 | ([1, 2, 3], "somename: a pinned list object"), 43 | ], 44 | ) 45 | def test_default_title(obj, dst_title): 46 | res = default_title(obj, "somename") 47 | assert res == dst_title 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "type_", 52 | [ 53 | "csv", 54 | "arrow", 55 | "parquet", 56 | "joblib", 57 | ], 58 | ) 59 | def test_driver_roundtrip(tmp_path: Path, type_): 60 | # TODO: I think this test highlights the challenge of getting the flow 61 | # 
between metadata, drivers, and the metafactory right. 62 | # There is the name of the data (relative to the pin directory), and the full 63 | # name of data in its temporary directory. 64 | import pandas as pd 65 | 66 | df = pd.DataFrame({"x": [1, 2, 3]}) 67 | 68 | fname = "some_df" 69 | full_file = f"{fname}.{type_}" 70 | 71 | p_obj = tmp_path / fname 72 | res_fname = save_data(df, p_obj, type_) 73 | 74 | assert Path(res_fname).name == full_file 75 | 76 | meta = MetaRaw(full_file, type_, "my_pin") 77 | obj = load_data(meta, fsspec.filesystem("file"), tmp_path, allow_pickle_read=True) 78 | 79 | assert df.equals(obj) 80 | 81 | 82 | @pytest.mark.parametrize( 83 | "type_", 84 | [ 85 | "json", 86 | ], 87 | ) 88 | def test_driver_roundtrip_json(tmp_path: Path, type_): 89 | df = {"x": [1, 2, 3]} 90 | 91 | fname = "some_df" 92 | full_file = f"{fname}.{type_}" 93 | 94 | p_obj = tmp_path / fname 95 | res_fname = save_data(df, p_obj, type_) 96 | 97 | assert Path(res_fname).name == full_file 98 | 99 | meta = MetaRaw(full_file, type_, "my_pin") 100 | obj = load_data(meta, fsspec.filesystem("file"), tmp_path, allow_pickle_read=True) 101 | 102 | assert df == obj 103 | 104 | 105 | def test_driver_feather_write_error(tmp_path: Path): 106 | import pandas as pd 107 | 108 | df = pd.DataFrame({"x": [1, 2, 3]}) 109 | 110 | fname = "some_df" 111 | 112 | p_obj = tmp_path / fname 113 | 114 | with pytest.raises(NotImplementedError) as exc_info: 115 | save_data(df, p_obj, "feather") 116 | 117 | assert '"feather" no longer supported.' in exc_info.value.args[0] 118 | 119 | 120 | def test_driver_feather_read_backwards_compat(tmp_path: Path): 121 | import pandas as pd 122 | 123 | df = pd.DataFrame({"x": [1, 2, 3]}) 124 | 125 | fname = "some_df" 126 | full_file = f"{fname}.feather" 127 | 128 | df.to_feather(tmp_path / full_file) 129 | 130 | obj = load_data( 131 | MetaRaw(full_file, "feather", "my_pin"), fsspec.filesystem("file"), tmp_path 132 | ) 133 | 134 | assert df.equals(obj) 135 | 136 | 137 | def test_driver_pickle_read_fail_explicit(some_joblib): 138 | meta = MetaRaw(some_joblib.name, "joblib", "my_pin") 139 | with pytest.raises(PinsInsecureReadError): 140 | load_data( 141 | meta, fsspec.filesystem("file"), some_joblib.parent, allow_pickle_read=False 142 | ) 143 | 144 | 145 | def test_driver_pickle_read_fail_default(some_joblib): 146 | meta = MetaRaw(some_joblib.name, "joblib", "my_pin") 147 | with rm_env(PINS_ENV_INSECURE_READ), pytest.raises(PinsInsecureReadError): 148 | load_data( 149 | meta, fsspec.filesystem("file"), some_joblib.parent, allow_pickle_read=False 150 | ) 151 | 152 | 153 | def test_driver_apply_suffix_false(tmp_path: Path): 154 | import pandas as pd 155 | 156 | df = pd.DataFrame({"x": [1, 2, 3]}) 157 | 158 | fname = "some_df" 159 | type_ = "csv" 160 | 161 | p_obj = tmp_path / fname 162 | res_fname = save_data(df, p_obj, type_, apply_suffix=False) 163 | 164 | assert Path(res_fname).name == "some_df" 165 | 166 | 167 | class TestSaveData: 168 | def test_accepts_pandas_df(self, tmp_path: Path): 169 | import pandas as pd 170 | 171 | df = pd.DataFrame({"x": [1, 2, 3]}) 172 | result = save_data(df, tmp_path / "some_df", "csv") 173 | assert Path(result) == tmp_path / "some_df.csv" 174 | 175 | def test_accepts_adaptor(self, tmp_path: Path): 176 | import pandas as pd 177 | 178 | df = pd.DataFrame({"x": [1, 2, 3]}) 179 | adaptor = create_adaptor(df) 180 | result = save_data(adaptor, tmp_path / "some_df", "csv") 181 | assert Path(result) == tmp_path / "some_df.csv" 182 | 183 | 184 | class TestLoadFile: 185 | 
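# (the three tests below pin down load_path's behavior: a plain file name
# passes through unchanged, the legacy "table" type always resolves to
# "data.csv", and a non-None version is prefixed onto the path, e.g. "v1/a")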
def test_str_file(self): 186 | class _MockMetaStrFile: 187 | file: str = "a" 188 | type: str = "csv" 189 | 190 | assert load_path(_MockMetaStrFile().file, None, _MockMetaStrFile().type) == "a" 191 | 192 | def test_table(self): 193 | class _MockMetaTable: 194 | file: str = "a" 195 | type: str = "table" 196 | 197 | assert load_path(_MockMetaTable().file, None, _MockMetaTable().type) == "data.csv" 198 | 199 | def test_version(self): 200 | class _MockMetaTable: 201 | file: str = "a" 202 | type: str = "csv" 203 | 204 | assert load_path(_MockMetaTable().file, "v1", _MockMetaTable().type) == "v1/a" 205 | -------------------------------------------------------------------------------- /pins/tests/test_meta.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from datetime import datetime 3 | from io import StringIO 4 | 5 | import pytest 6 | import yaml 7 | 8 | from pins.meta import Meta, MetaFactory 9 | from pins.versions import Version 10 | 11 | META_DEFAULTS = { 12 | "title": "some title", 13 | "description": "some description", 14 | "file": "some_file.csv", 15 | "file_size": 3, 16 | "pin_hash": "abcdef", 17 | "created": "20001230T124647Z", 18 | "type": "csv", 19 | "api_version": 1, 20 | "version": Version(datetime(2000, 12, 30, 12, 46, 47), "abcdef"), 21 | } 22 | 23 | 24 | @pytest.fixture 25 | def meta(): 26 | return Meta(**META_DEFAULTS) 27 | 28 | 29 | @pytest.mark.xfail 30 | def test_meta_to_dict_is_recursive(meta): 31 | d_meta = meta.to_dict() 32 | assert d_meta["version"] == meta.version.to_dict() 33 | 34 | 35 | def test_meta_to_pin_dict_roundtrip(meta): 36 | d_meta = meta.to_pin_dict() 37 | meta2 = Meta.from_pin_dict(d_meta, meta.name, meta.version) 38 | assert meta == meta2 39 | 40 | 41 | def test_meta_unknown_fields(): 42 | m = Meta(**META_DEFAULTS, unknown_fields={"some_other_field": 1}) 43 | 44 | assert m.some_other_field == 1 45 | 46 | with pytest.raises(AttributeError): 47 | m.should_not_exist_here 48 | 49 | assert "unknown_fields" not in m.to_pin_dict() 50 | assert "some_other_field" not in m.to_pin_dict() 51 | 52 | 53 | def test_meta_factory_create(): 54 | mf = MetaFactory() 55 | with tempfile.TemporaryDirectory() as tmp_dir: 56 | tmp_file = f"{tmp_dir}/some_name" 57 | with open(tmp_file, "wb") as f: 58 | f.write(b"test") 59 | 60 | kwargs = { 61 | "title": "some title", 62 | "description": "some description", 63 | "user": {}, 64 | "type": "csv", 65 | "name": "some_name", 66 | } 67 | 68 | meta = mf.create(tmp_dir, tmp_file, **kwargs) 69 | 70 | # test that kwargs are passed through ---- 71 | for k, v in kwargs.items(): 72 | assert getattr(meta, k) == v 73 | 74 | # test calculated fields ---- 75 | # TODO(compat): should append suffix to name attr (like in R pins)? 76 | # otherwise, will break cross compat? 
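    # i.e., R pins would record the suffixed name (e.g. "some_name.csv"),
    # whereas the assertion below expects the bare name.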
77 | assert meta.file == "some_name" 78 | assert meta.file_size == 4 79 | 80 | 81 | def test_meta_factory_read_yaml_roundtrip(meta): 82 | pin_yaml = meta.to_pin_yaml() 83 | 84 | mf = MetaFactory() 85 | meta2 = mf.read_pin_yaml(StringIO(pin_yaml), meta.name, meta.version) 86 | 87 | assert meta == meta2 88 | 89 | 90 | def test_meta_factory_roundtrip_unknown(meta): 91 | meta_dict = meta.to_pin_dict() 92 | meta_dict["some_other_field"] = 1 93 | 94 | pin_yaml = yaml.dump(meta_dict) 95 | 96 | mf = MetaFactory() 97 | 98 | meta2 = mf.read_pin_yaml(StringIO(pin_yaml), meta.name, meta.version) 99 | 100 | assert meta2 == meta 101 | assert meta2.some_other_field == 1 102 | -------------------------------------------------------------------------------- /pins/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pins.config import pins_options 4 | from pins.utils import inform 5 | 6 | 7 | @pytest.fixture 8 | def quiet(): 9 | orig = pins_options.quiet 10 | pins_options.quiet = True 11 | yield 12 | pins_options.quiet = orig 13 | 14 | 15 | def test_inform(capsys): 16 | msg = "a message" 17 | inform(None, msg) 18 | captured = capsys.readouterr() 19 | assert captured.err == msg + "\n" 20 | 21 | 22 | def test_inform_quiet(quiet, capsys): 23 | inform(None, "a message") 24 | captured = capsys.readouterr() 25 | assert captured.err == "" 26 | -------------------------------------------------------------------------------- /pins/tests/test_versions.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from io import BytesIO 3 | 4 | import pytest 5 | import xxhash 6 | 7 | from pins.errors import PinsVersionError 8 | from pins.versions import Version 9 | 10 | EXAMPLE_DATE = datetime(2021, 1, 2, 13, 58, 59) 11 | 12 | 13 | @pytest.fixture 14 | def bytes_(): 15 | return BytesIO(b"123"), xxhash.xxh64(b"123").hexdigest() 16 | 17 | 18 | def test_version_from_string(): 19 | version = Version.from_string("20220209T220116Z-baf3f") 20 | assert str(version.created) == "2022-02-09 22:01:16" 21 | assert version.hash == "baf3f" 22 | 23 | 24 | def test_version_from_string_too_many_hyphens(): 25 | with pytest.raises( 26 | PinsVersionError, match="version string can only have 1 '-', but contains 2" 27 | ): 28 | Version.from_string("20220209T220116Z-baf3f-") 29 | 30 | 31 | def test_version_from_string_too_few_hyphens(): 32 | with pytest.raises( 33 | PinsVersionError, match="version string can only have 1 '-', but contains 0" 34 | ): 35 | Version.from_string("20220209T220116Zbaf3f") 36 | 37 | 38 | def test_version_from_string_baddate(): 39 | with pytest.raises(PinsVersionError, match="Invalid date part of version: bug"): 40 | Version.from_string("bug-baf3f") 41 | 42 | 43 | def test_version_hash_file(bytes_): 44 | f_bytes, digest = bytes_ 45 | assert Version.hash_file(f_bytes) == digest 46 | 47 | 48 | def test_version_from_files(bytes_): 49 | f_bytes, digest = bytes_ 50 | v = Version.from_files([f_bytes], EXAMPLE_DATE) 51 | 52 | assert v.hash == digest 53 | assert v.created == EXAMPLE_DATE 54 | -------------------------------------------------------------------------------- /pins/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import sys 4 | from functools import update_wrapper 5 | from types import MethodType 6 | from warnings import warn 7 | 8 | from .config import pins_options 9 | 10 | 11 | def inform(log, 
msg): 12 | if log is not None: 13 | log.info(msg) 14 | 15 | if not pins_options.quiet: 16 | print(msg, file=sys.stderr) 17 | 18 | 19 | def warn_deprecated(msg): 20 | warn(msg, DeprecationWarning) 21 | 22 | 23 | def hash_name(path, same_name): 24 | if same_name: 25 | _hash = os.path.basename(path) 26 | else: 27 | _hash = hashlib.sha256(path.encode()).hexdigest() 28 | return _hash 29 | 30 | 31 | class ExtendMethodDoc: 32 | # Note that the indentation assumes these are top-level method docstrings, 33 | # so are indented 8 spaces (after the initial sentence). 34 | template = """\ 35 | {current_doc} 36 | 37 | Parent method documentation: 38 | 39 | {parent_doc} 40 | """ 41 | 42 | def __init__(self, func): 43 | self.func = func 44 | 45 | # allows sphinx to add the method signature to the docs 46 | # this is pretty benign, since it's very hard to call a descriptor 47 | # after class initialization (where __set_name__ is called). 48 | self.__call__ = func 49 | 50 | def __set_name__(self, owner, name): 51 | bound_parent_meth = getattr(super(owner, owner), name) 52 | 53 | self._parent_doc = bound_parent_meth.__doc__ 54 | self._orig_doc = self.func.__doc__ 55 | 56 | if self._orig_doc is not None: 57 | # update the docstring of the subclass method to include parent doc. 58 | self.func.__doc__ = self.template.format( 59 | current_doc=self._orig_doc, parent_doc=self._parent_doc 60 | ) 61 | 62 | # make descriptor look like wrapped function 63 | update_wrapper( 64 | self, self.func, ("__doc__", "__name__", "__module__", "__qualname__") 65 | ) 66 | 67 | def __get__(self, obj, objtype=None): 68 | if obj is None: 69 | # accessing from class, return descriptor itself. 70 | return self 71 | 72 | # accessing from instance 73 | return MethodType(self.func, obj) 74 | 75 | def __call__(self, *args, **kwargs): 76 | # this is defined, so that callable(ExtendMethodDoc(...)) is True, 77 | # which allows all the inspect machinery to give sphinx the __call__ 78 | # attribute we set in __init__. 
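        #
        # A hypothetical usage sketch (illustrative names, not from this
        # module): given a Parent class whose pin_read has a docstring, a
        # subclass can write
        #
        #     class Child(Parent):
        #         @ExtendMethodDoc
        #         def pin_read(self, name):
        #             """Child-specific notes."""
        #             ...
        #
        # after which Child.pin_read.__doc__ contains the child notes
        # followed by the "Parent method documentation:" section rendered
        # from the template above.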
79 | raise NotImplementedError() 80 | 81 | 82 | # based off fsspec.isfilelike 83 | def isfilelike(file) -> bool: 84 | for attr in ["read", "close", "tell"]: 85 | if not hasattr(file, attr): 86 | return False 87 | return True 88 | -------------------------------------------------------------------------------- /pins/versions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from collections.abc import Mapping, Sequence 5 | from dataclasses import asdict, dataclass 6 | from datetime import datetime 7 | from pathlib import Path 8 | 9 | from xxhash import xxh64 10 | 11 | from ._types import IOBase, StrOrFile 12 | from .errors import PinsVersionError 13 | 14 | _log = logging.getLogger(__name__) 15 | 16 | VERSION_TIME_FORMAT = "%Y%m%dT%H%M%SZ" 17 | 18 | 19 | class _VersionBase: 20 | pass 21 | 22 | 23 | @dataclass 24 | class VersionRaw(_VersionBase): 25 | version: str 26 | 27 | def to_dict(self) -> Mapping: 28 | return asdict(self) 29 | 30 | 31 | @dataclass 32 | class Version(_VersionBase): 33 | created: datetime 34 | hash: str 35 | 36 | def to_dict(self) -> Mapping: 37 | # properties not automatically added, so need to handle manually 38 | res = asdict(self) 39 | res["version"] = self.version 40 | 41 | return res 42 | 43 | @property 44 | def version(self) -> str: 45 | date_part = self.created.strftime(VERSION_TIME_FORMAT) 46 | hash_part = self.hash[:5] 47 | return f"{date_part}-{hash_part}" 48 | 49 | @staticmethod 50 | def parse_created(x): 51 | return datetime.strptime(x, VERSION_TIME_FORMAT) 52 | 53 | def render_created(self): 54 | return self.created.strftime(VERSION_TIME_FORMAT) 55 | 56 | @staticmethod 57 | def hash_file(f: IOBase, block_size: int = -1) -> str: 58 | # TODO: what kind of things implement the "buffer API"? 59 | hasher = xxh64() 60 | buf = f.read(block_size) 61 | while len(buf) > 0: 62 | hasher.update(buf) 63 | buf = f.read(block_size) 64 | 65 | return hasher.hexdigest() 66 | 67 | @classmethod 68 | def from_string(cls, version: str) -> Version: 69 | parts = version.split("-") 70 | 71 | if len(parts) != 2: 72 | raise PinsVersionError( 73 | f"version string can only have 1 '-', but contains {len(parts) - 1}" 74 | ) 75 | 76 | dt_string, hash_ = parts 77 | 78 | # TODO: the datetime from pins is not timezone aware, but it looks like 79 | # R pins parses as UTC, then unsets the UTC part? 80 | try: 81 | created = cls.parse_created(dt_string) 82 | except ValueError: 83 | raise PinsVersionError(f"Invalid date part of version: {dt_string}") 84 | 85 | obj = cls(created, hash_) 86 | 87 | if obj.version != version: 88 | raise ValueError( 89 | f"Version parsing failed. Received version string {version}, but " 90 | f"output version is {obj.version}."
91 | ) 92 | 93 | return obj 94 | 95 | @classmethod 96 | def from_files( 97 | cls, files: Sequence[StrOrFile], created: datetime | None = None 98 | ) -> Version: 99 | hashes = [] 100 | for f in files: 101 | hash_ = cls.hash_file(open(f, "rb") if isinstance(f, (str, Path)) else f) 102 | hashes.append(hash_) 103 | 104 | if created is None: 105 | created = datetime.now() 106 | 107 | if len(hashes) > 1: 108 | # Combine the hashes into a single string 109 | combined_hashes = "".join(hashes) 110 | 111 | # Create an xxh64 hash of the combined string 112 | hashes = [xxh64(combined_hashes).hexdigest()] 113 | 114 | return cls(created, hashes[0]) 115 | 116 | @classmethod 117 | def from_meta_fields(cls, created: str, hash: str): 118 | created_dt = cls.parse_created(created) 119 | return cls(created_dt, hash) 120 | 121 | 122 | def guess_version(x: str): 123 | try: 124 | return Version.from_string(x) 125 | except PinsVersionError: 126 | return VersionRaw(x) 127 | 128 | 129 | def version_setup(board, name, new_version, versioned): 130 | if board.pin_exists(name): 131 | versions_df = board.pin_versions(name, as_df=True) 132 | versions = versions_df["version"].to_list() 133 | old_version = versions[-1] 134 | n_versions = len(versions) 135 | 136 | else: 137 | n_versions = 0 138 | 139 | # if versioned was not specified, default to versioned when the pin already has multiple versions; otherwise fall back to the board setting 140 | if versioned is None: 141 | versioned = True if n_versions > 1 else board.versioned 142 | 143 | if versioned or n_versions == 0: 144 | _log.info(f"Creating new version '{new_version}'") 145 | elif n_versions == 1: 146 | _log.info(f"Replacing version '{old_version}' with '{new_version}'") 147 | board.pin_version_delete(name, old_version) 148 | else: 149 | raise PinsVersionError( 150 | "Pin is versioned, but you have requested a write without versions. " 151 | "To un-version a pin, you must delete it." 152 | ) 153 | 154 | return new_version 155 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pins" 3 | description = "Publish data sets, models, and other Python objects, making it easy to share them across projects and with your colleagues."
4 | readme.content-type = "text/markdown" 5 | readme.file = "README.md" 6 | urls.Documentation = "https://rstudio.github.io/pins-python" 7 | urls.Homepage = "https://github.com/rstudio/pins-python" 8 | authors = [{ name = "Michael Chow", email = "michael.chow@posit.co" }] 9 | maintainers = [{ name = "Isabel Zimmerman", email = "isabel.zimmerman@posit.co" }] 10 | keywords = ["data", "tidyverse"] 11 | classifiers = [ 12 | "Programming Language :: Python :: 3 :: Only", 13 | "Programming Language :: Python :: 3.9", 14 | "Programming Language :: Python :: 3.10", 15 | "Programming Language :: Python :: 3.11", 16 | "Programming Language :: Python :: 3.12", 17 | "Programming Language :: Python :: 3.13", 18 | "License :: OSI Approved :: MIT License", 19 | ] 20 | requires-python = ">=3.9" 21 | dynamic = ["version"] 22 | dependencies = [ 23 | "appdirs<2", # Using appdirs rather than platformdirs is deliberate, see https://github.com/rstudio/pins-python/pull/239 24 | "fsspec>=2022.2", 25 | "humanize>=1", 26 | "importlib-metadata>=4.4", 27 | "importlib-resources>=1.3", 28 | "jinja2>=2.10", 29 | "joblib>=0.12", 30 | "pandas>=0.23", 31 | "pyyaml>=3.13", 32 | "requests", 33 | "xxhash>=1", 34 | "databackend>=0.0.3", 35 | "typing_extensions" 36 | ] 37 | 38 | [project.optional-dependencies] 39 | aws = ["s3fs"] 40 | azure = ["adlfs"] 41 | check = [ 42 | "pre-commit", 43 | "pyright==1.1.372", # Pinned; manually sync with .github/workflows/code-checks.yml 44 | "ruff==0.5.4", # Pinned; manually sync with pre-commit-config.yaml 45 | "types-appdirs", 46 | "databricks-sdk" 47 | ] 48 | databricks = ["databricks-sdk"] 49 | doc = [ 50 | "ipykernel", 51 | "ipython<=8.12", 52 | "nbclient", 53 | "nbformat", 54 | "quartodoc", 55 | ] 56 | gcs = ["gcsfs"] 57 | test = [ 58 | "adlfs>=2024.4.1", 59 | "fastparquet", 60 | "gcsfs", 61 | "pip-tools", 62 | "pyarrow", 63 | "pytest==7.1.3", 64 | "pytest-cases", 65 | "pytest-dotenv", 66 | "pytest-parallel", 67 | "s3fs", 68 | "rdata", 69 | "databricks-sdk", 70 | ] 71 | 72 | [build-system] 73 | requires = ["setuptools>=45", "setuptools-scm>=6.2", "wheel"] 74 | build-backend = "setuptools.build_meta" 75 | 76 | [tool.setuptools] 77 | include-package-data = true 78 | 79 | [tool.setuptools.packages] 80 | find = { namespaces = false } 81 | 82 | [tool.setuptools_scm] 83 | 84 | [tool.distutils.bdist_wheel] 85 | universal = 1 86 | 87 | [tool.pytest.ini_options] 88 | markers = [ 89 | "fs_file: mark test to only run on local filesystem", 90 | "fs_s3: mark test to only run on AWS S3 bucket filesystem", 91 | "fs_gcs: mark test to only run on Google Cloud Storage bucket filesystem", 92 | "fs_abfs: mark test to only run on Azure Datalake filesystem", 93 | "fs_rsc: mark test to only run on Posit Connect filesystem", 94 | "fs_dbc: mark test to only run on Databricks Volume filesystem", 95 | "skip_on_github: skip this test if running on GitHub", 96 | ] 97 | testpaths = ["pins"] 98 | addopts = "--doctest-modules" 99 | doctest_optionflags = "NORMALIZE_WHITESPACE" 100 | 101 | [tool.pyright] 102 | include = ["pins"] 103 | exclude = ["**/__pycache__"] 104 | ignore = ["pins/tests"] 105 | pythonVersion = "3.12" # Use the maximum version supported by python-pins 106 | pythonPlatform = "Linux" 107 | 108 | # Tracking compliance with these rules at https://github.com/rstudio/pins-python/issues/272 109 | reportArgumentType = false 110 | reportAttributeAccessIssue = false 111 | reportCallIssue = false 112 | reportIncompatibleMethodOverride = false 113 | reportMissingTypeStubs = false 114 | reportOptionalMemberAccess 
= false 115 | reportOptionalSubscript = false 116 | reportPossiblyUnboundVariable = false 117 | reportReturnType = false 118 | 119 | [tool.ruff] 120 | line-length = 90 121 | extend-exclude = ["docs"] 122 | 123 | [tool.ruff.lint] 124 | select = [ 125 | "E", # Style 126 | "F", # Errors 127 | "FA", # Use from __future__ import annotations for cleaner type hints 128 | "I", # Import sorting 129 | "UP", # Upgrade to latest supported Python syntax 130 | "W", # Style 131 | "A", # Don't shadow built-ins 132 | ] 133 | ignore = [ 134 | "E501", # Line too long 135 | "A002", # The pins interface includes builtin names in args, e.g. hash, id, etc. 136 | ] 137 | 138 | [tool.codespell] 139 | skip = ["*.js"] 140 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --extra=check --extra=doc --extra=test --output-file=- --strip-extras pyproject.toml 6 | # 7 | adlfs==2024.12.0 8 | # via pins (pyproject.toml) 9 | aiobotocore==2.22.0 10 | # via s3fs 11 | aiohappyeyeballs==2.6.1 12 | # via aiohttp 13 | aiohttp==3.12.7 14 | # via 15 | # adlfs 16 | # aiobotocore 17 | # gcsfs 18 | # s3fs 19 | aioitertools==0.12.0 20 | # via aiobotocore 21 | aiosignal==1.3.2 22 | # via aiohttp 23 | annotated-types==0.7.0 24 | # via pydantic 25 | appdirs==1.4.4 26 | # via pins (pyproject.toml) 27 | appnope==0.1.4 28 | # via 29 | # ipykernel 30 | # ipython 31 | asttokens==3.0.0 32 | # via stack-data 33 | attrs==25.3.0 34 | # via 35 | # aiohttp 36 | # jsonschema 37 | # pytest 38 | # referencing 39 | # sphobjinv 40 | azure-core==1.34.0 41 | # via 42 | # adlfs 43 | # azure-identity 44 | # azure-storage-blob 45 | azure-datalake-store==0.0.53 46 | # via adlfs 47 | azure-identity==1.23.0 48 | # via adlfs 49 | azure-storage-blob==12.25.1 50 | # via adlfs 51 | backcall==0.2.0 52 | # via ipython 53 | beartype==0.21.0 54 | # via plum-dispatch 55 | black==25.1.0 56 | # via quartodoc 57 | botocore==1.37.3 58 | # via aiobotocore 59 | build==1.2.2.post1 60 | # via pip-tools 61 | cachetools==5.5.2 62 | # via google-auth 63 | certifi==2025.4.26 64 | # via 65 | # requests 66 | # sphobjinv 67 | cffi==1.17.1 68 | # via 69 | # azure-datalake-store 70 | # cryptography 71 | cfgv==3.4.0 72 | # via pre-commit 73 | charset-normalizer==3.4.2 74 | # via requests 75 | click==8.2.1 76 | # via 77 | # black 78 | # pip-tools 79 | # quartodoc 80 | colorama==0.4.6 81 | # via griffe 82 | comm==0.2.2 83 | # via ipykernel 84 | cramjam==2.10.0 85 | # via fastparquet 86 | cryptography==45.0.3 87 | # via 88 | # azure-identity 89 | # azure-storage-blob 90 | # msal 91 | # pyjwt 92 | databackend==0.0.3 93 | # via pins (pyproject.toml) 94 | databricks-sdk==0.55.0 95 | # via pins (pyproject.toml) 96 | debugpy==1.8.14 97 | # via ipykernel 98 | decopatch==1.4.10 99 | # via pytest-cases 100 | decorator==5.2.1 101 | # via 102 | # gcsfs 103 | # ipython 104 | distlib==0.3.9 105 | # via virtualenv 106 | executing==2.2.0 107 | # via stack-data 108 | fastjsonschema==2.21.1 109 | # via nbformat 110 | fastparquet==2024.11.0 111 | # via pins (pyproject.toml) 112 | filelock==3.18.0 113 | # via virtualenv 114 | frozenlist==1.6.2 115 | # via 116 | # aiohttp 117 | # aiosignal 118 | fsspec==2025.5.1 119 | # via 120 | # adlfs 121 | # fastparquet 122 | # gcsfs 123 | # pins (pyproject.toml) 124 | # s3fs 125 | gcsfs==2025.5.1 126 | # via pins 
(pyproject.toml) 127 | google-api-core==2.25.0 128 | # via 129 | # google-cloud-core 130 | # google-cloud-storage 131 | google-auth==2.40.2 132 | # via 133 | # databricks-sdk 134 | # gcsfs 135 | # google-api-core 136 | # google-auth-oauthlib 137 | # google-cloud-core 138 | # google-cloud-storage 139 | google-auth-oauthlib==1.2.2 140 | # via gcsfs 141 | google-cloud-core==2.4.3 142 | # via google-cloud-storage 143 | google-cloud-storage==3.1.0 144 | # via gcsfs 145 | google-crc32c==1.7.1 146 | # via 147 | # google-cloud-storage 148 | # google-resumable-media 149 | google-resumable-media==2.7.2 150 | # via google-cloud-storage 151 | googleapis-common-protos==1.70.0 152 | # via google-api-core 153 | griffe==1.7.3 154 | # via quartodoc 155 | humanize==4.12.3 156 | # via pins (pyproject.toml) 157 | identify==2.6.12 158 | # via pre-commit 159 | idna==3.10 160 | # via 161 | # requests 162 | # yarl 163 | importlib-metadata==8.7.0 164 | # via 165 | # pins (pyproject.toml) 166 | # quartodoc 167 | importlib-resources==6.5.2 168 | # via 169 | # pins (pyproject.toml) 170 | # quartodoc 171 | iniconfig==2.1.0 172 | # via pytest 173 | ipykernel==6.29.5 174 | # via pins (pyproject.toml) 175 | ipython==8.12.0 176 | # via 177 | # ipykernel 178 | # pins (pyproject.toml) 179 | isodate==0.7.2 180 | # via azure-storage-blob 181 | jedi==0.19.2 182 | # via ipython 183 | jinja2==3.1.6 184 | # via pins (pyproject.toml) 185 | jmespath==1.0.1 186 | # via 187 | # aiobotocore 188 | # botocore 189 | joblib==1.5.1 190 | # via pins (pyproject.toml) 191 | jsonschema==4.24.0 192 | # via 193 | # nbformat 194 | # sphobjinv 195 | jsonschema-specifications==2025.4.1 196 | # via jsonschema 197 | jupyter-client==8.6.3 198 | # via 199 | # ipykernel 200 | # nbclient 201 | jupyter-core==5.8.1 202 | # via 203 | # ipykernel 204 | # jupyter-client 205 | # nbclient 206 | # nbformat 207 | makefun==1.16.0 208 | # via 209 | # decopatch 210 | # pytest-cases 211 | markdown-it-py==3.0.0 212 | # via rich 213 | markupsafe==3.0.2 214 | # via jinja2 215 | matplotlib-inline==0.1.7 216 | # via 217 | # ipykernel 218 | # ipython 219 | mdurl==0.1.2 220 | # via markdown-it-py 221 | msal==1.32.3 222 | # via 223 | # azure-datalake-store 224 | # azure-identity 225 | # msal-extensions 226 | msal-extensions==1.3.1 227 | # via azure-identity 228 | multidict==6.4.4 229 | # via 230 | # aiobotocore 231 | # aiohttp 232 | # yarl 233 | mypy-extensions==1.1.0 234 | # via black 235 | nbclient==0.10.2 236 | # via pins (pyproject.toml) 237 | nbformat==5.10.4 238 | # via 239 | # nbclient 240 | # pins (pyproject.toml) 241 | nest-asyncio==1.6.0 242 | # via ipykernel 243 | nodeenv==1.9.1 244 | # via 245 | # pre-commit 246 | # pyright 247 | numpy==2.2.6 248 | # via 249 | # fastparquet 250 | # pandas 251 | # rdata 252 | # xarray 253 | oauthlib==3.2.2 254 | # via requests-oauthlib 255 | packaging==25.0 256 | # via 257 | # black 258 | # build 259 | # fastparquet 260 | # ipykernel 261 | # pytest 262 | # pytest-cases 263 | # xarray 264 | pandas==2.2.3 265 | # via 266 | # fastparquet 267 | # pins (pyproject.toml) 268 | # rdata 269 | # xarray 270 | parso==0.8.4 271 | # via jedi 272 | pathspec==0.12.1 273 | # via black 274 | pexpect==4.9.0 275 | # via ipython 276 | pickleshare==0.7.5 277 | # via ipython 278 | pip-tools==7.4.1 279 | # via pins (pyproject.toml) 280 | platformdirs==4.3.8 281 | # via 282 | # black 283 | # jupyter-core 284 | # virtualenv 285 | pluggy==1.6.0 286 | # via pytest 287 | plum-dispatch==2.5.7 288 | # via quartodoc 289 | pre-commit==4.2.0 290 | # via pins 
(pyproject.toml) 291 | prompt-toolkit==3.0.51 292 | # via ipython 293 | propcache==0.3.1 294 | # via 295 | # aiohttp 296 | # yarl 297 | proto-plus==1.26.1 298 | # via google-api-core 299 | protobuf==6.31.1 300 | # via 301 | # google-api-core 302 | # googleapis-common-protos 303 | # proto-plus 304 | psutil==7.0.0 305 | # via ipykernel 306 | ptyprocess==0.7.0 307 | # via pexpect 308 | pure-eval==0.2.3 309 | # via stack-data 310 | py==1.11.0 311 | # via pytest 312 | pyarrow==20.0.0 313 | # via pins (pyproject.toml) 314 | pyasn1==0.6.1 315 | # via 316 | # pyasn1-modules 317 | # rsa 318 | pyasn1-modules==0.4.2 319 | # via google-auth 320 | pycparser==2.22 321 | # via cffi 322 | pydantic==2.11.5 323 | # via quartodoc 324 | pydantic-core==2.33.2 325 | # via pydantic 326 | pygments==2.19.1 327 | # via 328 | # ipython 329 | # rich 330 | pyjwt==2.10.1 331 | # via 332 | # msal 333 | # pyjwt 334 | pyproject-hooks==1.2.0 335 | # via 336 | # build 337 | # pip-tools 338 | pyright==1.1.372 339 | # via pins (pyproject.toml) 340 | pytest==7.1.3 341 | # via 342 | # pins (pyproject.toml) 343 | # pytest-dotenv 344 | # pytest-parallel 345 | pytest-cases==3.8.6 346 | # via pins (pyproject.toml) 347 | pytest-dotenv==0.5.2 348 | # via pins (pyproject.toml) 349 | pytest-parallel==0.1.1 350 | # via pins (pyproject.toml) 351 | python-dateutil==2.9.0.post0 352 | # via 353 | # aiobotocore 354 | # botocore 355 | # jupyter-client 356 | # pandas 357 | python-dotenv==1.1.0 358 | # via pytest-dotenv 359 | pytz==2025.2 360 | # via pandas 361 | pyyaml==6.0.2 362 | # via 363 | # pins (pyproject.toml) 364 | # pre-commit 365 | # quartodoc 366 | pyzmq==26.4.0 367 | # via 368 | # ipykernel 369 | # jupyter-client 370 | quartodoc==0.10.0 371 | # via pins (pyproject.toml) 372 | rdata==0.11.2 373 | # via pins (pyproject.toml) 374 | referencing==0.36.2 375 | # via 376 | # jsonschema 377 | # jsonschema-specifications 378 | requests==2.32.3 379 | # via 380 | # azure-core 381 | # azure-datalake-store 382 | # databricks-sdk 383 | # gcsfs 384 | # google-api-core 385 | # google-cloud-storage 386 | # msal 387 | # pins (pyproject.toml) 388 | # quartodoc 389 | # requests-oauthlib 390 | requests-oauthlib==2.0.0 391 | # via google-auth-oauthlib 392 | rich==14.0.0 393 | # via plum-dispatch 394 | rpds-py==0.25.1 395 | # via 396 | # jsonschema 397 | # referencing 398 | rsa==4.9.1 399 | # via google-auth 400 | ruff==0.5.4 401 | # via pins (pyproject.toml) 402 | s3fs==2025.5.1 403 | # via pins (pyproject.toml) 404 | six==1.17.0 405 | # via 406 | # azure-core 407 | # python-dateutil 408 | sphobjinv==2.3.1.3 409 | # via quartodoc 410 | stack-data==0.6.3 411 | # via ipython 412 | tabulate==0.9.0 413 | # via quartodoc 414 | tblib==3.1.0 415 | # via pytest-parallel 416 | tomli==2.2.1 417 | # via pytest 418 | tornado==6.5.1 419 | # via 420 | # ipykernel 421 | # jupyter-client 422 | traitlets==5.14.3 423 | # via 424 | # comm 425 | # ipykernel 426 | # ipython 427 | # jupyter-client 428 | # jupyter-core 429 | # matplotlib-inline 430 | # nbclient 431 | # nbformat 432 | types-appdirs==1.4.3.5 433 | # via pins (pyproject.toml) 434 | typing-extensions==4.14.0 435 | # via 436 | # azure-core 437 | # azure-identity 438 | # azure-storage-blob 439 | # pins (pyproject.toml) 440 | # plum-dispatch 441 | # pydantic 442 | # pydantic-core 443 | # quartodoc 444 | # rdata 445 | # referencing 446 | # typing-inspection 447 | typing-inspection==0.4.1 448 | # via pydantic 449 | tzdata==2025.2 450 | # via pandas 451 | urllib3==2.4.0 452 | # via 453 | # botocore 454 | # requests 455 
| virtualenv==20.31.2 456 | # via pre-commit 457 | watchdog==6.0.0 458 | # via quartodoc 459 | wcwidth==0.2.13 460 | # via prompt-toolkit 461 | wheel==0.45.1 462 | # via pip-tools 463 | wrapt==1.17.2 464 | # via aiobotocore 465 | xarray==2025.4.0 466 | # via rdata 467 | xxhash==3.5.0 468 | # via pins (pyproject.toml) 469 | yarl==1.20.0 470 | # via aiohttp 471 | zipp==3.22.0 472 | # via importlib-metadata 473 | 474 | # The following packages are considered to be unsafe in a requirements file: 475 | # pip 476 | # setuptools 477 | -------------------------------------------------------------------------------- /requirements/minimum.txt: -------------------------------------------------------------------------------- 1 | fsspec==2022.2.0 2 | xxhash==1.0.0 3 | pandas==0.23.0 4 | jinja2==2.10.0 5 | joblib==0.12.0 6 | importlib-metadata==4.4 7 | importlib-resources==1.3 8 | appdirs<2.0.0 9 | humanize==1.0.0 10 | databackend==0.0.3 11 | -------------------------------------------------------------------------------- /script/ci-compat-check/.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | -------------------------------------------------------------------------------- /script/ci-compat-check/Makefile: -------------------------------------------------------------------------------- 1 | BOARD_BASE_DIR=tmp 2 | BOARD_PY=$(BOARD_BASE_DIR)/board-py 3 | BOARD_R=$(BOARD_BASE_DIR)/board-r 4 | 5 | all: validate 6 | 7 | clean: 8 | rm -r $(BOARD_PY) $(BOARD_R) 9 | 10 | validate: $(BOARD_PY) $(BOARD_R) 11 | @echo "\n\nRUNNING R PINS ---\n" 12 | Rscript validate_py_to_r.R $(BOARD_PY) $(BOARD_R) 13 | @echo "\n\nRUNNING PYTHON PINS ---\n" 14 | python validate_r_to_py.py $(BOARD_PY) $(BOARD_R) 15 | 16 | $(BOARD_PY): dump_py_pins.py 17 | python dump_py_pins.py $@ 18 | 19 | $(BOARD_R): dump_r_pins.R 20 | Rscript dump_r_pins.R $@ 21 | -------------------------------------------------------------------------------- /script/ci-compat-check/dump_py_pins.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pins import board_folder 4 | from pins.data import mtcars 5 | 6 | if len(sys.argv) < 2: 7 | raise ValueError("must pass board location as command-line argument") 8 | else: 9 | BOARD_PATH = sys.argv[1] 10 | 11 | board = board_folder(BOARD_PATH) 12 | board.pin_write(mtcars, "mtcars", type="csv") 13 | -------------------------------------------------------------------------------- /script/ci-compat-check/dump_r_pins.R: -------------------------------------------------------------------------------- 1 | library(pins) 2 | args <- commandArgs(trailingOnly=TRUE) 3 | 4 | board <- board_folder(args[1]) 5 | board %>% pin_write(mtcars, "mtcars", type="csv") 6 | -------------------------------------------------------------------------------- /script/ci-compat-check/validate_py_to_r.R: -------------------------------------------------------------------------------- 1 | library(pins) 2 | 3 | args <- commandArgs(trailingOnly=TRUE) 4 | 5 | 6 | # create board ---- 7 | 8 | board_py <- board_folder(args[1]) 9 | board_r <- board_folder(args[2]) 10 | 11 | 12 | # check pins ---- 13 | 14 | cat("Checking mtcars pin\n") 15 | 16 | res_mtcars <- board_py %>% pin_read("mtcars") 17 | stopifnot(all.equal(res_mtcars, datasets::mtcars, check.attributes=FALSE)) 18 | 19 | meta_mtcars_py <- board_py %>% pin_meta("mtcars") 20 | cat("\nPython meta:\n\n") 21 | print(meta_mtcars_py) 22 | 23 | meta_mtcars_r <- board_r %>% pin_meta("mtcars") 24 | 
cat("\nR meta:\n\n") 25 | print(meta_mtcars_r) 26 | -------------------------------------------------------------------------------- /script/ci-compat-check/validate_r_to_py.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pins import board_folder, data 4 | 5 | path_py, path_r = sys.argv[1], sys.argv[2] 6 | 7 | # create board ---- 8 | 9 | board_py = board_folder(path_py) 10 | board_r = board_folder(path_r) 11 | 12 | 13 | # check pins ---- 14 | 15 | print("Checking mtcars pin") 16 | 17 | res_mtcars = board_r.pin_read("mtcars") 18 | assert res_mtcars.equals(data.mtcars) 19 | 20 | meta_mtcars_py = board_py.pin_meta("mtcars") 21 | print("\nPython meta:\n") 22 | print(meta_mtcars_py) 23 | 24 | meta_mtcars_r = board_r.pin_meta("mtcars") 25 | print("\nR meta:\n") 26 | print(meta_mtcars_r) 27 | -------------------------------------------------------------------------------- /script/setup-rsconnect/add-users.sh: -------------------------------------------------------------------------------- 1 | awk ' { system("useradd -m -s /bin/bash "$1); system("echo \""$1":"$2"\" | chpasswd"); system("id "$1) } ' /etc/users.txt 2 | -------------------------------------------------------------------------------- /script/setup-rsconnect/dump_api_keys.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | from pins.rsconnect.api import _HackyConnect 5 | 6 | OUT_FILE = sys.argv[1] 7 | 8 | 9 | def get_api_key(user, password, email): 10 | rsc = _HackyConnect("http://localhost:3939") 11 | 12 | return rsc.create_first_admin(user, password, email).api_key 13 | 14 | 15 | api_keys = { 16 | "admin": get_api_key("admin", "admin0", "admin@example.com"), 17 | "susan": get_api_key("susan", "susan", "susan@example.com"), 18 | "derek": get_api_key("derek", "derek", "derek@example.com"), 19 | } 20 | 21 | json.dump(api_keys, open(OUT_FILE, "w")) 22 | -------------------------------------------------------------------------------- /script/setup-rsconnect/rstudio-connect.gcfg: -------------------------------------------------------------------------------- 1 | [Server] 2 | DataDir = /data 3 | Address = http://localhost:3939 4 | 5 | [HTTP] 6 | Listen = :3939 7 | 8 | [Authentication] 9 | Provider = pam 10 | 11 | [Authorization] 12 | DefaultUserRole = publisher 13 | 14 | [Python] 15 | Enabled = false 16 | 17 | [RPackageRepository "CRAN"] 18 | URL = https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 19 | 20 | [RPackageRepository "RSPM"] 21 | URL = https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 22 | -------------------------------------------------------------------------------- /script/setup-rsconnect/users.txt: -------------------------------------------------------------------------------- 1 | admin admin0 2 | test test 3 | susan susan 4 | derek derek 5 | -------------------------------------------------------------------------------- /script/stage_example_bundle.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | 5 | from pins.meta import MetaFactory 6 | from pins.rsconnect.fs import PinBundleManifest 7 | 8 | p_root = Path("pins/tests/example-bundle") 9 | p_root.parent.mkdir(parents=True, exist_ok=True) 10 | 11 | p_index = p_root / "index.html" 12 | p_index.write_text("yo") 13 | 14 | p_data = p_root / "data_frame.csv" 15 | df = pd.DataFrame({"x": [1, 2, 3]}) 16 | df.to_csv(p_data) 
17 | 18 | p_meta = p_root / "data.txt" 19 | meta = MetaFactory().create(str(p_data), "csv", title="some title", name="data_frame.csv") 20 | meta.to_yaml(p_meta.open("w")) 21 | 22 | # add manifest last, since it enumerates all the files 23 | # this lets you download them individually from rsconnect 24 | PinBundleManifest.add_manifest_to_directory(str(p_root)) 25 | -------------------------------------------------------------------------------- /script/stage_r_pins.R: -------------------------------------------------------------------------------- 1 | library(pins) 2 | 3 | df <- data.frame(x = 1:2, y = c("a", "b")) 4 | df_v2 <- data.frame(x = 1:2, y = c("a", "b"), z = 3:4) 5 | 6 | #board <- board_s3("ci-pins", prefix = "r-pins-test") 7 | board <- board_folder("pins/tests/pins-compat", versioned=TRUE) 8 | 9 | all_pins <- board %>% pin_list() 10 | board %>% pin_delete(all_pins) 11 | 12 | # write two versions of df as CSV ---- 13 | board %>% pin_write(df, "df_csv", type="csv") 14 | Sys.sleep(2) 15 | board %>% pin_write(df_v2, "df_csv", type="csv") 16 | 17 | # write one version of df as arrow ---- 18 | board %>% pin_write(df, "df_arrow", type="arrow") 19 | 20 | # write one version of df as RDS ---- 21 | board %>% pin_write(df, "df_rds", type="rds") 22 | 23 | # write unversioned pin (no type given, so rds by default) 24 | board %>% pin_write(df, "df_unversioned", versioned=FALSE) 25 | -------------------------------------------------------------------------------- /script/stage_r_pins_old_types.R: -------------------------------------------------------------------------------- 1 | cache = tempfile() 2 | board_register_local(cache = cache) 3 | 4 | some_df = data.frame(a = 1:2, b = c("x","y")) 5 | pin(some_df, name="a-table") 6 | 7 | # note that pin automatically changes _ to - 8 | # TODO: for now manually copying into pins/tests/pins-old-types 9 | # Note that a trivial version name, v, is used to check the reading behavior 10 | # since pins v0 does not save versions 11 | # >>> mkdir pins/tests/pins-old-types/a-table/v/ 12 | # >>> cp -r pins/tests/pins-old-types/a-table/v/ 13 | fs::path(cache, "a-table") 14 | --------------------------------------------------------------------------------