├── .env.dev
├── .github
│   ├── CODE_OF_CONDUCT.md
│   └── workflows
│       ├── ci.yml
│       ├── code-checks.yml
│       └── cross-compat.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── MAINTAINERS.md
├── MANIFEST.in
├── Makefile
├── README.md
├── README.qmd
├── binder
│   ├── postBuild
│   ├── requirements.txt
│   └── runtime.txt
├── docker-compose.yml
├── docs
│   ├── .gitignore
│   ├── _extensions
│   │   └── machow
│   │       └── interlinks
│   │           ├── .gitignore
│   │           ├── _extension.yml
│   │           └── interlinks.lua
│   ├── _quarto.yml
│   ├── customize-pins-metadata.qmd
│   ├── favicon.ico
│   ├── get_started.qmd
│   ├── index.qmd
│   └── logo.png
├── pins
│   ├── __init__.py
│   ├── _adaptors.py
│   ├── _types.py
│   ├── boards.py
│   ├── cache.py
│   ├── config.py
│   ├── constructors.py
│   ├── data
│   │   ├── __init__.py
│   │   └── mtcars.csv
│   ├── databricks
│   │   ├── __init__.py
│   │   └── fs.py
│   ├── drivers.py
│   ├── errors.py
│   ├── meta.py
│   ├── rsconnect
│   │   ├── __init__.py
│   │   ├── api.py
│   │   ├── fs.py
│   │   └── html
│   │       ├── highlight.js-9.15.9
│   │       │   ├── highlight.js
│   │       │   └── qtcreator_light.css
│   │       ├── index.html
│   │       └── pagedtable-1.1
│   │           ├── pagedtable.css
│   │           └── pagedtable.js
│   ├── tests
│   │   ├── .gitignore
│   │   ├── _snapshots
│   │   │   └── test_board_pin_write_rsc_index_html
│   │   │       ├── data.txt
│   │   │       ├── highlight.js-9.15.9
│   │   │       │   ├── highlight.js
│   │   │       │   └── qtcreator_light.css
│   │   │       ├── index.html
│   │   │       ├── pagedtable-1.1
│   │   │       │   ├── pagedtable.css
│   │   │       │   └── pagedtable.js
│   │   │       └── test_rsc_pin.csv
│   │   ├── conftest.py
│   │   ├── example-bundle
│   │   │   ├── data.txt
│   │   │   ├── data_frame.csv
│   │   │   ├── index.html
│   │   │   └── manifest.json
│   │   ├── helpers.py
│   │   ├── pin-board
│   │   │   ├── _pins.yaml
│   │   │   ├── x
│   │   │   │   └── 20221215T180351Z-c3943
│   │   │   │       ├── data.txt
│   │   │   │       └── x.json
│   │   │   └── y
│   │   │       ├── 20221215T180357Z-9ae7a
│   │   │       │   ├── data.txt
│   │   │       │   └── y.rds
│   │   │       └── 20221215T180400Z-b81d5
│   │   │           ├── data.txt
│   │   │           └── y.json
│   │   ├── pins-compat
│   │   │   ├── df_arrow
│   │   │   │   └── 20220214T163720Z-ad0c1
│   │   │   │       ├── data.txt
│   │   │   │       └── df_arrow.arrow
│   │   │   ├── df_csv
│   │   │   │   ├── 20220214T163718Z-eceac
│   │   │   │   │   ├── data.txt
│   │   │   │   │   └── df_csv.csv
│   │   │   │   └── 20220214T163720Z-9bfad
│   │   │   │       ├── data.txt
│   │   │   │       └── df_csv.csv
│   │   │   ├── df_rds
│   │   │   │   └── 20220214T163720Z-35b15
│   │   │   │       ├── data.txt
│   │   │   │       └── df_rds.rds
│   │   │   └── df_unversioned
│   │   │       └── 20220214T163720Z-35b15
│   │   │           ├── data.txt
│   │   │           └── df_unversioned.rds
│   │   ├── pins-old-types
│   │   │   └── a-table
│   │   │       └── v
│   │   │           ├── data.csv
│   │   │           ├── data.rds
│   │   │           └── data.txt
│   │   ├── test_adaptors.py
│   │   ├── test_boards.py
│   │   ├── test_cache.py
│   │   ├── test_compat.py
│   │   ├── test_compat_old_types.py
│   │   ├── test_config.py
│   │   ├── test_constructors.py
│   │   ├── test_drivers.py
│   │   ├── test_meta.py
│   │   ├── test_rsconnect_api.py
│   │   ├── test_utils.py
│   │   └── test_versions.py
│   ├── utils.py
│   └── versions.py
├── pyproject.toml
├── requirements
│   ├── dev.txt
│   └── minimum.txt
└── script
    ├── ci-compat-check
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── dump_py_pins.py
    │   ├── dump_r_pins.R
    │   ├── validate_py_to_r.R
    │   └── validate_r_to_py.py
    ├── setup-rsconnect
    │   ├── add-users.sh
    │   ├── dump_api_keys.py
    │   ├── rstudio-connect.gcfg
    │   └── users.txt
    ├── stage_example_bundle.py
    ├── stage_r_pins.R
    └── stage_r_pins_old_types.R

/.env.dev:
--------------------------------------------------------------------------------
1 | # This allows the unit tests to run, while not using
2 | # the full <user_name>/<content_name> format.
3 | PINS_ALLOW_RSC_SHORT_NAME=1
4 | PINS_FEATURE_PREVIEW=1
5 |
6 | # Pins optional config ----
7 | #PINS_CACHE_DIR=.pins_cache
8 | #PINS_DATA_DIR=.pins_data
9 |
10 | # AWS S3 backend ----
11 | AWS_ACCESS_KEY_ID=
12 | AWS_SECRET_ACCESS_KEY=
13 | AWS_REGION=us-east-1
14 |
15 | # Azure backend ----
16 | AZURE_STORAGE_ACCOUNT_NAME=cipins
17 | AZURE_STORAGE_ACCOUNT_KEY=
18 |
19 | # GCS backend ----
20 | # Note that this backend uses gcsfs's
21 | # default auth setting, which requires authenticating
22 | # via the gcloud cli.
23 |
24 | # Posit Connect license ----
25 | RSC_LICENSE=
26 |
27 | # Uncomment and change the variables below to specify the buckets (directories)
28 | # in which test boards will be created. E.g. "ci-pins" means boards will be created
29 | # in the ci-pins bucket on s3.
30 | # (Note that the local file backend always uses a temporary directory.)
31 | #
32 | # PINS_TEST_S3__PATH="ci-pins"
33 |
34 | # Databricks backend ----
35 | DATABRICKS_HOST=
36 | DATABRICKS_TOKEN=
37 | DATABRICKS_VOLUME=
38 | --------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, caste, color, religion, or sexual
10 | identity and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 |   and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 |   community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or advances of
31 |   any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email address,
35 |   without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 |   professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at codeofconduct@posit.co.
63 | All complaints will be reviewed and investigated promptly and fairly.
64 |
65 | All community leaders are obligated to respect the privacy and security of the
66 | reporter of any incident.
67 |
68 | ## Enforcement Guidelines
69 |
70 | Community leaders will follow these Community Impact Guidelines in determining
71 | the consequences for any action they deem in violation of this Code of Conduct:
72 |
73 | ### 1. Correction
74 |
75 | **Community Impact**: Use of inappropriate language or other behavior deemed
76 | unprofessional or unwelcome in the community.
77 |
78 | **Consequence**: A private, written warning from community leaders, providing
79 | clarity around the nature of the violation and an explanation of why the
80 | behavior was inappropriate. A public apology may be requested.
81 |
82 | ### 2. Warning
83 |
84 | **Community Impact**: A violation through a single incident or series of
85 | actions.
86 |
87 | **Consequence**: A warning with consequences for continued behavior. No
88 | interaction with the people involved, including unsolicited interaction with
89 | those enforcing the Code of Conduct, for a specified period of time. This
90 | includes avoiding interactions in community spaces as well as external channels
91 | like social media. Violating these terms may lead to a temporary or permanent
92 | ban.
93 |
94 | ### 3. Temporary Ban
95 |
96 | **Community Impact**: A serious violation of community standards, including
97 | sustained inappropriate behavior.
98 |
99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 |
105 | ### 4. Permanent Ban
106 |
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 |
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 |
114 | ## Attribution
115 |
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.1, available at
118 | <https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
119 |
120 | Community Impact Guidelines were inspired by
121 | [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion).
122 |
123 | For answers to common questions about this code of conduct, see the FAQ at
124 | <https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>.
125 | 126 | [homepage]: https://www.contributor-covenant.org 127 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ['main', 'dev-*'] 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | env: 12 | PINS_ALLOW_RSC_SHORT_NAME: 1 13 | PINS_FEATURE_PREVIEW: 1 14 | 15 | jobs: 16 | tests: 17 | name: "Tests" 18 | runs-on: ${{ matrix.os }} 19 | if: ${{ !github.event.pull_request.head.repo.fork }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python: ["3.9", "3.10", "3.11", "3.12", "3.13"] 24 | os: ["ubuntu-latest"] 25 | pytest_opts: ["--workers 4 --tests-per-worker 1"] 26 | requirements: [""] 27 | include: 28 | - os: "ubuntu-latest" 29 | python: "3.9" 30 | requirements: "requirements/minimum.txt" 31 | - os: "macos-latest" 32 | python: "3.10" 33 | # ignore doctests, as they involve calls to github, and all mac machines 34 | # use the same IP address 35 | pytest_opts: "--workers 4 --tests-per-worker 1 -k pins/tests" 36 | - os: "windows-latest" 37 | python: "3.10" 38 | # ignore doctests 39 | pytest_opts: "-k pins/tests" 40 | steps: 41 | - uses: actions/checkout@v4 42 | - uses: actions/setup-python@v4 43 | with: 44 | python-version: ${{ matrix.python }} 45 | - name: Install dependencies 46 | shell: bash 47 | run: | 48 | python -m pip install --upgrade pip 49 | 50 | # optionally install from requirements file 51 | if [ $REQUIREMENTS ]; then 52 | pip install -r $REQUIREMENTS 53 | fi 54 | 55 | python -m pip install -e .[test] 56 | 57 | - name: Set up Cloud SDK 58 | uses: google-github-actions/setup-gcloud@v0 59 | with: 60 | project_id: siuba-tests 61 | service_account_key: ${{ secrets.GCP_SA_KEY }} 62 | export_default_credentials: true 63 | 64 | - name: Run tests 65 | shell: bash 66 | run: | 67 | pytest pins -m 'not fs_rsc and not skip_on_github' $PYTEST_OPTS 68 | env: 69 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 70 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 71 | AWS_REGION: "us-east-1" 72 | AZURE_STORAGE_ACCOUNT_NAME: ${{ secrets.AZURE_STORAGE_ACCOUNT_NAME }} 73 | AZURE_STORAGE_ACCOUNT_KEY: ${{ secrets.AZURE_STORAGE_ACCOUNT_KEY }} 74 | DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} 75 | DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} 76 | PYTEST_OPTS: ${{ matrix.pytest_opts }} 77 | REQUIREMENTS: ${{ matrix.requirements }} 78 | ACTION_OS: ${{ matrix.os }} 79 | # fixes error on macosx virtual machine with pytest-parallel 80 | # https://github.com/browsertron/pytest-parallel/issues/93 81 | no_proxy: "*" 82 | 83 | test-rsconnect: 84 | name: "Test Posit Connect" 85 | runs-on: ubuntu-latest 86 | if: ${{ !github.event.pull_request.head.repo.fork }} 87 | steps: 88 | - uses: actions/checkout@v4 89 | - uses: actions/setup-python@v4 90 | with: 91 | python-version: "3.10" 92 | - name: Install dependencies 93 | run: | 94 | python -m pip install --upgrade pip 95 | python -m pip install -r requirements/dev.txt 96 | python -m pip install -e . 
97 | 98 | - name: run Posit Connect 99 | run: | 100 | docker compose up --build -d 101 | make dev 102 | env: 103 | RSC_LICENSE: ${{ secrets.RSC_LICENSE }} 104 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 105 | 106 | # NOTE: edited to run checks for python package 107 | - name: Run tests 108 | run: | 109 | pytest pins -m 'fs_rsc and not skip_on_github' 110 | 111 | 112 | test-fork: 113 | name: "Test a fork PR (no secrets)" 114 | runs-on: ubuntu-latest 115 | if: ${{ github.event.pull_request.head.repo.fork }} 116 | steps: 117 | - uses: actions/checkout@v4 118 | - uses: actions/setup-python@v4 119 | with: 120 | python-version: "3.10" 121 | - name: Install dependencies 122 | run: | 123 | python -m pip install --upgrade pip 124 | 125 | python -m pip install -e .[test] 126 | - name: Run tests 127 | run: | 128 | # TODO: better way to disable all cloud backend tests? 129 | pytest pins -m 'not fs_rsc and not fs_s3 and not fs_gcs and not fs_abfs and not skip_on_github' 130 | 131 | 132 | build-docs: 133 | name: "Build Docs" 134 | runs-on: ubuntu-latest 135 | steps: 136 | - uses: actions/checkout@v3 137 | - uses: actions/setup-python@v4 138 | with: 139 | python-version: "3.10" 140 | - name: Install dependencies 141 | run: | 142 | python -m pip install --upgrade pip 143 | python -m pip install -r requirements/dev.txt 144 | python -m pip install -e . 145 | python -m ipykernel install --user 146 | 147 | - name: Set up Quarto 148 | uses: quarto-dev/quarto-actions/setup@v2 149 | - name: Build docs 150 | run: | 151 | make docs-build 152 | - name: Save docs artifact 153 | uses: actions/upload-artifact@v4 154 | with: 155 | name: docs-html 156 | path: docs/_site 157 | 158 | preview-docs: 159 | name: "Preview Docs:" 160 | runs-on: ubuntu-latest 161 | needs: ["build-docs"] 162 | if: "${{github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork }}" 163 | 164 | steps: 165 | - uses: actions/download-artifact@v4 166 | with: 167 | name: docs-html 168 | path: docs/_site 169 | 170 | # Determine the release name --- 171 | 172 | - name: Configure pull release name 173 | if: ${{github.event_name == 'pull_request'}} 174 | run: | 175 | echo "RELEASE_NAME=pr-${PR_NUMBER}" >> $GITHUB_ENV 176 | env: 177 | PR_NUMBER: ${{ github.event.number }} 178 | 179 | # create deployment ---- 180 | 181 | - name: Create Github Deployment 182 | uses: bobheadxi/deployments@v0.4.3 183 | id: deployment 184 | with: 185 | step: start 186 | token: ${{ secrets.GITHUB_TOKEN }} 187 | env: ${{ env.RELEASE_NAME }} 188 | ref: ${{ github.head_ref }} 189 | transient: true 190 | logs: 'https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}' 191 | 192 | # push docs ---- 193 | 194 | - name: Netlify docs preview 195 | run: | 196 | npm install -g netlify-cli 197 | # push main branch to production, others to preview -- 198 | netlify deploy --dir=docs/_site --alias="${ALIAS}" 199 | 200 | env: 201 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 202 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 203 | ALIAS: ${{ steps.deployment.outputs.env }} 204 | 205 | # update deployment ---- 206 | 207 | - name: Update Github Deployment 208 | uses: bobheadxi/deployments@v0.4.3 209 | if: ${{ always() }} 210 | with: 211 | step: finish 212 | token: ${{ secrets.GITHUB_TOKEN }} 213 | status: ${{ job.status }} 214 | deployment_id: ${{ steps.deployment.outputs.deployment_id }} 215 | env_url: 'https://${{ steps.deployment.outputs.env }}--pins-python.netlify.app' 216 | logs: 'https://github.com/${{ github.repository 
}}/actions/runs/${{ github.run_id }}' 217 | 218 | publish-docs: 219 | name: "Publish Docs" 220 | runs-on: ubuntu-latest 221 | needs: ["build-docs", "tests", "test-rsconnect"] 222 | if: github.ref == 'refs/heads/main' 223 | steps: 224 | - uses: actions/download-artifact@v4 225 | with: 226 | name: docs-html 227 | path: docs/_site 228 | - uses: peaceiris/actions-gh-pages@v3 229 | with: 230 | github_token: ${{ secrets.GITHUB_TOKEN }} 231 | publish_dir: docs/_site 232 | 233 | release-pypi: 234 | name: "Release to pypi" 235 | runs-on: ubuntu-latest 236 | if: github.event_name == 'release' 237 | needs: [build-docs, tests, test-rsconnect] 238 | steps: 239 | - uses: actions/checkout@v4 240 | - uses: actions/setup-python@v4 241 | with: 242 | python-version: "3.10" 243 | - name: "Build Package" 244 | run: | 245 | python -m pip install build wheel 246 | python -m build --sdist --wheel 247 | 248 | - name: "Deploy to Test PyPI" 249 | uses: pypa/gh-action-pypi-publish@release/v1 250 | with: 251 | user: __token__ 252 | password: ${{ secrets.PYPI_API_TOKEN }} 253 | -------------------------------------------------------------------------------- /.github/workflows/code-checks.yml: -------------------------------------------------------------------------------- 1 | name: Code Checks 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ['main', 'dev-*'] 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | jobs: 12 | pre-commit: 13 | name: "Run pre-commit" 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: actions/setup-python@v4 18 | - uses: pre-commit/action@v3.0.1 19 | 20 | pyright: 21 | name: "Run Pyright" 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: actions/setup-python@v4 26 | with: 27 | python-version: 3.13 # Use the maximum version supported by python-pins 28 | - name: Install dependencies 29 | shell: bash 30 | run: | 31 | python -m pip install --upgrade pip 32 | python -m pip install -e .[check] 33 | - uses: jakebailey/pyright-action@v2 34 | with: 35 | version: 1.1.372 36 | -------------------------------------------------------------------------------- /.github/workflows/cross-compat.yml: -------------------------------------------------------------------------------- 1 | name: Cross compatibility 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | r_pins_tag: 7 | description: "Tag or commit from pins-r (e.g. 
v1.0.3)" 8 | default: "__cran__" 9 | required: true 10 | push: 11 | branches: ['main', 'dev-*'] 12 | pull_request: 13 | release: 14 | types: [published] 15 | 16 | env: 17 | PINS_ALLOW_RSC_SHORT_NAME: 1 18 | PINS_FEATURE_PREVIEW: 1 19 | 20 | jobs: 21 | check-cross-compatibility: 22 | name: "Check cross lib compatibility" 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: Install libcurl on Linux 28 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev 29 | 30 | # r --- 31 | 32 | - uses: r-lib/actions/setup-r@v2 33 | with: 34 | use-public-rspm: true 35 | 36 | - name: Install R dependencies (from CRAN) 37 | run: "install.packages('pins')" 38 | shell: Rscript {0} 39 | if: ${{ github.event.name != 'workflow_dispatch' || inputs.r_pins_tag == '__cran__' }} 40 | 41 | - name: Install R dependencies (from github) 42 | run: | 43 | install.packages("remotes") 44 | remotes::install_github(paste0('rstudio/pins-r@', Sys.getenv('R_PINS_TAG'))) 45 | shell: Rscript {0} 46 | env: 47 | R_PINS_TAG: ${{ inputs.r_pins_tag }} 48 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 49 | if: ${{ github.event.name == 'workflow_dispatch' && inputs.r_pins_tag != '__cran__' }} 50 | 51 | # python --- 52 | 53 | - uses: actions/setup-python@v2 54 | with: 55 | python-version: "3.10" 56 | - name: Install py dependencies 57 | run: | 58 | python -m pip install --upgrade pip 59 | python -m pip install -r requirements/dev.txt 60 | python -m pip install -e . 61 | 62 | # write and test --- 63 | 64 | - name: Run script/ci-compat-check 65 | run: make ci-compat-check 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac specific 2 | .DS_Store 3 | 4 | # Vim swapfiles 5 | *.sw[op] 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # RStudio 138 | .Rproj.user 139 | *.Rproj 140 | 141 | # Quarto 142 | /.quarto/ 143 | _site/ 144 | objects.json 145 | reference/ 146 | src/ 147 | 148 | /.luarc.json 149 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: "(.*\\.csv)|(^pins/tests/_snapshots)" 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.4.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | args: ["--unsafe"] 10 | - id: check-added-large-files 11 | - repo: https://github.com/charliermarsh/ruff-pre-commit 12 | rev: "v0.5.4" # Sync with pyproject.toml 13 | hooks: 14 | - id: ruff 15 | args: ["--fix"] 16 | - id: ruff-format 17 | - repo: https://github.com/codespell-project/codespell 18 | rev: v2.4.1 19 | hooks: 20 | - id: codespell 21 | additional_dependencies: 22 | - tomli 23 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # pins-python 2 | 3 | ## Development 4 | 5 | ### Install pins with dev dependencies 6 | 7 | ```shell 8 | python -m pip install -e .[dev] 9 | ``` 10 | 11 | ### Install pre-commit hooks 12 | 13 | This project uses [pre-commit](https://pre-commit.com/) to check and format each commit. 14 | 15 | You can set it up by running the following code in this repo: 16 | 17 | ``` 18 | python -m pip install pre-commit 19 | pre-commit install 20 | ``` 21 | 22 | ### Setting version number 23 | 24 | This project uses [setuptools_scm](https://github.com/pypa/setuptools_scm) to 25 | automatically track and change version numbers within the `pins` package. 26 | It works by checking the last tagged commit. 27 | 28 | In order to set the version number, create a tag like the following. 29 | 30 | ```shell 31 | git tag v0.0.1 32 | ``` 33 | 34 | In order to see the version number being used for the current commit, run: 35 | 36 | ``` 37 | python -m setuptools_scm 38 | ``` 39 | 40 | ## Test 41 | 42 | Tests can be run using pytest: 43 | 44 | ```shell 45 | pytest pins 46 | 47 | # run all tests except those for Posit Connect 48 | pytest pins -m 'not fs_rsc' 49 | 50 | # run only local filesystem backend tests 51 | pytest pins -m 'fs_file' 52 | 53 | # run all tests except those for S3 and GCS 54 | pytest pins -m 'not fs_s3 and not fs_gcs' 55 | 56 | # run all tests except those using data on GitHub 57 | # n.b. 
doctests cannot have marks https://github.com/pytest-dev/pytest/issues/5794
58 | pytest pins -m 'not skip_on_github' -k 'not pins.boards.BoardManual'
59 | ```
60 |
61 | There are two important details to note for testing:
62 |
63 | * **Backends**. pins can write to backends like s3, azure, and Posit Connect, so you
64 |   will need to set credentials to test against them.
65 | * **Pytest Marks**. You can disable tests over a specific backend through pytest's
66 |   `-m` flag. For example...
67 |   - Skip S3: `pytest pins -m 'not fs_s3'`
68 |   - Test only s3: `pytest pins -m 'fs_s3'`
69 |   - List all marks: `pytest pins --markers`
70 |
71 | ### Configuring backends
72 |
73 | * Copy `.env.dev` to `.env`
74 | * Modify `.env` to fill in environment variables (e.g. AWS_ACCESS_KEY_ID)
75 | * Be careful not to put any sensitive information in `.env.dev`!
76 |
77 | ### Setting up Posit Connect tests
78 |
79 | ```
80 | # Be sure to set RSC_LICENSE in .env
81 | make dev
82 | ```
83 | --------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 pins-python authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | --------------------------------------------------------------------------------
/MAINTAINERS.md:
--------------------------------------------------------------------------------
1 | # Who maintains pins
2 |
3 | The pins-python package is currently maintained by Isabel Zimmerman. [Posit Software, PBC](https://posit.co/products/open-source/) is a copyright holder and funder of this package.
4 |
5 | Several individuals in the community have taken an active role in helping to maintain this package and submit fixes. Those individuals are shown in the git changelog.
6 | --------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | prune .*
2 | prune docs
3 | --------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | SPHINX_BUILDARGS=
2 | # Note that these are keys generated by the docker rsconnect service, so are
3 | # not really secrets.
They are saved to json to make it easy to use rsconnect 4 | # as multiple users from the tests 5 | RSC_API_KEYS=pins/tests/rsconnect_api_keys.json 6 | 7 | dev: pins/tests/rsconnect_api_keys.json 8 | 9 | dev-start: 10 | docker compose up -d 11 | docker compose exec -T rsconnect bash < script/setup-rsconnect/add-users.sh 12 | # curl fails with error 52 without a short sleep.... 13 | sleep 5 14 | curl -s --retry 10 --retry-connrefused http://localhost:3939 15 | 16 | dev-stop: 17 | docker compose down 18 | rm -f $(RSC_API_KEYS) 19 | 20 | $(RSC_API_KEYS): dev-start 21 | python script/setup-rsconnect/dump_api_keys.py $@ 22 | 23 | README.md: 24 | quarto render README.qmd 25 | 26 | test: test-most test-rsc 27 | 28 | test-most: 29 | pytest pins -m "not fs_rsc and not fs_s3" --workers 4 --tests-per-worker 1 -vv 30 | 31 | test-rsc: 32 | pytest pins -m "fs_rsc" 33 | 34 | docs-build: 35 | cd docs && python -m quartodoc build --verbose 36 | cd docs && quarto render 37 | 38 | docs-clean: 39 | rm -rf docs/_build docs/api/api_card 40 | 41 | requirements/dev.txt: pyproject.toml 42 | @# allows you to do this... 43 | @# make requirements | tee > requirements/some_file.txt 44 | @pip-compile pyproject.toml --rebuild --extra doc --extra test --extra check --output-file=- > $@ 45 | 46 | binder/requirements.txt: requirements/dev.txt 47 | cp $< $@ 48 | 49 | ci-compat-check: 50 | # TODO: mark as dummy 51 | $(MAKE) -C script/$@ 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pins 2 | 3 | 4 | ![PyPI - Version](https://img.shields.io/pypi/v/pins.svg) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pins) [![Checked with pyright](https://microsoft.github.io/pyright/img/pyright_badge.svg)](https://microsoft.github.io/pyright/) 5 | 6 | 7 | 8 | The pins package publishes data, models, and other Python objects, 9 | making it easy to share them across projects and with your colleagues. 10 | You can pin objects to a variety of pin *boards*, including folders (to 11 | share on a networked drive or with services like DropBox), Posit 12 | Connect, Amazon S3, and Google Cloud Storage. Pins can be automatically 13 | versioned, making it straightforward to track changes, re-run analyses 14 | on historical data, and undo mistakes. 15 | 16 | See the [documentation](https://rstudio.github.io/pins-python) for 17 | getting started. 18 | 19 | You can use pins from R as well as Python. For example, you can use one 20 | language to read a pin created with the other. Learn more about [pins 21 | for R](https://pins.rstudio.com). 22 | 23 | ## Installation 24 | 25 | You can install the released version of pins from 26 | [PyPI](https://pypi.org/project/pins/): 27 | 28 | ``` shell 29 | python -m pip install pins 30 | ``` 31 | 32 | And the development version from 33 | [GitHub](https://github.com/rstudio/pins-python) with: 34 | 35 | ``` shell 36 | python -m pip install git+https://github.com/rstudio/pins-python 37 | ``` 38 | 39 | ## Usage 40 | 41 | To use the pins package, you must first create a pin board. A good place 42 | to start is `board_folder()`, which stores pins in a directory you 43 | specify. Here I’ll use a special version of `board_folder()` called 44 | `board_temp()` which creates a temporary board that’s automatically 45 | deleted when your Python script or notebook session ends. This is great 46 | for examples, but obviously you shouldn’t use it for real work! 
47 |
48 | ``` python
49 | import pins
50 | from pins.data import mtcars
51 |
52 | board = pins.board_temp()
53 | ```
54 |
55 | You can “pin” (save) data to a board with the `.pin_write()` method. It
56 | requires three arguments: an object, a name, and a pin type:
57 |
58 | ``` python
59 | board.pin_write(mtcars.head(), "mtcars", type="csv")
60 | ```
61 |
62 |     Writing pin:
63 |     Name: 'mtcars'
64 |     Version: 20230523T115348Z-120a5
65 |
66 |     Meta(title='mtcars: a pinned 5 x 11 DataFrame', description=None, created='20230523T115348Z', pin_hash='120a54f7e0818041', file='mtcars.csv', file_size=249, type='csv', api_version=1, version=Version(created=datetime.datetime(2023, 5, 23, 11, 53, 48, 555797), hash='120a54f7e0818041'), tags=None, name='mtcars', user={}, local={})
67 |
68 | Above, we saved the data as a CSV, but depending on what you’re saving
69 | and who else you want to read it, you might use the `type` argument to
70 | instead save it as a `joblib`, `parquet`, or `json` file.
71 |
72 | You can later retrieve the pinned data with `.pin_read()`:
73 |
74 | ``` python
75 | board.pin_read("mtcars")
76 | ```
77 |
78 |          mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  carb
79 |     0  21.0    6  160.0  110  3.90  2.620  16.46   0   1     4     4
80 |     1  21.0    6  160.0  110  3.90  2.875  17.02   0   1     4     4
81 |     2  22.8    4  108.0   93  3.85  2.320  18.61   1   1     4     1
82 |     3  21.4    6  258.0  110  3.08  3.215  19.44   1   0     3     1
83 |     4  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3     2
84 |
85 | A board on your computer is a good place to start, but the real power of
86 | pins comes when you use a board that’s shared with multiple people. To
87 | get started, you can use `board_folder()` with a directory on a shared
88 | drive or in DropBox, or if you use [Posit
89 | Connect](https://posit.co/products/enterprise/connect/) you can use
90 | `board_connect()`:
91 |
92 | ``` python
93 | # Note that this uses one approach to connecting,
94 | # the environment variables CONNECT_SERVER and CONNECT_API_KEY
95 |
96 | board = pins.board_connect()
97 | board.pin_write(tidy_sales_data, "hadley/sales-summary", type="csv")
98 | ```
99 |
100 | Then, someone else (or an automated report) can read and use your pin:
101 |
102 | ``` python
103 | board = board_connect()
104 | board.pin_read("hadley/sales-summary")
105 | ```
106 |
107 | You can easily control who gets to access the data using the Posit
108 | Connect permissions pane.
109 |
110 | The pins package also includes boards that allow you to share data on
111 | services like Amazon’s S3 (`board_s3()`), Google Cloud Storage
112 | (`board_gcs()`), and Azure blob storage (`board_azure()`).
113 |
114 | ## Contributing
115 |
116 | - This project is released with a [Contributor Code of
117 |   Conduct](https://www.contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html).
118 |   By contributing to this project, you agree to abide by its terms.
119 |
120 | - If you think you have encountered a bug, please [submit an
121 |   issue](https://github.com/rstudio/pins-python/issues).
122 | --------------------------------------------------------------------------------
/README.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | format: gfm
3 | ---
4 |
5 | ```{python}
6 | #| include: false
7 | # this keeps the pandas dataframe repr from spitting out scoped style tags
8 | # which don't render on github
9 | import pandas as pd
10 | pd.set_option("display.notebook_repr_html", False)
11 | ```
12 |
13 | # pins
14 |
15 | The pins package publishes data, models, and other Python objects, making it
16 | easy to share them across projects and with your colleagues. You can pin
17 | objects to a variety of pin *boards*, including folders (to share on a
18 | networked drive or with services like DropBox), Posit Connect, Amazon
19 | S3, and Google Cloud Storage.
20 | Pins can be automatically versioned, making it straightforward to track changes,
21 | re-run analyses on historical data, and undo mistakes.
22 |
23 | See the [documentation](https://rstudio.github.io/pins-python) for getting started.
24 |
25 | You can use pins from R as well as Python. For example, you can use one language
26 | to read a pin created with the other. Learn more about
27 | [pins for R](https://pins.rstudio.com).
28 |
29 | ## Installation
30 |
31 | You can install the released version of pins from [PyPI](https://pypi.org/project/pins/):
32 |
33 | ```shell
34 | python -m pip install pins
35 | ```
36 |
37 | And the development version from [GitHub](https://github.com/rstudio/pins-python) with:
38 |
39 | ```shell
40 | python -m pip install git+https://github.com/rstudio/pins-python
41 | ```
42 |
43 | ## Usage
44 |
45 | To use the pins package, you must first create a pin board. A good place
46 | to start is `board_folder()`, which stores pins in a directory you
47 | specify. Here I’ll use a special version of `board_folder()` called
48 | `board_temp()` which creates a temporary board that’s automatically
49 | deleted when your Python script or notebook session ends. This is great for examples, but
50 | obviously you shouldn't use it for real work!
51 |
52 | ```{python}
53 | import pins
54 | from pins.data import mtcars
55 |
56 | board = pins.board_temp()
57 | ```
58 |
59 | You can "pin" (save) data to a board with the `.pin_write()` method. It requires three
60 | arguments: an object, a name, and a pin type:
61 |
62 | ```{python}
63 | board.pin_write(mtcars.head(), "mtcars", type="csv")
64 | ```
65 |
66 | Above, we saved the data as a CSV, but depending on
67 | what you’re saving and who else you want to read it, you might use the
68 | `type` argument to instead save it as a `joblib`, `parquet`, or `json` file.
69 |
70 | You can later retrieve the pinned data with `.pin_read()`:
71 |
72 | ```{python}
73 | board.pin_read("mtcars")
74 | ```
75 |
76 | A board on your computer is a good place to start, but the real power of
77 | pins comes when you use a board that's shared with multiple people.
To 78 | get started, you can use `board_folder()` with a directory on a shared 79 | drive or in DropBox, or if you use [Posit 80 | Connect](https://posit.co/products/enterprise/connect/) you can use 81 | `board_connect()`: 82 | 83 | ```python 84 | # Note that this uses one approach to connecting, 85 | # the environment variables CONNECT_SERVER and CONNECT_API_KEY 86 | 87 | board = pins.board_connect() 88 | board.pin_write(tidy_sales_data, "hadley/sales-summary", type="csv") 89 | ``` 90 | 91 | Then, someone else (or an automated report) can read and use your 92 | pin: 93 | 94 | ```python 95 | board = board_connect() 96 | board.pin_read("hadley/sales-summary") 97 | ``` 98 | 99 | You can easily control who gets to access the data using the Posit 100 | Connect permissions pane. 101 | 102 | The pins package also includes boards that allow you to share data on 103 | services like Amazon’s S3 (`board_s3()`), Google Cloud Storage (`board_gcs()`), 104 | and Azure blob storage (`board_azure()`). 105 | 106 | ## Contributing 107 | 108 | - This project is released with a [Contributor Code of Conduct](https://www.contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 109 | 110 | - If you think you have encountered a bug, please [submit an issue](https://github.com/rstudio/pins-python/issues). 111 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | pip install -e . 4 | -------------------------------------------------------------------------------- /binder/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.8 3 | # To update, run: 4 | # 5 | # pip-compile --extra=dev --output-file=- setup.cfg 6 | # 7 | aiobotocore==2.1.2 8 | # via s3fs 9 | aiohttp==3.8.1 10 | # via 11 | # aiobotocore 12 | # s3fs 13 | aioitertools==0.10.0 14 | # via aiobotocore 15 | aiosignal==1.2.0 16 | # via aiohttp 17 | alabaster==0.7.12 18 | # via sphinx 19 | appdirs==1.4.4 20 | # via pins (setup.cfg) 21 | appnope==0.1.2 22 | # via 23 | # ipykernel 24 | # ipython 25 | asttokens==2.0.5 26 | # via stack-data 27 | async-timeout==4.0.2 28 | # via aiohttp 29 | attrs==21.4.0 30 | # via 31 | # aiohttp 32 | # jsonschema 33 | # markdown-it-py 34 | # pytest 35 | babel==2.9.1 36 | # via sphinx 37 | backcall==0.2.0 38 | # via ipython 39 | beautifulsoup4==4.10.0 40 | # via 41 | # nbconvert 42 | # pydata-sphinx-theme 43 | bleach==4.1.0 44 | # via nbconvert 45 | botocore==1.23.24 46 | # via aiobotocore 47 | certifi==2021.10.8 48 | # via requests 49 | charset-normalizer==2.0.12 50 | # via 51 | # aiohttp 52 | # requests 53 | click==8.0.4 54 | # via pip-tools 55 | debugpy==1.6.0 56 | # via ipykernel 57 | decopatch==1.4.10 58 | # via pytest-cases 59 | decorator==5.1.1 60 | # via ipython 61 | defusedxml==0.7.1 62 | # via nbconvert 63 | docutils==0.17.1 64 | # via 65 | # nbsphinx 66 | # pydata-sphinx-theme 67 | # sphinx 68 | entrypoints==0.4 69 | # via 70 | # jupyter-client 71 | # nbconvert 72 | executing==0.8.3 73 | # via stack-data 74 | frozenlist==1.3.0 75 | # via 76 | # aiohttp 77 | # aiosignal 78 | fsspec==2022.02.0 79 | # via 80 | # pins (setup.cfg) 81 | # s3fs 82 | idna==3.3 83 | # via 84 | # requests 85 | # yarl 86 | imagesize==1.3.0 87 | # via sphinx 88 | importlib-metadata==4.11.3 89 | # via sphinx 90 | 
importlib-resources==5.4.0 91 | # via 92 | # jsonschema 93 | # pins (setup.cfg) 94 | iniconfig==1.1.1 95 | # via pytest 96 | ipykernel==6.9.2 97 | # via pins (setup.cfg) 98 | ipython==8.1.1 99 | # via ipykernel 100 | jedi==0.18.1 101 | # via ipython 102 | jinja2==3.1.0 103 | # via 104 | # nbconvert 105 | # nbsphinx 106 | # pins (setup.cfg) 107 | # sphinx 108 | jmespath==0.10.0 109 | # via botocore 110 | joblib==1.1.0 111 | # via pins (setup.cfg) 112 | jsonschema==4.4.0 113 | # via nbformat 114 | jupyter-client==7.1.2 115 | # via 116 | # ipykernel 117 | # nbclient 118 | jupyter-core==4.9.2 119 | # via 120 | # jupyter-client 121 | # nbconvert 122 | # nbformat 123 | jupyterlab-pygments==0.1.2 124 | # via nbconvert 125 | jupytext==1.13.7 126 | # via pins (setup.cfg) 127 | makefun==1.13.1 128 | # via 129 | # decopatch 130 | # pytest-cases 131 | markdown-it-py==1.1.0 132 | # via 133 | # jupytext 134 | # mdit-py-plugins 135 | markupsafe==2.1.1 136 | # via jinja2 137 | matplotlib-inline==0.1.3 138 | # via 139 | # ipykernel 140 | # ipython 141 | mdit-py-plugins==0.3.0 142 | # via jupytext 143 | mistune==0.8.4 144 | # via nbconvert 145 | multidict==6.0.2 146 | # via 147 | # aiohttp 148 | # yarl 149 | nbclient==0.5.13 150 | # via nbconvert 151 | nbconvert==6.4.4 152 | # via nbsphinx 153 | nbformat==5.2.0 154 | # via 155 | # jupytext 156 | # nbclient 157 | # nbconvert 158 | # nbsphinx 159 | nbsphinx==0.8.8 160 | # via pins (setup.cfg) 161 | nest-asyncio==1.5.4 162 | # via 163 | # ipykernel 164 | # jupyter-client 165 | # nbclient 166 | numpy==1.22.3 167 | # via 168 | # pandas 169 | # siuba 170 | packaging==21.3 171 | # via 172 | # bleach 173 | # pytest 174 | # sphinx 175 | pandas==1.4.1 176 | # via 177 | # pins (setup.cfg) 178 | # siuba 179 | pandocfilters==1.5.0 180 | # via nbconvert 181 | parso==0.8.3 182 | # via jedi 183 | pep517==0.12.0 184 | # via pip-tools 185 | pexpect==4.8.0 186 | # via ipython 187 | pickleshare==0.7.5 188 | # via ipython 189 | pip-tools==6.5.1 190 | # via pins (setup.cfg) 191 | pluggy==1.0.0 192 | # via pytest 193 | prompt-toolkit==3.0.28 194 | # via ipython 195 | psutil==5.9.0 196 | # via ipykernel 197 | ptyprocess==0.7.0 198 | # via pexpect 199 | pure-eval==0.2.2 200 | # via stack-data 201 | py==1.11.0 202 | # via pytest 203 | pydata-sphinx-theme==0.8.0 204 | # via pins (setup.cfg) 205 | pygments==2.11.2 206 | # via 207 | # ipython 208 | # jupyterlab-pygments 209 | # nbconvert 210 | # sphinx 211 | pyparsing==3.0.7 212 | # via packaging 213 | pyrsistent==0.18.1 214 | # via jsonschema 215 | pytest==7.1.1 216 | # via 217 | # pins (setup.cfg) 218 | # pytest-dotenv 219 | pytest-cases==3.6.11 220 | # via pins (setup.cfg) 221 | pytest-dotenv==0.5.2 222 | # via pins (setup.cfg) 223 | python-dateutil==2.8.2 224 | # via 225 | # botocore 226 | # jupyter-client 227 | # pandas 228 | python-dotenv==0.20.0 229 | # via pytest-dotenv 230 | pytz==2022.1 231 | # via 232 | # babel 233 | # pandas 234 | pyyaml==6.0 235 | # via 236 | # jupytext 237 | # pins (setup.cfg) 238 | # siuba 239 | pyzmq==22.3.0 240 | # via jupyter-client 241 | requests==2.27.1 242 | # via sphinx 243 | s3fs==2022.2.0 244 | # via pins (setup.cfg) 245 | siuba==0.1.2 246 | # via pins (setup.cfg) 247 | six==1.16.0 248 | # via 249 | # bleach 250 | # python-dateutil 251 | snowballstemmer==2.2.0 252 | # via sphinx 253 | soupsieve==2.3.1 254 | # via beautifulsoup4 255 | sphinx==4.4.0 256 | # via 257 | # nbsphinx 258 | # pins (setup.cfg) 259 | # pydata-sphinx-theme 260 | sphinxcontrib-applehelp==1.0.2 261 | # via sphinx 262 | 
sphinxcontrib-devhelp==1.0.2 263 | # via sphinx 264 | sphinxcontrib-htmlhelp==2.0.0 265 | # via sphinx 266 | sphinxcontrib-jsmath==1.0.1 267 | # via sphinx 268 | sphinxcontrib-qthelp==1.0.3 269 | # via sphinx 270 | sphinxcontrib-serializinghtml==1.1.5 271 | # via sphinx 272 | sqlalchemy==1.4.32 273 | # via siuba 274 | stack-data==0.2.0 275 | # via ipython 276 | testpath==0.6.0 277 | # via nbconvert 278 | toml==0.10.2 279 | # via jupytext 280 | tomli==2.0.1 281 | # via 282 | # pep517 283 | # pytest 284 | tornado==6.1 285 | # via 286 | # ipykernel 287 | # jupyter-client 288 | traitlets==5.1.1 289 | # via 290 | # ipykernel 291 | # ipython 292 | # jupyter-client 293 | # jupyter-core 294 | # matplotlib-inline 295 | # nbclient 296 | # nbconvert 297 | # nbformat 298 | # nbsphinx 299 | typing-extensions==4.1.1 300 | # via aioitertools 301 | urllib3==1.26.9 302 | # via 303 | # botocore 304 | # requests 305 | wcwidth==0.2.5 306 | # via prompt-toolkit 307 | webencodings==0.5.1 308 | # via bleach 309 | wheel==0.37.1 310 | # via pip-tools 311 | wrapt==1.14.0 312 | # via aiobotocore 313 | xxhash==3.0.0 314 | # via pins (setup.cfg) 315 | yarl==1.7.2 316 | # via aiohttp 317 | zipp==3.7.0 318 | # via 319 | # importlib-metadata 320 | # importlib-resources 321 | 322 | # The following packages are considered to be unsafe in a requirements file: 323 | # pip 324 | # setuptools 325 | -------------------------------------------------------------------------------- /binder/runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.9 2 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | 5 | rsconnect: 6 | image: rstudio/rstudio-connect:2021.12.1 7 | restart: always 8 | ports: 9 | - 3939:3939 10 | volumes: 11 | - $PWD/script/setup-rsconnect/users.txt:/etc/users.txt 12 | - $PWD/script/setup-rsconnect/rstudio-connect.gcfg:/etc/rstudio-connect/rstudio-connect.gcfg 13 | # by default, mysql rounds to 4 decimals, but tests require more precision 14 | privileged: true 15 | environment: 16 | RSTUDIO_CONNECT_HASTE: "enabled" 17 | RSC_LICENSE: ${RSC_LICENSE} 18 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /docs/_extensions/machow/interlinks/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.pdf 3 | *_files/ 4 | -------------------------------------------------------------------------------- /docs/_extensions/machow/interlinks/_extension.yml: -------------------------------------------------------------------------------- 1 | title: Interlinks 2 | author: Michael Chow 3 | version: 1.0.0 4 | quarto-required: ">=1.2.0" 5 | contributes: 6 | filters: 7 | - interlinks.lua 8 | -------------------------------------------------------------------------------- /docs/_extensions/machow/interlinks/interlinks.lua: -------------------------------------------------------------------------------- 1 | local function read_json(filename) 2 | local file = io.open(filename, "r") 3 | if file == nil then 4 | return nil 5 | end 6 | local str = file:read("a") 7 | file:close() 8 | return quarto.json.decode(str) 9 | end 10 | 11 | local inventory 
= {} 12 | 13 | function lookup(search_object) 14 | 15 | local results = {} 16 | for ii, inventory in ipairs(inventory) do 17 | for jj, item in ipairs(inventory.items) do 18 | -- e.g. :external+:::`` 19 | if item.inv_name and item.inv_name ~= search_object.inv_name then 20 | goto continue 21 | end 22 | 23 | if item.name ~= search_object.name then 24 | goto continue 25 | end 26 | 27 | if search_object.role and item.role ~= search_object.role then 28 | goto continue 29 | end 30 | 31 | if search_object.domain and item.domain ~= search_object.domain then 32 | goto continue 33 | else 34 | table.insert(results, item) 35 | 36 | goto continue 37 | end 38 | 39 | ::continue:: 40 | end 41 | end 42 | 43 | if #results == 1 then 44 | return results[1] 45 | end 46 | if #results > 1 then 47 | print("Found multiple matches for " .. search_object.name) 48 | quarto.utils.dump(results) 49 | return nil 50 | end 51 | if #results == 0 then 52 | print("Found no matches for object:") 53 | quarto.utils.dump(search_object) 54 | end 55 | 56 | return nil 57 | end 58 | 59 | function mysplit (inputstr, sep) 60 | if sep == nil then 61 | sep = "%s" 62 | end 63 | local t={} 64 | for str in string.gmatch(inputstr, "([^"..sep.."]+)") do 65 | table.insert(t, str) 66 | end 67 | return t 68 | end 69 | 70 | local function normalize_role(role) 71 | if role == "func" then 72 | return "function" 73 | end 74 | return role 75 | end 76 | 77 | local function build_search_object(str) 78 | local starts_with_colon = str:sub(1, 1) == ":" 79 | local search = {} 80 | if starts_with_colon then 81 | local t = mysplit(str, ":") 82 | if #t == 2 then 83 | -- e.g. :py:func:`my_func` 84 | search.role = normalize_role(t[1]) 85 | search.name = t[2]:match("%%60(.*)%%60") 86 | elseif #t == 3 then 87 | -- e.g. :py:func:`my_func` 88 | search.domain = t[1] 89 | search.role = normalize_role(t[2]) 90 | search.name = t[3]:match("%%60(.*)%%60") 91 | elseif #t == 4 then 92 | -- e.g. :ext+inv:py:func:`my_func` 93 | search.external = true 94 | 95 | search.inv_name = t[1]:match("external%+(.*)") 96 | search.domain = t[2] 97 | search.role = normalize_role(t[3]) 98 | search.name = t[4]:match("%%60(.*)%%60") 99 | else 100 | print("couldn't parse this link: " .. str) 101 | return {} 102 | end 103 | else 104 | search.name = str:match("%%60(.*)%%60") 105 | end 106 | 107 | if search.name == nil then 108 | print("couldn't parse this link: " .. str) 109 | return {} 110 | end 111 | 112 | if search.name:sub(1, 1) == "~" then 113 | search.shortened = true 114 | search.name = search.name:sub(2, -1) 115 | end 116 | return search 117 | end 118 | 119 | function report_broken_link(link, search_object, replacement) 120 | -- TODO: how to unescape html elements like [? 
121 | return pandoc.Code(pandoc.utils.stringify(link.content)) 122 | end 123 | 124 | function Link(link) 125 | -- do not process regular links ---- 126 | if not link.target:match("%%60") then 127 | return link 128 | end 129 | 130 | -- lookup item ---- 131 | local search = build_search_object(link.target) 132 | local item = lookup(search) 133 | 134 | -- determine replacement, used if no link text specified ---- 135 | local original_text = pandoc.utils.stringify(link.content) 136 | local replacement = search.name 137 | if search.shortened then 138 | local t = mysplit(search.name, ".") 139 | replacement = t[#t] 140 | end 141 | 142 | -- set link text ---- 143 | if original_text == "" and replacement ~= nil then 144 | link.content = pandoc.Code(replacement) 145 | end 146 | 147 | -- report broken links ---- 148 | if item == nil then 149 | return report_broken_link(link, search) 150 | end 151 | link.target = item.uri:gsub("%$$", search.name) 152 | 153 | 154 | return link 155 | end 156 | 157 | function fixup_json(json, prefix) 158 | for _, item in ipairs(json.items) do 159 | item.uri = prefix .. item.uri 160 | end 161 | table.insert(inventory, json) 162 | end 163 | 164 | return { 165 | { 166 | Meta = function(meta) 167 | local json 168 | local prefix 169 | for k, v in pairs(meta.interlinks.sources) do 170 | json = read_json(quarto.project.offset .. "/_inv/" .. k .. "_objects.json") 171 | prefix = pandoc.utils.stringify(v.url) 172 | fixup_json(json, prefix) 173 | end 174 | json = read_json(quarto.project.offset .. "/objects.json") 175 | if json ~= nil then 176 | fixup_json(json, "/") 177 | end 178 | end 179 | }, 180 | { 181 | Link = Link 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /docs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | output-dir: _site 4 | 5 | website: 6 | title: "pins for Python" 7 | description: "Pin, Discover, and Share Resources" 8 | page-navigation: true 9 | favicon: "favicon.ico" 10 | page-footer: 11 | center: | 12 | Proudly supported by 13 | [![](https://www.rstudio.com/assets/img/posit-logo-fullcolor-TM.svg){fig-alt="Posit PBC" width=65px}](https://posit.co/) 14 | navbar: 15 | background: light 16 | pinned: true 17 | logo: logo.png 18 | left: 19 | - text: "Get started" 20 | file: get_started.qmd 21 | - text: "Reference" 22 | file: reference/index.qmd 23 | - text: Learn more 24 | menu: 25 | - text: "Create consistent metadata for pins" 26 | file: customize-pins-metadata.qmd 27 | - text: "pins for R" 28 | href: https://pins.rstudio.com 29 | target: _blank 30 | right: 31 | - icon: github 32 | href: https://github.com/rstudio/pins-python 33 | aria-label: Pins python GitHub 34 | sidebar: 35 | style: "floating" 36 | collapse-level: 1 37 | contents: 38 | - section: Boards 39 | contents: 40 | - text: "`board_folder`" 41 | href: reference/board_folder.qmd 42 | - text: "`board_local`" 43 | href: reference/board_local.qmd 44 | - text: "`board_temp`" 45 | href: reference/board_temp.qmd 46 | - text: "`board_s3`" 47 | href: reference/board_s3.qmd 48 | - text: "`board_gcs`" 49 | href: reference/board_gcs.qmd 50 | - text: "`board_azure`" 51 | href: reference/board_azure.qmd 52 | - text: "`board_databricks`" 53 | href: reference/board_databricks.qmd 54 | - text: "`board_connect`" 55 | href: reference/board_connect.qmd 56 | - text: "`board_url`" 57 | href: reference/board_url.qmd 58 | - text: "`board`" 59 | href: reference/board.qmd 60 | - section: Pins 
61 | contents: 62 | - text: "`pin_read`" 63 | href: reference/pin_read.qmd 64 | - text: "`pin_write`" 65 | href: reference/pin_write.qmd 66 | - text: "`pin_meta`" 67 | href: reference/pin_meta.qmd 68 | - text: "`pin_download`" 69 | href: reference/pin_download.qmd 70 | - text: "`pin_upload`" 71 | href: reference/pin_upload.qmd 72 | - text: "`pin_versions`" 73 | href: reference/pin_versions.qmd 74 | - text: "`pin_list`" 75 | href: reference/pin_list.qmd 76 | - text: "`pin_exists`" 77 | href: reference/pin_exists.qmd 78 | - text: "`pin_version_delete`" 79 | href: reference/pin_version_delete.qmd 80 | - text: "`pin_versions_prune`" 81 | href: reference/pin_versions_prune.qmd 82 | - text: "`pin_delete`" 83 | href: reference/pin_delete.qmd 84 | - text: "`pin_search`" 85 | href: reference/pin_search.qmd 86 | 87 | format: 88 | html: 89 | sidebar: false 90 | 91 | quartodoc: 92 | title: Reference 93 | package: pins 94 | sections: 95 | - title: Board constructors 96 | desc: "Functions to create a board object" 97 | contents: 98 | - board_folder 99 | - board_local 100 | - board_temp 101 | - board_s3 102 | - board_gcs 103 | - board_azure 104 | - board_databricks 105 | - board_connect 106 | - board_url 107 | - board 108 | - title: Pin methods 109 | desc: "Methods to handle pins on your board" 110 | package: pins.boards.BaseBoard 111 | contents: 112 | - pin_read 113 | - pin_write 114 | - pin_meta 115 | - pin_download 116 | - pin_upload 117 | - pin_versions 118 | - pin_list 119 | - pin_exists 120 | - pin_version_delete 121 | - pin_versions_prune 122 | - pin_delete 123 | - pin_search 124 | 125 | filters: 126 | - interlinks 127 | 128 | interlinks: 129 | sources: {} 130 | -------------------------------------------------------------------------------- /docs/customize-pins-metadata.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Create consistent metadata for pins 3 | jupyter: python3 4 | --- 5 | 6 | The `metadata` argument in pins is flexible and can hold any kind of metadata that you can formulate as a `dict` (convertible to JSON). 7 | In some situations, you may want to read and write with _consistent_ customized metadata; 8 | you can create functions to wrap [](`~pins.boards.BaseBoard.pin_write`) and [](`~pins.boards.BaseBoard.pin_read`) for your particular use case. 9 | 10 | We'll begin by creating a temporary board for demonstration: 11 | 12 | ```{python setup} 13 | import pins 14 | import pandas as pd 15 | 16 | from pprint import pprint 17 | 18 | board = pins.board_temp() 19 | ``` 20 | 21 | 22 | # A function to store pandas Categoricals 23 | 24 | Say you want to store a pandas Categorical object as JSON together with the _categories_ of the categorical in the metadata. 25 | 26 | For example, here is a simple categorical and its categories: 27 | 28 | ```{python} 29 | some_cat = pd.Categorical(["a", "a", "b"]) 30 | 31 | some_cat.categories 32 | ``` 33 | 34 | Notice that the categories attribute is just the unique values in the categorical. 35 | 36 | We can write a function wrapping [](`~pins.boards.BaseBoard.pin_write`) that holds the categories in metadata, so we can easily re-create the categorical with them. 
37 | 
38 | ```{python}
39 | def pin_write_cat_json(
40 |     board,
41 |     x: pd.Categorical,
42 |     name,
43 |     **kwargs
44 | ):
45 |     metadata = {"categories": x.categories.to_list()}
46 |     json_data = x.to_list()
47 |     board.pin_write(json_data, name = name, type = "json", metadata = metadata, **kwargs)
48 | ```
49 | 
50 | We can use this new function to write a pin as JSON with our specific metadata:
51 | 
52 | ```{python}
53 | some_cat = pd.Categorical(["a", "a", "b", "c"])
54 | pin_write_cat_json(board, some_cat, name = "some-cat")
55 | ```
56 | 
57 | ## A function to read categoricals
58 | 
59 | It's possible to read this pin using the regular [](`~pins.boards.BaseBoard.pin_read`) function, but the object we get is no longer a categorical!
60 | 
61 | ```{python}
62 | board.pin_read("some-cat")
63 | ```
64 | 
65 | However, notice that if we use [](`~pins.boards.BaseBoard.pin_meta`), the information we stored on categories is in the `.user` field.
66 | 
67 | ```{python}
68 | pprint(
69 |     board.pin_meta("some-cat")
70 | )
71 | ```
72 | 
73 | This enables us to write a special function for reading, to reconstruct the categorical using the categories stashed in metadata:
74 | 
75 | ```{python}
76 | def pin_read_cat_json(board, name, version=None, hash=None, **kwargs):
77 |     data = board.pin_read(name = name, version = version, hash = hash, **kwargs)
78 |     meta = board.pin_meta(name = name, version = version, **kwargs)
79 |     return pd.Categorical(data, categories=meta.user["categories"])
80 | 
81 | pin_read_cat_json(board, "some-cat")
82 | ```
83 | 
84 | For an example of how this approach is used in a real project, look at how the vetiver package wraps these functions to [write](https://github.com/rstudio/vetiver-python/blob/main/vetiver/pin_read_write.py) and [read](https://github.com/rstudio/vetiver-python/blob/main/vetiver/vetiver_model.py) model binaries as pins.
85 | 
--------------------------------------------------------------------------------
/docs/favicon.ico: https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/docs/favicon.ico
--------------------------------------------------------------------------------
/docs/get_started.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Get started with pins
3 | jupyter: python3
4 | ---
5 | 
6 | ```{python}
7 | # | include: false
8 | import time
9 | import pandas as pd
10 | pd.options.display.max_rows = 25
11 | ```
12 | 
13 | The pins package helps you publish data sets, models, and other Python objects, making it easy to share them across projects and with your colleagues.
14 | You can pin objects to a variety of "boards", including local folders (to share on a networked drive or with Dropbox), Posit Connect, Amazon S3,
15 | Google Cloud Storage, Azure, and more.
16 | This vignette will introduce you to the basics of pins.
17 | 
18 | ```{python}
19 | from pins import board, board_local, board_folder, board_temp, board_url
20 | ```
21 | 
22 | ## Getting started
23 | 
24 | Every pin lives in a pin *board*, so you must start by creating a pin board.
25 | In this vignette I'll use a temporary board which is automatically deleted when your Python session is over:
26 | 
27 | ```{python}
28 | board = board_temp()
29 | ```
30 | 
31 | In real life, you'd pick a board depending on how you want to share the data.
32 | Here are a few options:
33 | 
34 | 
35 | ```python
36 | board = board_local()  # share data across Python sessions on the same computer
37 | board = board_folder("~/Dropbox")  # share data with others using Dropbox
38 | board = board_folder("Z:\\my-team\\pins")  # share data using a shared network drive
39 | board = board("file", "Z:\\my-team\\pins")  # share data using a shared network drive with caching
40 | board = board_connect()  # share data with Posit Connect
41 | ```
42 | 
43 | 
44 | ## Reading and writing data
45 | 
46 | Once you have a pin board, you can write data to it with the [](`~pins.boards.BaseBoard.pin_write`) method:
47 | 
48 | ```{python}
49 | from pins.data import mtcars
50 | 
51 | meta = board.pin_write(mtcars, "mtcars", type="csv")
52 | ```
53 | 
54 | The first argument is the object to save (usually a data frame, but it can be any Python object), and the second argument gives the "name" of the pin.
55 | The name is basically equivalent to a file name; you'll use it when you later want to read the data from the pin.
56 | The only rule for a pin name is that it can't contain slashes.
57 | 
58 | After you've pinned an object, you can read it back with [](`~pins.boards.BaseBoard.pin_read`):
59 | 
60 | ```{python}
61 | board.pin_read("mtcars")
62 | ```
63 | 
64 | You don't need to supply the file type when reading data from a pin because pins automatically stores the file type in the [metadata](#metadata).
65 | 
66 | ::: {.callout-note}
67 | If you are using the Posit Connect board [](`~pins.board_connect`), then you must specify your pin name as
68 | `"user_name/content_name"`. For example, `"hadley/sales-report"`.
69 | :::
70 | 
71 | ## How and what to store as a pin
72 | 
73 | Above, we saved the data as a CSV, but you can choose another option depending on your goals:
74 | 
75 | - `type = "csv"` uses `to_csv()` from pandas to create a CSV file. CSVs are plain text and can be read easily by many applications, but they only support simple columns (e.g. numbers, strings), can take up a lot of disk space, and can be slow to read.
76 | - `type = "parquet"` uses `to_parquet()` from pandas to create a Parquet file. [Parquet](https://parquet.apache.org/) is a modern, language-independent, column-oriented file format for efficient data storage and retrieval. Parquet is an excellent choice for storing tabular data.
77 | - `type = "arrow"` uses `to_feather()` from pandas to create an Arrow/Feather file.
78 | - `type = "joblib"` uses `joblib.dump()` to create a binary Python data file, such as for storing a trained model. See the [joblib docs](https://joblib.readthedocs.io/en/latest/) for more information.
79 | - `type = "json"` uses `json.dump()` to create a JSON file. Pretty much every programming language can read JSON files, but they only work well for nested lists.
80 | 
81 | Note that when the data lives elsewhere, pins takes care of downloading and caching so that it's only re-downloaded when needed.
82 | That said, most boards transmit pins over HTTP, and this is going to be slow and possibly unreliable for very large pins.
83 | As a general rule of thumb, we don't recommend using pins with files over 500 MB.
84 | If you find yourself routinely pinning data larger than this, you might need to reconsider your data engineering pipeline.
85 | 
86 | Storing your data/object as a pin works well when you write from a single source or process. It is _not_ appropriate when multiple sources or processes need to write to the same pin; since the pins package reads and writes files, it cannot manage concurrent writes. It is also not appropriate for high-frequency writes (multiple times per second).
87 | 
88 | - **Good** use for pins: an ETL pipeline that stores a model or summarized dataset once a day
89 | - **Bad** use for pins: a Shiny app that collects data from users, who may be using the app at the same time
90 | 
91 | ## Metadata
92 | 
93 | 
94 | Every pin is accompanied by some metadata that you can access with [](`~pins.boards.BaseBoard.pin_meta`):
95 | 
96 | ```{python}
97 | board.pin_meta("mtcars")
98 | ```
99 | 
100 | This shows you the metadata that’s generated by default. This includes:
101 | 
102 | * `title`, a brief textual description of the dataset.
103 | * an optional `description`, where you can provide more details.
104 | * the date-time when the pin was `created`.
105 | * the `file_size`, in bytes, of the underlying files.
106 | * a unique `pin_hash` that you can supply to [](`~pins.boards.BaseBoard.pin_read`) to ensure that you’re reading exactly the data that you expect.
107 | 
108 | When creating the pin, you can override the default description or provide additional metadata that is stored with the data:
109 | 
110 | ```{python}
111 | board.pin_write(
112 |     mtcars,
113 |     name="mtcars2",
114 |     type="csv",
115 |     description = "Data extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973–74 models).",
116 |     metadata = {
117 |         "source": "Henderson and Velleman (1981), Building multiple regression models interactively. Biometrics, 37, 391–411."
118 |     }
119 | )
120 | ```
121 | 
122 | ```{python}
123 | board.pin_meta("mtcars2")
124 | ```
125 | 
126 | While we’ll do our best to keep the automatically generated metadata consistent over time, I’d recommend manually capturing anything you really care about in metadata.
127 | 
128 | 
129 | ## Versioning
130 | 
131 | By default, calls to [](`~pins.boards.BaseBoard.pin_write`) will usually create a new version:
132 | 
133 | ```{python}
134 | board2 = board_temp()
135 | board2.pin_write([1,2,3,4,5], name = "x", type = "json")
136 | board2.pin_write([1,2,3], name = "x", type = "json")
137 | board2.pin_write([1,2], name = "x", type = "json")
138 | board2.pin_versions("x")
139 | ```
140 | 
141 | The only exception is if the data is identical to the most recent version (compared via file hash):
142 | 
143 | ```{python}
144 | board2.pin_write([1], name = "x", type = "json")
145 | time.sleep(1.1) # later, let's try and write a new version of the same data...
146 | board2.pin_write([1], name = "x", type = "json")
147 | board2.pin_versions("x")
148 | ```
149 | 
150 | 
151 | However, you can opt out of this behaviour with `force_identical_write=True`:
152 | ```{python}
153 | time.sleep(1.1) # try again...
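# force_identical_write=True stores a new version even though the contents
# hash to the same value as the most recent version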
154 | board2.pin_write([1], name = "x", type = "json", force_identical_write=True) 155 | board2.pin_versions("x") 156 | ``` 157 | 158 | By default, [](`~pins.boards.BaseBoard.pin_read`) will return the most recent version: 159 | 160 | ```{python} 161 | board2.pin_read("x") 162 | ``` 163 | 164 | But you can request an older version by supplying the `version` argument: 165 | 166 | ```{python} 167 | version = board2.pin_versions("x").version[1] 168 | board2.pin_read("x", version = version) 169 | ``` 170 | 171 | ## Storing models 172 | 173 | ::: {.callout-warning} 174 | The examples in this section use joblib to read and write data. Joblib uses the pickle format, and **pickle files are not secure**. Only read pickle files you trust. In order to read pickle files, set the `allow_pickle_read=True` argument. [Learn more about pickling](https://docs.python.org/3/library/pickle.html). 175 | ::: 176 | 177 | You can write a pin with `type="joblib"` to store arbitrary python objects, including fitted models from packages like [scikit-learn](https://scikit-learn.org/). 178 | 179 | For example, suppose you wanted to store a custom `namedtuple` object. 180 | 181 | ```{python} 182 | from collections import namedtuple 183 | 184 | board3 = board_temp(allow_pickle_read=True) 185 | 186 | Coords = namedtuple("Coords", ["x", "y"]) 187 | coords = Coords(1, 2) 188 | 189 | coords 190 | ``` 191 | 192 | Using `type="joblib"` lets you store and read back the custom `coords` object. 193 | 194 | ```{python} 195 | board3.pin_write(coords, "my_coords", type="joblib") 196 | 197 | board3.pin_read("my_coords") 198 | ``` 199 | 200 | 201 | ## Caching 202 | 203 | The primary purpose of pins is to make it easy to share data. 204 | But pins is also designed to help you spend as little time as possible downloading data. 205 | [](`~pins.boards.BaseBoard.pin_read`) and [](`~pins.boards.BaseBoard.pin_download`) automatically cache remote pins: they maintain a local copy of the data (so it's fast) but always check that it's up-to-date (so your analysis doesn't use stale data). 206 | 207 | Wouldn't it be nice if you could take advantage of this feature for any dataset on the internet? 208 | That's the idea behind [](`~pins.board_url`); you can assemble your own board from datasets, wherever they live on the internet. 209 | For example, this code creates a board containing a single pin, `penguins`, that refers to some fun data I found on GitHub: 210 | 211 | ```{python} 212 | my_data = board_url("", { 213 | "penguins": "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins_raw.csv" 214 | }) 215 | ``` 216 | 217 | You can read this data by combining [](`~pins.boards.BaseBoard.pin_download`) with `read_csv` from pandas: 218 | 219 | ```{python} 220 | fname = my_data.pin_download("penguins") 221 | 222 | fname 223 | 224 | ``` 225 | 226 | ```{python} 227 | import pandas as pd 228 | 229 | pd.read_csv(fname[0]).head() 230 | ``` 231 | 232 | ```{python} 233 | my_data.pin_download("penguins") 234 | ``` 235 | -------------------------------------------------------------------------------- /docs/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: python3 3 | --- 4 | 5 | # pins 6 | 7 | The pins package publishes data, models, and other Python objects, making it 8 | easy to share them across projects and with your colleagues. 
You can pin
9 | objects to a variety of pin *boards*, including folders (to share on a
10 | networked drive or with services like Dropbox), Posit Connect, Amazon
11 | S3, and Google Cloud Storage.
12 | Pins can be automatically versioned, making it straightforward to track changes,
13 | re-run analyses on historical data, and undo mistakes.
14 | 
15 | You can use pins from R as well as Python. For example, you can use one language
16 | to read a pin created with the other. Learn more about
17 | [pins for R](https://pins.rstudio.com).
18 | 
19 | ## Installation
20 | 
21 | You can install the released version of pins from [PyPI](https://pypi.org/project/pins/):
22 | 
23 | ```bash
24 | python -m pip install pins
25 | ```
26 | 
27 | And the development version from [GitHub](https://github.com/rstudio/pins-python) with:
28 | 
29 | ```bash
30 | python -m pip install git+https://github.com/rstudio/pins-python
31 | ```
32 | 
33 | ## Usage
34 | 
35 | To use the pins package, you must first create a pin board. A good place
36 | to start is [](`~pins.board_folder`), which stores pins in a directory you
37 | specify. Here we'll use a special version of [](`~pins.board_folder`) called
38 | [](`~pins.board_temp`), which creates a temporary board that’s automatically
39 | deleted when your Python script or notebook session ends. This is great for examples, but
40 | obviously you shouldn't use it for real work!
41 | 
42 | ```{python}
43 | import pins
44 | from pins.data import mtcars
45 | 
46 | board = pins.board_temp()
47 | ```
48 | 
49 | You can "pin" (save) data to a board with the [](`~pins.boards.BaseBoard.pin_write`) method. It requires three
50 | arguments: an object, a name, and a pin type:
51 | 
52 | ```{python}
53 | board.pin_write(mtcars.head(), "mtcars", type="csv")
54 | ```
55 | 
56 | Above, we saved the data as a CSV, but depending on
57 | what you’re saving and who else you want to read it, you might use the
58 | `type` argument to instead save it as a `joblib`, `parquet`, or `json` file.
59 | 
60 | You can later retrieve the pinned data with [](`~pins.boards.BaseBoard.pin_read`):
61 | 
62 | ```{python}
63 | board.pin_read("mtcars")
64 | ```
65 | 
66 | A board on your computer is a good place to start, but the real power of
67 | pins comes when you use a board that’s shared with multiple people. To
68 | get started, you can use [](`~pins.board_folder`) with a directory on a shared
69 | drive or in Dropbox, or if you use [Posit
70 | Connect](https://posit.co/products/enterprise/connect/) you can use
71 | [](`~pins.board_connect`):
72 | 
73 | ```python
74 | # Note that this uses one approach to connecting,
75 | # the environment variables CONNECT_SERVER and CONNECT_API_KEY
76 | 
77 | board = pins.board_connect()
78 | board.pin_write(tidy_sales_data, "hadley/sales-summary", type="csv")
79 | ```
80 | 
81 | Then, someone else (or an automated report) can read and use your
82 | pin:
83 | 
84 | ```python
85 | board = pins.board_connect()
86 | board.pin_read("hadley/sales-summary")
87 | ```
88 | 
89 | You can easily control who gets to access the data using the Posit
90 | Connect permissions pane.
91 | 
92 | The pins package also includes boards that allow you to share data on
93 | services like Amazon’s S3 ([](`~pins.board_s3`)), Google Cloud Storage ([](`~pins.board_gcs`)),
94 | and Azure blob storage ([](`~pins.board_azure`)).
95 | 
96 | ## Contributing
97 | 
98 | - This project is released with a [Contributor Code of Conduct](https://www.contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html).
By contributing to this project, you agree to abide by its terms. 99 | 100 | - If you think you have encountered a bug, please [submit an issue](https://github.com/rstudio/pins-python/issues). 101 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/docs/logo.png -------------------------------------------------------------------------------- /pins/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | # Set version ---- 4 | from importlib_metadata import version as _v 5 | 6 | __version__ = _v("pins") 7 | 8 | del _v 9 | 10 | 11 | # Imports ---- 12 | from .cache import cache_prune, cache_info 13 | from .constructors import ( 14 | board_folder, 15 | board_temp, 16 | board_local, 17 | board_github, 18 | board_urls, # DEPRECATED 19 | board_url, 20 | board_connect, 21 | board_rsconnect, 22 | board_azure, 23 | board_s3, 24 | board_gcs, 25 | board_databricks, 26 | board, 27 | ) 28 | from .boards import board_deparse 29 | -------------------------------------------------------------------------------- /pins/_adaptors.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from abc import abstractmethod 5 | from typing import TYPE_CHECKING, Any, ClassVar, overload 6 | 7 | from databackend import AbstractBackend 8 | from typing_extensions import TypeAlias 9 | 10 | if TYPE_CHECKING: 11 | import pandas as pd 12 | 13 | PandasDataFrame: TypeAlias = pd.DataFrame 14 | DataFrame: TypeAlias = PandasDataFrame 15 | 16 | 17 | class AbstractPandasFrame(AbstractBackend): 18 | _backends = [("pandas", "DataFrame")] 19 | 20 | 21 | AbstractDF: TypeAlias = AbstractPandasFrame 22 | 23 | 24 | class Adaptor: 25 | def __init__(self, data: Any) -> None: 26 | self._d = data 27 | 28 | def write_json(self, file: str) -> None: 29 | with open(file, "w") as f: 30 | f.write(self.to_json()) 31 | 32 | def to_json(self) -> str: 33 | import json 34 | 35 | return json.dumps(self._d) 36 | 37 | def write_joblib(self, file: str) -> None: 38 | import joblib 39 | 40 | joblib.dump(self._d, file) 41 | 42 | def write_csv(self, file: str) -> None: 43 | msg = f"Writing to CSV is not supported for {type(self._d)}" 44 | raise NotImplementedError(msg) 45 | 46 | def write_parquet(self, file: str) -> None: 47 | msg = f"Writing to Parquet is not supported for {type(self._d)}" 48 | raise NotImplementedError(msg) 49 | 50 | def write_feather(self, file: str) -> None: 51 | msg = f"Writing to Feather is not supported for {type(self._d)}" 52 | raise NotImplementedError(msg) 53 | 54 | @property 55 | def data_preview(self) -> str: 56 | # note that the R library uses jsonlite::toJSON 57 | import json 58 | 59 | # TODO(compat): set display none in index.html 60 | return json.dumps({}) 61 | 62 | def default_title(self, name: str) -> str: 63 | # TODO(compat): title says CSV rather than data.frame 64 | # see https://github.com/machow/pins-python/issues/5 65 | return f"{name}: a pinned {self._obj_name}" 66 | 67 | @property 68 | def _obj_name(self) -> str: 69 | return f"{type(self._d).__qualname__} object" 70 | 71 | 72 | class DFAdaptor(Adaptor): 73 | _d: ClassVar[DataFrame] 74 | 75 | def __init__(self, data: DataFrame) -> None: 76 | super().__init__(data) 77 | 78 | @property 79 | def 
df_type(self) -> str: 80 | # Consider over-riding this for specialized dataframes 81 | return "DataFrame" 82 | 83 | @property 84 | @abstractmethod 85 | def columns(self) -> list[Any]: ... 86 | 87 | @property 88 | @abstractmethod 89 | def shape(self) -> tuple[int, int]: ... 90 | 91 | @abstractmethod 92 | def head(self, n: int) -> DFAdaptor: ... 93 | 94 | @property 95 | def data_preview(self) -> str: 96 | # TODO(compat) is 100 hard-coded? 97 | # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library 98 | data: list[dict[Any, Any]] = json.loads(self.head(100).to_json()) 99 | columns = [ 100 | {"name": [col], "label": [col], "align": ["left"], "type": [""]} 101 | for col in self.columns 102 | ] 103 | 104 | # this reproduces R pins behavior, by omitting entries that would be null 105 | data_no_nulls = [{k: v for k, v in row.items() if v is not None} for row in data] 106 | 107 | return json.dumps({"data": data_no_nulls, "columns": columns}) 108 | 109 | @property 110 | def _obj_name(self) -> str: 111 | row, col = self.shape 112 | return f"{row} x {col} {self.df_type}" 113 | 114 | 115 | class PandasAdaptor(DFAdaptor): 116 | _d: ClassVar[PandasDataFrame] 117 | 118 | def __init__(self, data: AbstractPandasFrame) -> None: 119 | super().__init__(data) 120 | 121 | @property 122 | def columns(self) -> list[Any]: 123 | return self._d.columns.tolist() 124 | 125 | @property 126 | def shape(self) -> tuple[int, int]: 127 | return self._d.shape 128 | 129 | def head(self, n: int) -> PandasAdaptor: 130 | return PandasAdaptor(self._d.head(n)) 131 | 132 | def to_json(self) -> str: 133 | return self._d.to_json(orient="records") 134 | 135 | def write_csv(self, file: str) -> None: 136 | self._d.to_csv(file, index=False) 137 | 138 | def write_parquet(self, file: str) -> None: 139 | self._d.to_parquet(file) 140 | 141 | def write_feather(self, file: str) -> None: 142 | self._d.to_feather(file) 143 | 144 | 145 | @overload 146 | def create_adaptor(obj: DataFrame) -> DFAdaptor: ... 147 | @overload 148 | def create_adaptor(obj: Any) -> Adaptor: ... 149 | def create_adaptor(obj: Any | DataFrame) -> Adaptor | DFAdaptor: 150 | if isinstance(obj, AbstractPandasFrame): 151 | return PandasAdaptor(obj) 152 | elif isinstance(obj, Adaptor): 153 | return obj 154 | else: 155 | return Adaptor(obj) 156 | -------------------------------------------------------------------------------- /pins/_types.py: -------------------------------------------------------------------------------- 1 | from io import IOBase 2 | from typing import Union 3 | 4 | StrOrFile = Union[str, IOBase] 5 | -------------------------------------------------------------------------------- /pins/cache.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import os 5 | import shutil 6 | import time 7 | import urllib.parse 8 | from collections.abc import Iterator 9 | from pathlib import Path 10 | 11 | import humanize 12 | from fsspec import register_implementation 13 | from fsspec.implementations.cached import SimpleCacheFileSystem 14 | 15 | from .config import get_cache_dir 16 | from .utils import hash_name, inform 17 | 18 | _log = logging.getLogger(__name__) 19 | 20 | 21 | # used if needed to preserve board path structure in the cache 22 | PLACEHOLDER_VERSION = "v" 23 | PLACEHOLDER_FILE = "file" 24 | 25 | 26 | def touch_access_time(path, access_time: float | None = None, strict=True): 27 | """Update access time of file. 
28 | 
29 |     Returns the new access time.
30 |     """
31 | 
32 |     if access_time is None:
33 |         access_time = time.time()
34 | 
35 |     p = Path(path)
36 | 
37 |     if not p.exists() and not strict:
38 |         p.touch()
39 | 
40 |     stat = p.stat()
41 |     os.utime(path, (access_time, stat.st_mtime))
42 | 
43 |     return access_time
44 | 
45 | 
46 | def protocol_to_string(protocol):
47 |     if isinstance(protocol, str):
48 |         return protocol
49 | 
50 |     return protocol[0]
51 | 
52 | 
53 | def prefix_cache(fs, board_base_path):
54 |     if isinstance(fs, str):
55 |         proto_name = fs
56 |     else:
57 |         proto_name = protocol_to_string(fs.protocol)
58 |     base_hash = hash_name(board_base_path, False)
59 | 
60 |     return f"{proto_name}_{base_hash}"
61 | 
62 | 
63 | class HashMapper:
64 |     def __init__(self, hash_prefix):
65 |         self.hash_prefix = hash_prefix
66 | 
67 |     def __call__(self, path: str) -> str:
68 |         if self.hash_prefix is not None:
69 |             # optionally make the name relative to a parent path
70 |             # using the hash of parent path as a prefix, to flatten a bit
71 |             _hash = Path(path).relative_to(Path(self.hash_prefix))
72 |             return str(_hash)
73 | 
74 |         else:
75 |             raise NotImplementedError()
76 | 
77 | 
78 | class PinsAccessTimeCacheMapper:
79 |     def __init__(self, hash_prefix):
80 |         self.hash_prefix = hash_prefix
81 | 
82 |     def __call__(self, path):
83 |         # hash the full path, and put anything after the final / at the end,
84 |         # just to make it easier to browse.
85 |         # this has the side benefit of keeping the original file name visible in the cache.
86 |         base_name = hash_name(path, False)
87 |         suffix = Path(path).name
88 |         return f"{base_name}_{suffix}"
89 | 
90 | 
91 | class PinsRscCacheMapper:
92 |     """Modifies the PinsCache to allow hash_prefix to be an RSC server url.
93 | 
94 |     Note that this class also modifies the first / in a path to be a +, so that
95 |     pin contents will not be put into subdirectories, e.g. for michael/mtcars/data.txt.
96 |     """
97 | 
98 |     def __init__(self, hash_prefix):
99 |         self.hash_prefix = hash_prefix
100 | 
101 |     def __call__(self, path):
102 |         # the main change in this function is that, for same_name, it returns
103 |         # the full path.
104 |         # change pin path of form <user>/<content> to <user>+<content>
105 |         _hash = path.replace("/", "+", 1)
106 |         return _hash
107 | 
108 | 
109 | class PinsCache(SimpleCacheFileSystem):
110 |     protocol = "pinscache"
111 | 
112 |     def __init__(self, *args, hash_prefix=None, mapper=HashMapper, **kwargs):
113 |         super().__init__(*args, **kwargs)
114 |         self.hash_prefix = hash_prefix
115 |         self._mapper = mapper(hash_prefix)
116 | 
117 |     def hash_name(self, path, *args, **kwargs):
118 |         return self._mapper(path)
119 | 
120 |     def _open(self, path, *args, **kwargs):
121 |         # For some reason, the open method of SimpleCacheFileSystem doesn't
122 |         # call _make_local_details, so we need to patch it in here.
123 |         # Note that methods like .cat() do call it. Other Caches don't have this issue.
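        # normalize the path, then make sure the cache entry and its parent
        # directories exist before delegating to the parent implementation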
124 |         path = self._strip_protocol(path)
125 |         self._make_local_details(path)
126 | 
127 |         return super()._open(path, *args, **kwargs)
128 | 
129 |     def _make_local_details(self, path):
130 |         # modifies method to create any parent directories needed by the cached file
131 |         # note that this is called in ._open(), at the point it's known the file
132 |         # will be cached
133 |         fn = super()._make_local_details(path)
134 |         _log.info(f"cache file: {fn}")
135 |         Path(fn).parent.mkdir(parents=True, exist_ok=True)
136 | 
137 |         return fn
138 | 
139 |     # same as upstream, brought in to preserve backwards compatibility
140 |     def _check_file(self, path):
141 |         self._check_cache()
142 |         sha = self._mapper(path)
143 |         for storage in self.storage:
144 |             fn = os.path.join(storage, sha)
145 |             if os.path.exists(fn):
146 |                 return fn
147 | 
148 | 
149 | class PinsUrlCache(PinsCache):
150 |     protocol = "pinsurlcache"
151 | 
152 |     def hash_name(self, path, same_name):
153 |         # strip final arg from path
154 |         # note that R pins uses fs::path_file, and I'm not sure exactly how it
155 |         # behaves for the many forms url paths can take.
156 |         # e.g. path_file(.../extdata/) -> extdata
157 |         # e.g. path_file(.../extdata?123) -> extdata?123
158 |         path_parts = urllib.parse.urlparse(path)[2]
159 | 
160 |         # strip off final whitespace and / (if it exists)
161 |         # TODO(compat): python pins currently not keeping query part of url
162 |         final_part = path_parts.rstrip().rstrip("/").rsplit("/", 1)[-1]
163 | 
164 |         # TODO: what happens in R pins if no final part?
165 |         if final_part == "":
166 |             final_part = PLACEHOLDER_FILE
167 | 
168 |         # hash url
169 |         prefix = hash_name(path, False)
170 | 
171 |         # note that we include an extra version folder, so it conforms with
172 |         # pin board path form: <pin>/<version>/<file>
173 |         proto_name = protocol_to_string(self.fs.protocol)
174 |         full_prefix = "_".join([proto_name, prefix])
175 |         return str(Path(full_prefix) / PLACEHOLDER_VERSION / final_part)
176 | 
177 | 
178 | class PinsAccessTimeCache(SimpleCacheFileSystem):
179 |     name = "pinsaccesstimecache"
180 | 
181 |     def __init__(
182 |         self, *args, hash_prefix=None, mapper=PinsAccessTimeCacheMapper, **kwargs
183 |     ):
184 |         super().__init__(*args, **kwargs)
185 |         self.hash_prefix = hash_prefix
186 |         self._mapper = mapper(hash_prefix)
187 | 
188 |     def hash_name(self, path, *args, **kwargs):
189 |         return self._mapper(path)
190 | 
191 |     def _open(self, path, mode="rb", **kwargs):
192 |         f = super()._open(path, mode=mode, **kwargs)
193 |         fn = self._check_file(path)
194 | 
195 |         if fn is None:
196 |             raise ValueError(f"Cached file should exist for path, but none found: {path}")
197 | 
198 |         touch_access_time(fn)
199 | 
200 |         return f
201 | 
202 |     # same as upstream, brought in to preserve backwards compatibility
203 |     def _check_file(self, path):
204 |         self._check_cache()
205 |         sha = self._mapper(path)
206 |         for storage in self.storage:
207 |             fn = os.path.join(storage, sha)
208 |             if os.path.exists(fn):
209 |                 return fn
210 | 
211 | 
212 | class CachePruner:
213 |     """Prunes the cache directory, across multiple boards.
214 | 
215 |     Note
216 |     ----
217 | 
218 |     `pins` assumes that all boards cache using these rules:
219 | 
220 |     * path structure: `<pin_name>/<version>/`.
221 |     * each version has a data.txt file in it.
222 |     """
223 | 
224 |     meta_path = "data.txt"
225 | 
226 |     def __init__(self, cache_dir: str | Path):
227 |         self.cache_dir = Path(cache_dir)
228 | 
229 |     def versions(self) -> Iterator[Path]:
230 |         for p_version in self.cache_dir.glob("*/*"):
231 |             if p_version.is_dir() and (p_version / self.meta_path).exists():
232 |                 yield p_version
233 | 
234 |     def should_prune_version(self, days, path: str | Path):
235 |         path = Path(path)
236 | 
237 |         expiry_time_sec = days * 60 * 60 * 24
238 |         prune_before = time.time() - expiry_time_sec
239 | 
240 |         p_meta = path / self.meta_path
241 | 
242 |         if not p_meta.exists():
243 |             raise FileNotFoundError(f"No metadata file: {p_meta.absolute()}")
244 | 
245 |         access_time = p_meta.stat().st_atime
246 |         return access_time < prune_before
247 | 
248 |     def old_versions(self, days):
249 |         return [p for p in self.versions() if self.should_prune_version(days, p)]
250 | 
251 |     def prune(self, days=30):
252 |         to_prune = self.old_versions(days)
253 |         size = sum(map(disk_usage, to_prune))
254 | 
255 |         # TODO: clean this up, general approach to prompting
256 |         confirmed = prompt_cache_prune(to_prune, size)
257 |         if confirmed:
258 |             for path in to_prune:
259 |                 delete_version(path)
260 |         else:
261 |             _log.info("Skipping cache deletion")
262 | 
263 | 
264 | def delete_version(path: str | Path):
265 |     path = Path(path)
266 |     shutil.rmtree(str(path.absolute()))
267 | 
268 | 
269 | def disk_usage(path):
270 |     return sum(p.stat().st_size for p in path.glob("**/*") if p.is_file() or p.is_dir())
271 | 
272 | 
273 | def prompt_cache_prune(to_prune, size) -> bool:
274 |     _log.info(f"Pruning items: {to_prune}")
275 |     human_size = humanize.naturalsize(size, binary=True)
276 |     resp = input(
277 |         f"Delete {len(to_prune)} pin versions, freeing {human_size}?"
278 |         "\n1: Yes"
279 |         "\n2: No"
280 |         "\n\nSelection: "
281 |     )
282 |     return resp == "1"
283 | 
284 | 
285 | def cache_info():
286 |     cache_root = get_cache_dir()
287 | 
288 |     cache_boards = list(Path(cache_root).glob("*"))
289 | 
290 |     print(f"Cache info: {cache_root}")
291 |     for p_board in cache_boards:
292 |         du = disk_usage(p_board)
293 |         human_size = humanize.naturalsize(du, binary=True)
294 |         rel_path = p_board.relative_to(cache_root)
295 |         print(f"* {rel_path}: {human_size}")
296 | 
297 | 
298 | def cache_prune(days=30, cache_root=None, prompt=True):
299 |     if cache_root is None:
300 |         cache_root = get_cache_dir()
301 | 
302 |     final_delete = []
303 |     for p_board in Path(cache_root).glob("*"):
304 |         pruner = CachePruner(p_board)
305 |         final_delete.extend(pruner.old_versions(days))
306 | 
307 |     size = sum(map(disk_usage, final_delete))
308 | 
309 |     if not final_delete:
310 |         inform(_log, "No stale pins found")
311 |         return
312 | 
313 |     if prompt:
314 |         confirmed = prompt_cache_prune(final_delete, size)
315 |     else:
316 |         confirmed = True
317 | 
318 |     if confirmed:
319 |         inform(_log, "Deleting pins from cache.")
320 |         for p in final_delete:
321 |             delete_version(p)
322 |     else:
323 |         inform(_log, "Skipping deletion of pins from cache.")
324 | 
325 | 
326 | # TODO: swap to use entrypoint
327 | register_implementation("pinscache", PinsCache)
328 | 
--------------------------------------------------------------------------------
/pins/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from types import SimpleNamespace
3 | 
4 | import appdirs
5 | 
6 | PINS_NAME = "pins-py"
7 | PINS_ENV_DATA_DIR = "PINS_DATA_DIR"
8 | PINS_ENV_CACHE_DIR = "PINS_CACHE_DIR"
9 | PINS_ENV_INSECURE_READ = "PINS_ALLOW_PICKLE_READ"
10 | 
PINS_ENV_ALLOW_RSC_SHORT_NAME = "PINS_ALLOW_RSC_SHORT_NAME" 11 | PINS_ENV_FEATURE_PREVIEW = "PINS_FEATURE_PREVIEW" 12 | 13 | pins_options = SimpleNamespace(quiet=False) 14 | 15 | 16 | def _interpret_int(env_var_name): 17 | env_var = os.environ.get(env_var_name, "0") 18 | try: 19 | env_int = int(env_var) 20 | except ValueError: 21 | raise ValueError( 22 | f"{env_var_name} must be '0' or '1', but was set to " f"{repr(env_var)}." 23 | ) 24 | 25 | flag = bool(env_int) 26 | return flag 27 | 28 | 29 | def get_data_dir(): 30 | return os.environ.get(PINS_ENV_DATA_DIR, appdirs.user_data_dir(PINS_NAME)) 31 | 32 | 33 | def get_cache_dir(): 34 | return os.environ.get(PINS_ENV_CACHE_DIR, appdirs.user_cache_dir(PINS_NAME)) 35 | 36 | 37 | def get_allow_pickle_read(flag): 38 | if flag is None: 39 | return _interpret_int(PINS_ENV_INSECURE_READ) 40 | 41 | return flag 42 | 43 | 44 | def get_allow_rsc_short_name(): 45 | return _interpret_int(PINS_ENV_ALLOW_RSC_SHORT_NAME) 46 | 47 | 48 | def get_feature_preview(): 49 | return _interpret_int(PINS_ENV_FEATURE_PREVIEW) 50 | -------------------------------------------------------------------------------- /pins/data/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib_resources import files as _files 2 | 3 | sources = { 4 | "mtcars": _files("pins") / "data/mtcars.csv", 5 | } 6 | 7 | 8 | def __dir__(): 9 | return list(sources) 10 | 11 | 12 | def __getattr__(k): 13 | import pandas as pd 14 | 15 | f_path = sources.get("mtcars") 16 | 17 | return pd.read_csv(f_path) 18 | -------------------------------------------------------------------------------- /pins/data/mtcars.csv: -------------------------------------------------------------------------------- 1 | mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb 2 | 21,6,160,110,3.9,2.62,16.46,0,1,4,4 3 | 21,6,160,110,3.9,2.875,17.02,0,1,4,4 4 | 22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 5 | 21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 6 | 18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 7 | 18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 8 | 14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 9 | 24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 10 | 22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 11 | 19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 12 | 17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 13 | 16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 14 | 17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 15 | 15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 16 | 10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 17 | 10.4,8,460,215,3,5.424,17.82,0,0,3,4 18 | 14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 19 | 32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 20 | 30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 21 | 33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 22 | 21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 23 | 15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 24 | 15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 25 | 13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 26 | 19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 27 | 27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 28 | 26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 29 | 30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 30 | 15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 31 | 19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 32 | 15,8,301,335,3.54,3.57,14.6,0,1,5,8 33 | 21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 34 | -------------------------------------------------------------------------------- /pins/databricks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/databricks/__init__.py 
-------------------------------------------------------------------------------- /pins/databricks/fs.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from io import BytesIO 3 | from pathlib import Path, PurePath 4 | 5 | from fsspec import AbstractFileSystem 6 | 7 | from pins.errors import PinsError 8 | 9 | 10 | class DatabricksFs(AbstractFileSystem): 11 | protocol = "dbc" 12 | 13 | def ls(self, path, detail=False, **kwargs): 14 | return self._databricks_ls(path, detail) 15 | 16 | def exists(self, path: str, **kwargs): 17 | return self._databricks_exists(path) 18 | 19 | def open(self, path: str, mode: str = "rb", *args, **kwargs): 20 | if mode != "rb": 21 | raise NotImplementedError 22 | return self._databricks_open(path) 23 | 24 | def get(self, rpath, lpath, recursive=False, **kwargs): 25 | self._databricks_get(rpath, lpath, recursive, **kwargs) 26 | 27 | def mkdir(self, path, create_parents=True, **kwargs): 28 | if not create_parents: 29 | raise NotImplementedError 30 | self._databricks_mkdir(path) 31 | 32 | def put( 33 | self, 34 | lpath, 35 | rpath, 36 | recursive=True, 37 | maxdepth=None, 38 | **kwargs, 39 | ): 40 | if not recursive: 41 | raise NotImplementedError 42 | if maxdepth is not None: 43 | raise NotImplementedError 44 | self._databricks_put(lpath, rpath) 45 | 46 | def rm(self, path, recursive=True, maxdepth=None) -> None: 47 | if not recursive: 48 | raise NotImplementedError 49 | if maxdepth is not None: 50 | raise NotImplementedError 51 | if self._databricks_exists(path): 52 | self._databricks_rm_dir(path) 53 | 54 | @staticmethod 55 | def _databricks_put(lpath, rpath): 56 | from databricks.sdk import WorkspaceClient 57 | 58 | w = WorkspaceClient() 59 | path = Path(lpath).absolute() 60 | orig_path = path 61 | 62 | def _upload_files(path): 63 | contents = Path(path) 64 | for item in contents.iterdir(): 65 | abs_path = PurePath(path).joinpath(item) 66 | is_file = Path(abs_path).is_file() 67 | if is_file: 68 | rel_path = abs_path.relative_to(orig_path) 69 | db_path = PurePath(rpath).joinpath(rel_path) 70 | file = open(abs_path, "rb") 71 | w.files.upload(str(db_path), BytesIO(file.read()), overwrite=True) 72 | else: 73 | _upload_files(abs_path) 74 | 75 | _upload_files(path) 76 | 77 | def _databricks_get(self, board, rpath, lpath, recursive=False, **kwargs): 78 | from databricks.sdk import WorkspaceClient 79 | 80 | w = WorkspaceClient() 81 | file_type = self._databricks_is_type(rpath) 82 | if file_type == "file": 83 | board.fs.get(rpath, lpath, **kwargs) 84 | return 85 | 86 | def _get_files(path, recursive, **kwargs): 87 | raw_contents = w.files.list_directory_contents(path) 88 | contents = list(raw_contents) 89 | details = list(map(self._databricks_content_details, contents)) 90 | for item in details: 91 | item_path = item.get("path") 92 | if item.get("is_directory"): 93 | if recursive: 94 | _get_files(item_path, recursive=recursive, **kwargs) 95 | else: 96 | rel_path = PurePath(item_path).relative_to(rpath) 97 | target_path = PurePath(lpath).joinpath(rel_path) 98 | board.fs.get(item_path, str(target_path)) 99 | 100 | _get_files(rpath, recursive, **kwargs) 101 | 102 | def _databricks_open(self, path): 103 | from databricks.sdk import WorkspaceClient 104 | 105 | if not self._databricks_exists(path): 106 | raise PinsError(f"File or directory does not exist at path: {path}") 107 | w = WorkspaceClient() 108 | resp = w.files.download(path) 109 | f = BytesIO() 110 | shutil.copyfileobj(resp.contents, f) 111 | f.seek(0) 112 | 
return f 113 | 114 | def _databricks_exists(self, path: str): 115 | if self._databricks_is_type(path) == "nothing": 116 | return False 117 | else: 118 | return True 119 | 120 | @staticmethod 121 | def _databricks_is_type(path: str): 122 | from databricks.sdk import WorkspaceClient 123 | from databricks.sdk.errors import NotFound 124 | 125 | w = WorkspaceClient() 126 | try: 127 | w.files.get_metadata(path) 128 | except NotFound: 129 | try: 130 | w.files.get_directory_metadata(path) 131 | except NotFound: 132 | return "nothing" 133 | else: 134 | return "directory" 135 | else: 136 | return "file" 137 | 138 | def _databricks_ls(self, path, detail): 139 | from databricks.sdk import WorkspaceClient 140 | 141 | if not self._databricks_exists(path): 142 | raise PinsError(f"File or directory does not exist at path: {path}") 143 | w = WorkspaceClient() 144 | if self._databricks_is_type(path) == "file": 145 | if detail: 146 | return [dict(name=path, size=None, type="file")] 147 | else: 148 | return path 149 | 150 | contents_raw = w.files.list_directory_contents(path) 151 | contents = list(contents_raw) 152 | items = [] 153 | for item in contents: 154 | item = self._databricks_content_details(item) 155 | item_path = item.get("path") 156 | item_path = item_path.rstrip("/") 157 | if detail: 158 | if item.get("is_directory"): 159 | item_type = "directory" 160 | else: 161 | item_type = "file" 162 | items.append(dict(name=item_path, size=None, type=item_type)) 163 | else: 164 | items.append(item_path) 165 | return items 166 | 167 | def _databricks_rm_dir(self, path): 168 | from databricks.sdk import WorkspaceClient 169 | 170 | w = WorkspaceClient() 171 | raw_contents = w.files.list_directory_contents(path) 172 | contents = list(raw_contents) 173 | details = list(map(self._databricks_content_details, contents)) 174 | for item in details: 175 | item_path = item.get("path") 176 | if item.get("is_directory"): 177 | self._databricks_rm_dir(item_path) 178 | else: 179 | w.files.delete(item_path) 180 | w.files.delete_directory(path) 181 | 182 | @staticmethod 183 | def _databricks_mkdir(path): 184 | from databricks.sdk import WorkspaceClient 185 | 186 | w = WorkspaceClient() 187 | w.files.create_directory(path) 188 | 189 | @staticmethod 190 | def _databricks_content_details(item): 191 | details = { 192 | "path": item.path, 193 | "name": item.name, 194 | "is_directory": item.is_directory, 195 | } 196 | return details 197 | -------------------------------------------------------------------------------- /pins/drivers.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from pathlib import Path 3 | from typing import Any 4 | 5 | from pins._adaptors import Adaptor, create_adaptor 6 | 7 | from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read 8 | from .errors import PinsInsecureReadError 9 | from .meta import Meta 10 | 11 | # TODO: move IFileSystem out of boards, to fix circular import 12 | # from .boards import IFileSystem 13 | 14 | 15 | UNSAFE_TYPES = frozenset(["joblib"]) 16 | REQUIRES_SINGLE_FILE = frozenset(["csv", "joblib"]) 17 | 18 | 19 | def load_path(filename: str, path_to_version, pin_type=None): 20 | # file path creation ------------------------------------------------------ 21 | if pin_type == "table": 22 | # this type contains an rds and csv files named data.{ext}, so we match 23 | # R pins behavior and hardcode the name 24 | filename = "data.csv" 25 | 26 | if path_to_version is not None: 27 | if 
isinstance(path_to_version, str): 28 | path_to_version = path_to_version.rstrip("/") 29 | path_to_file = f"{path_to_version}/{filename}" 30 | else: 31 | # BoardUrl doesn't have versions, and the file is the full url 32 | path_to_file = filename 33 | 34 | return path_to_file 35 | 36 | 37 | def load_file(filename: str, fs, path_to_version, pin_type): 38 | return fs.open(load_path(filename, path_to_version, pin_type)) 39 | 40 | 41 | def load_data( 42 | meta: Meta, 43 | fs, 44 | path_to_version: "str | None" = None, 45 | allow_pickle_read: "bool | None" = None, 46 | ): 47 | """Return loaded data, based on meta type. 48 | Parameters 49 | ---------- 50 | meta: Meta 51 | Information about the stored data (e.g. its type). 52 | fs: IFileSystem 53 | An abstract filesystem with a method to .open() files. 54 | path_to_version: 55 | A filepath used as the parent directory the data to-be-loaded lives in. 56 | """ 57 | 58 | # TODO: extandable loading with deferred importing 59 | if meta.type in UNSAFE_TYPES and not get_allow_pickle_read(allow_pickle_read): 60 | raise PinsInsecureReadError( 61 | f"Reading pin type {meta.type} involves reading a pickle file, so is NOT secure." 62 | f"Set the allow_pickle_read=True when creating the board, or the " 63 | f"{PINS_ENV_INSECURE_READ}=1 environment variable.\n" 64 | "See:\n" 65 | " * https://docs.python.org/3/library/pickle.html \n" 66 | " * https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations" 67 | ) 68 | 69 | with load_file(meta.file, fs, path_to_version, meta.type) as f: 70 | if meta.type == "csv": 71 | import pandas as pd 72 | 73 | return pd.read_csv(f) 74 | 75 | elif meta.type == "arrow": 76 | import pandas as pd 77 | 78 | return pd.read_feather(f) 79 | 80 | elif meta.type == "feather": 81 | import pandas as pd 82 | 83 | return pd.read_feather(f) 84 | 85 | elif meta.type == "parquet": 86 | import pandas as pd 87 | 88 | return pd.read_parquet(f) 89 | 90 | elif meta.type == "table": 91 | import pandas as pd 92 | 93 | return pd.read_csv(f) 94 | 95 | elif meta.type == "joblib": 96 | import joblib 97 | 98 | return joblib.load(f) 99 | 100 | elif meta.type == "json": 101 | import json 102 | 103 | return json.load(f) 104 | 105 | elif meta.type == "file": 106 | raise NotImplementedError( 107 | "Methods like `.pin_read()` are not able to read 'file' type pins." 108 | " Use `.pin_download()` to download the file." 109 | ) 110 | 111 | elif meta.type == "rds": 112 | try: 113 | import rdata # pyright: ignore[reportMissingImports] 114 | 115 | return rdata.read_rds(f) 116 | except ModuleNotFoundError: 117 | raise ModuleNotFoundError( 118 | "Install the 'rdata' package to attempt to convert 'rds' files into Python objects." 119 | ) 120 | 121 | raise NotImplementedError(f"No driver for type {meta.type}") 122 | 123 | 124 | def save_data( 125 | obj: "Adaptor | Any", fname, pin_type=None, apply_suffix: bool = True 126 | ) -> "str | Sequence[str]": 127 | # TODO: extensible saving with deferred importing 128 | # TODO: how to encode arguments to saving / loading drivers? 129 | # e.g. pandas index options 130 | # TODO: would be useful to have singledispatch func for a "default saver" 131 | # as argument to board, and then type dispatchers for explicit cases 132 | # of saving / loading objects different ways. 
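    # current flow: normalize obj to an Adaptor, build the target file name
    # with an appropriate suffix, then dispatch on pin_type below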
133 | 134 | if isinstance(obj, Adaptor): 135 | adaptor, obj = obj, obj._d 136 | else: 137 | adaptor = create_adaptor(obj) 138 | 139 | if apply_suffix: 140 | if pin_type == "file": 141 | suffix = "".join(Path(obj).suffixes) 142 | else: 143 | suffix = f".{pin_type}" 144 | else: 145 | suffix = "" 146 | 147 | if isinstance(fname, list): 148 | final_name = fname 149 | else: 150 | final_name = f"{fname}{suffix}" 151 | 152 | if pin_type == "csv": 153 | adaptor.write_csv(final_name) 154 | elif pin_type == "arrow": 155 | # NOTE: R pins accepts the type arrow, and saves it as feather. 156 | # we allow reading this type, but raise an error for writing. 157 | adaptor.write_feather(final_name) 158 | elif pin_type == "feather": 159 | msg = ( 160 | 'Saving data as type "feather" no longer supported. Use type "arrow" instead.' 161 | ) 162 | raise NotImplementedError(msg) 163 | elif pin_type == "parquet": 164 | adaptor.write_parquet(final_name) 165 | elif pin_type == "joblib": 166 | adaptor.write_joblib(final_name) 167 | elif pin_type == "json": 168 | adaptor.write_json(final_name) 169 | elif pin_type == "file": 170 | import contextlib 171 | import shutil 172 | 173 | if isinstance(obj, list): 174 | for file, final in zip(obj, final_name): 175 | with contextlib.suppress(shutil.SameFileError): 176 | shutil.copyfile(str(file), final) 177 | return obj 178 | # ignore the case where the source is the same as the target 179 | else: 180 | with contextlib.suppress(shutil.SameFileError): 181 | shutil.copyfile(str(obj), final_name) 182 | 183 | else: 184 | raise NotImplementedError(f"Cannot save type: {pin_type}") 185 | 186 | return final_name 187 | 188 | 189 | def default_title(obj: Any, name: str) -> str: 190 | # Kept for backward compatibility only. 191 | return create_adaptor(obj).default_title(name) 192 | -------------------------------------------------------------------------------- /pins/errors.py: -------------------------------------------------------------------------------- 1 | class PinsError(Exception): 2 | pass 3 | 4 | 5 | class PinsVersionError(PinsError): 6 | pass 7 | 8 | 9 | class PinsInsecureReadError(PinsError): 10 | pass 11 | -------------------------------------------------------------------------------- /pins/meta.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping, Sequence 4 | from dataclasses import InitVar, asdict, dataclass, field, fields 5 | from pathlib import Path 6 | from typing import Any, ClassVar 7 | 8 | import yaml 9 | 10 | from ._types import IOBase, StrOrFile 11 | from .versions import Version, VersionRaw, guess_version 12 | 13 | META_FILENAME = "data.txt" 14 | DEFAULT_API_VERSION = 1 15 | 16 | 17 | @dataclass 18 | class MetaRaw: 19 | """Absolute minimum metadata for a pin. 20 | 21 | Parameters 22 | ---------- 23 | file: 24 | All relevant files contained in the pin. Note that these be absolute paths 25 | to fetch from the target filesystem. 26 | type: 27 | The type of pin data stored. This is used to determine how to read / write it. 28 | """ 29 | 30 | file: str | Sequence[str] | None 31 | type: str 32 | name: str 33 | 34 | 35 | @dataclass 36 | class Meta: 37 | """Represent metadata for a pin version. 38 | 39 | Parameters 40 | ---------- 41 | title: 42 | A title for the pin. 43 | description: 44 | A detailed description of the pin contents. 45 | tags: 46 | Optional tags applied to the pin. 47 | created: 48 | Datetime the pin was created (TODO: document format). 
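        A compact UTC timestamp, e.g. ``20200113T235859Z`` (the format that
        appears in each version's data.txt).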
49 |     pin_hash:
50 |         A hash of the pin.
51 |     file:
52 |         All relevant files in the pin. Should be relative to this pin's folder.
53 |     file_size:
54 |         The total size of the files in the pin.
55 |     type:
56 |         The type of pin data stored. This is used to determine how to read / write it.
57 |     api_version:
58 |         The internal version of the metadata format.
59 |     name:
60 |         TODO - where is this in R pins?
61 |     user:
62 |         A dictionary of additional metadata that may be specified by the user.
63 |     local:
64 |         A dictionary of additional metadata that may be added by the board, depending
65 |         on the backend used. E.g. Posit Connect content id, url, etc.
66 | 
67 |     """
68 | 
69 |     _excluded: ClassVar[set[str]] = {"name", "version", "local"}
70 | 
71 |     title: str | None
72 |     description: str | None
73 | 
74 |     # TODO(defer): different from R pins, which has a local field
75 |     created: str
76 |     pin_hash: str
77 | 
78 |     file: str | Sequence[str]
79 |     file_size: int
80 |     type: str
81 | 
82 |     api_version: int
83 | 
84 |     # In the metadata yaml, the created field uses a custom format, so
85 |     # we need a version object in order to render it. You can think of
86 |     # the version here as "the thing that was used to create version_name,
87 |     # pin_hash, created, etc."
88 |     version: VersionRaw
89 | 
90 |     tags: list[str] | None = None
91 |     name: str | None = None
92 |     user: Mapping = field(default_factory=dict)
93 |     local: Mapping = field(default_factory=dict)
94 | 
95 |     unknown_fields: InitVar[dict | None] = None
96 | 
97 |     def __post_init__(self, unknown_fields: dict | None):
98 |         unknown_fields = {} if unknown_fields is None else unknown_fields
99 | 
100 |         self._unknown_fields = unknown_fields
101 | 
102 |     def __getattr__(self, k):
103 |         try:
104 |             return self._unknown_fields[k]
105 |         except KeyError:
106 |             raise AttributeError(f"Metadata field not found: {k}")
107 | 
108 |     def to_dict(self) -> dict[str, Any]:
109 |         data = asdict(self)
110 | 
111 |         return data
112 | 
113 |     def to_pin_dict(self):
114 |         d = self.to_dict()
115 | 
116 |         for k in self._excluded:
117 |             del d[k]
118 | 
119 |         # TODO: once tag writing is implemented, delete this line
120 |         del d["tags"]
121 | 
122 |         return d
123 | 
124 |     @classmethod
125 |     def from_pin_dict(cls, data, pin_name, version, local=None) -> Meta:
126 |         # TODO: re-arrange Meta argument positions to reflect what's been
127 |         # learned about default arguments. e.g.
title was not used at some 128 | # point in api_version 1 129 | all_field_names = {entry.name for entry in fields(Meta)} 130 | 131 | keep_fields = all_field_names - cls._excluded 132 | 133 | extra = {"title": None} if "title" not in data else {} 134 | local = {} if local is None else local 135 | 136 | meta_data = {k: v for k, v in data.items() if k in keep_fields} 137 | unknown = {k: v for k, v in data.items() if k not in keep_fields} 138 | 139 | return cls( 140 | **meta_data, 141 | **extra, 142 | name=pin_name, 143 | version=version, 144 | local=local, 145 | unknown_fields=unknown, 146 | ) 147 | 148 | def to_pin_yaml(self, f: IOBase | None = None) -> str | None: 149 | data = self.to_pin_dict() 150 | 151 | return yaml.dump(data, f) 152 | 153 | 154 | @dataclass 155 | class MetaV0: 156 | file: str | Sequence[str] 157 | type: str 158 | description: str | None 159 | 160 | name: str 161 | 162 | version: VersionRaw 163 | 164 | # holds raw data.txt contents 165 | original_fields: dict = field(default_factory=dict) 166 | user: dict = field(default_factory=dict, init=False) 167 | local: Mapping = field(default_factory=dict) 168 | 169 | title: ClassVar[None] = None 170 | created: ClassVar[None] = None 171 | pin_hash: ClassVar[None] = None 172 | file_size: ClassVar[None] = None 173 | api_version: ClassVar[None] = None 174 | 175 | def to_dict(self): 176 | return asdict(self) 177 | 178 | @classmethod 179 | def from_pin_dict(cls, data, pin_name, version, local=None) -> MetaV0: 180 | # could infer from dataclasses.fields(), but seems excessive. 181 | req_fields = {"type", "description"} 182 | 183 | # Note that we need to .get(), since fields may not be in metadata 184 | req_inputs = {k: data.get(k) for k in req_fields} 185 | req_inputs["file"] = data["path"] 186 | 187 | local = {} if local is None else local 188 | return cls( 189 | **req_inputs, 190 | name=pin_name, 191 | original_fields=data, 192 | version=version, 193 | local=local, 194 | ) 195 | 196 | def to_pin_dict(self): 197 | raise NotImplementedError("v0 pins metadata are read only.") 198 | 199 | def to_pin_yaml(self, *args, **kwargs): 200 | self.to_pin_dict() 201 | 202 | 203 | class MetaFactory: 204 | """Responsible for creating and loading (e.g. from yaml) of meta objects.""" 205 | 206 | def get_meta_name(self, *args, **kwargs) -> str: 207 | return META_FILENAME 208 | 209 | def get_version_for_meta(self, api_version) -> Version: 210 | if api_version != 1: 211 | raise NotImplementedError(f"Unsupported api_version: {api_version}") 212 | 213 | return Version 214 | 215 | def create( 216 | self, 217 | base_folder: str | Path, 218 | files: Sequence[StrOrFile], 219 | type, 220 | # TODO: when files is a string name should be okay as None 221 | name, 222 | title, 223 | description=None, 224 | created=None, 225 | user=None, 226 | ) -> Meta: 227 | if title is None: 228 | raise NotImplementedError("title arguments required") 229 | if isinstance(files, str): 230 | from pathlib import Path 231 | 232 | version = Version.from_files([files], created) 233 | p_file = Path(files) 234 | file_size = p_file.stat().st_size 235 | file_name = str(Path(files).relative_to(Path(base_folder))) 236 | 237 | elif isinstance(files, IOBase): 238 | # TODO: in theory can calculate size from a file object, but let's 239 | # wait until it's clear how calculating file size fits into pins 240 | # e.g. in combination with folders, etc.. 
241 | 242 | # from os import fstat 243 | # 244 | # version = Version.from_files([files], created) 245 | # files_size = fstat(files.fileno()).st_size 246 | 247 | raise NotImplementedError("Cannot create from file object.") 248 | else: 249 | if isinstance(files, (list, tuple)): 250 | from pathlib import Path 251 | 252 | file_name = [Path(f).name for f in files] 253 | file_size = [Path(f).stat().st_size for f in files] 254 | version = Version.from_files(files, created) 255 | 256 | return Meta( 257 | title=title, 258 | description=description, 259 | file=file_name, # TODO: FINISH 260 | file_size=file_size, 261 | pin_hash=version.hash, 262 | created=version.render_created(), 263 | type=type, 264 | api_version=DEFAULT_API_VERSION, 265 | name=name, 266 | user=user if user is not None else {}, 267 | version=version, 268 | ) 269 | 270 | def create_raw(self, files: Sequence[StrOrFile], type: str, name: str) -> MetaRaw: 271 | return MetaRaw(files, type, name) 272 | 273 | def read_pin_yaml( 274 | self, 275 | f: IOBase, 276 | pin_name: str, 277 | version: str | VersionRaw, 278 | local=None, 279 | ) -> Meta: 280 | if isinstance(version, str): 281 | version_obj = guess_version(version) 282 | else: 283 | version_obj = version 284 | 285 | data = yaml.safe_load(f) 286 | 287 | api_version = data.get("api_version", 0) 288 | if api_version >= 2: 289 | raise NotImplementedError( 290 | f"api_version {api_version} by this version of the pins library" 291 | ) 292 | elif api_version == 0: 293 | cls_meta = MetaV0 294 | else: 295 | cls_meta = Meta 296 | 297 | return cls_meta.from_pin_dict(data, pin_name, version=version_obj, local=local) 298 | -------------------------------------------------------------------------------- /pins/rsconnect/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/rsconnect/__init__.py -------------------------------------------------------------------------------- /pins/rsconnect/html/highlight.js-9.15.9/qtcreator_light.css: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Qt Creator light color scheme 4 | 5 | */ 6 | 7 | 8 | .hljs { 9 | display: block; 10 | overflow-x: auto; 11 | background: #ffffff; 12 | } 13 | 14 | .hljs, 15 | .hljs-subst, 16 | .hljs-tag, 17 | .hljs-title { 18 | color: #000000; 19 | } 20 | 21 | .hljs-strong, 22 | .hljs-emphasis { 23 | color: #000000; 24 | } 25 | 26 | .hljs-bullet, 27 | .hljs-quote, 28 | .hljs-number, 29 | .hljs-regexp, 30 | .hljs-literal { 31 | color: #000080; 32 | } 33 | 34 | .hljs-code 35 | .hljs-selector-class { 36 | color: #800080; 37 | } 38 | 39 | .hljs-emphasis, 40 | .hljs-stronge, 41 | .hljs-type { 42 | font-style: italic; 43 | } 44 | 45 | .hljs-keyword, 46 | .hljs-selector-tag, 47 | .hljs-function, 48 | .hljs-section, 49 | .hljs-symbol, 50 | .hljs-name { 51 | color: #808000; 52 | } 53 | 54 | .hljs-attribute { 55 | color: #800000; 56 | } 57 | 58 | .hljs-variable, 59 | .hljs-params, 60 | .hljs-class .hljs-title { 61 | color: #0055AF; 62 | } 63 | 64 | .hljs-string, 65 | .hljs-selector-id, 66 | .hljs-selector-attr, 67 | .hljs-selector-pseudo, 68 | .hljs-type, 69 | .hljs-built_in, 70 | .hljs-builtin-name, 71 | .hljs-template-tag, 72 | .hljs-template-variable, 73 | .hljs-addition, 74 | .hljs-link { 75 | color: #008000; 76 | } 77 | 78 | .hljs-comment, 79 | .hljs-meta, 80 | .hljs-deletion { 81 | color: #008000; 82 | } 83 | 
-------------------------------------------------------------------------------- /pins/rsconnect/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 29 | 30 | 31 | 32 |
33 |

{{pin_name}}

34 | {% if pin_metadata %} 35 |

36 | {% if date %}Last updated from Python: {{ date }} •{% endif %} 37 | Format: {{ pin_metadata.type }} • 38 | API: v{{ pin_metadata.api_version }} 39 |

40 |

{{ pin_metadata.description }}

41 |

Download data: {{ pin_files }}

42 |
43 | Raw metadata 44 |
{{ pin_metadata.to_pin_yaml() }}
45 |
46 | {% endif %} 47 |
48 | 49 |
50 |

Python Code

51 | 52 |
from pins import board_connect
53 | from dotenv import load_dotenv
54 | load_dotenv()
55 | 
56 | board = {{board_deparse}}
57 | board.pin_read("{{pin_name}}")
58 | 59 | 64 |
65 | 66 |
67 |

R Code

68 |
library(pins)
69 | 
70 | board <- board_connect(auth = "envvar")
71 | pin_read(board, "{{pin_name}}")
72 |
73 | 74 |
75 |

Preview (up to 100 rows)

76 |
77 | 80 |
81 |
82 | 83 | 84 | -------------------------------------------------------------------------------- /pins/rsconnect/html/pagedtable-1.1/pagedtable.css: -------------------------------------------------------------------------------- 1 | .pagedtable { 2 | overflow: auto; 3 | padding-left: 8px; 4 | padding-right: 8px; 5 | } 6 | 7 | .pagedtable table { 8 | width: 100%; 9 | max-width: 100%; 10 | margin: 0; 11 | border-bottom: 1px solid #dddddd; 12 | font-weight: 100; 13 | line-height: 24px; 14 | } 15 | 16 | .pagedtable td, .pagedtable th { 17 | padding: 2px 4px 3px 4px; 18 | } 19 | 20 | .pagedtable th { 21 | border: none; 22 | border-bottom: 1px solid #dddddd; 23 | 24 | min-width: 45px; 25 | font-weight: normal; 26 | } 27 | 28 | .pagedtable-empty th { 29 | display: none; 30 | } 31 | 32 | .pagedtable td { 33 | white-space: nowrap; 34 | overflow: hidden; 35 | text-overflow: ellipsis; 36 | } 37 | 38 | .pagedtable .even { 39 | background-color: #fafafa; 40 | } 41 | 42 | .pagedtable-padding-col { 43 | display: none; 44 | } 45 | 46 | .pagedtable a { 47 | -webkit-touch-callout: none; 48 | -webkit-user-select: none; 49 | -khtml-user-select: none; 50 | -moz-user-select: none; 51 | -ms-user-select: none; 52 | user-select: none; 53 | } 54 | 55 | .pagedtable-index-nav { 56 | cursor: pointer; 57 | padding: 0 5px 0 5px; 58 | float: right; 59 | border: 0; 60 | } 61 | 62 | .pagedtable-index-nav-disabled { 63 | cursor: default; 64 | text-decoration: none; 65 | color: #999; 66 | } 67 | 68 | a.pagedtable-index-nav-disabled:hover { 69 | text-decoration: none; 70 | color: #999; 71 | } 72 | 73 | .pagedtable-indexes { 74 | cursor: pointer; 75 | float: right; 76 | border: 0; 77 | } 78 | 79 | .pagedtable-index-current { 80 | cursor: default; 81 | text-decoration: none; 82 | color: #333; 83 | border: 0; 84 | } 85 | 86 | a.pagedtable-index-current:hover { 87 | text-decoration: none; 88 | color: #333; 89 | } 90 | 91 | .pagedtable-index { 92 | width: 30px; 93 | display: inline-block; 94 | text-align: center; 95 | border: 0; 96 | } 97 | 98 | .pagedtable-index-separator-left { 99 | display: inline-block; 100 | color: #333; 101 | font-size: 9px; 102 | padding: 0 0 0 0; 103 | cursor: default; 104 | } 105 | 106 | .pagedtable-index-separator-right { 107 | display: inline-block; 108 | color: #333; 109 | font-size: 9px; 110 | padding: 0 4px 0 0; 111 | cursor: default; 112 | } 113 | 114 | .pagedtable-footer { 115 | padding-top: 6px; 116 | padding-bottom: 5px; 117 | } 118 | 119 | .pagedtable-not-empty .pagedtable-footer { 120 | } 121 | 122 | .pagedtable-info { 123 | overflow: hidden; 124 | color: #999; 125 | white-space: nowrap; 126 | text-overflow: ellipsis; 127 | } 128 | 129 | .pagedtable-header-name { 130 | overflow: hidden; 131 | text-overflow: ellipsis; 132 | } 133 | 134 | .pagedtable-header-type { 135 | color: #999; 136 | height: 0px; 137 | } 138 | 139 | .pagedtable-na-cell { 140 | font-style: italic; 141 | opacity: 0.3; 142 | } 143 | -------------------------------------------------------------------------------- /pins/tests/.gitignore: -------------------------------------------------------------------------------- 1 | rsconnect_api_keys.json 2 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/data.txt: -------------------------------------------------------------------------------- 1 | api_version: 1 2 | created: 20200113T235859Z 3 | description: some description 4 | file: test_rsc_pin.csv 5 | file_size: 19 6 | pin_hash: 
a6cf5331bf3de6c6 7 | title: some pin 8 | type: csv 9 | user: {} 10 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/highlight.js-9.15.9/qtcreator_light.css: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Qt Creator light color scheme 4 | 5 | */ 6 | 7 | 8 | .hljs { 9 | display: block; 10 | overflow-x: auto; 11 | background: #ffffff; 12 | } 13 | 14 | .hljs, 15 | .hljs-subst, 16 | .hljs-tag, 17 | .hljs-title { 18 | color: #000000; 19 | } 20 | 21 | .hljs-strong, 22 | .hljs-emphasis { 23 | color: #000000; 24 | } 25 | 26 | .hljs-bullet, 27 | .hljs-quote, 28 | .hljs-number, 29 | .hljs-regexp, 30 | .hljs-literal { 31 | color: #000080; 32 | } 33 | 34 | .hljs-code 35 | .hljs-selector-class { 36 | color: #800080; 37 | } 38 | 39 | .hljs-emphasis, 40 | .hljs-stronge, 41 | .hljs-type { 42 | font-style: italic; 43 | } 44 | 45 | .hljs-keyword, 46 | .hljs-selector-tag, 47 | .hljs-function, 48 | .hljs-section, 49 | .hljs-symbol, 50 | .hljs-name { 51 | color: #808000; 52 | } 53 | 54 | .hljs-attribute { 55 | color: #800000; 56 | } 57 | 58 | .hljs-variable, 59 | .hljs-params, 60 | .hljs-class .hljs-title { 61 | color: #0055AF; 62 | } 63 | 64 | .hljs-string, 65 | .hljs-selector-id, 66 | .hljs-selector-attr, 67 | .hljs-selector-pseudo, 68 | .hljs-type, 69 | .hljs-built_in, 70 | .hljs-builtin-name, 71 | .hljs-template-tag, 72 | .hljs-template-variable, 73 | .hljs-addition, 74 | .hljs-link { 75 | color: #008000; 76 | } 77 | 78 | .hljs-comment, 79 | .hljs-meta, 80 | .hljs-deletion { 81 | color: #008000; 82 | } 83 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 29 | 30 | 31 | 32 |
33 |

derek/test_rsc_pin

34 | 35 |

36 | Last updated from Python: 2020-01-13 23:58:59 • 37 | Format: csv • 38 | API: v1 39 |

40 |

some description

41 |

Download data: test_rsc_pin.csv

42 |
43 | Raw metadata 44 |
api_version: 1
45 | created: 20200113T235859Z
46 | description: some description
47 | file: test_rsc_pin.csv
48 | file_size: 19
49 | pin_hash: a6cf5331bf3de6c6
50 | title: some pin
51 | type: csv
52 | user: {}
53 | 
54 |
55 | 56 |
57 | 58 |
59 |

Python Code

60 | 61 |
from pins import board_connect
62 | from dotenv import load_dotenv
63 | load_dotenv()
64 | 
65 | board = board_connect(server_url='http://localhost:3939')
66 | board.pin_read("derek/test_rsc_pin")
67 | 68 | 73 |
74 | 75 |
76 |

R Code

77 |
library(pins)
78 | 
79 | board <- board_connect(auth = "envvar")
80 | pin_read(board, "derek/test_rsc_pin")
81 |
82 | 83 |
84 |

Preview (up to 100 rows)

85 |
86 | 89 |
90 |
91 | 92 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/pagedtable-1.1/pagedtable.css: -------------------------------------------------------------------------------- 1 | .pagedtable { 2 | overflow: auto; 3 | padding-left: 8px; 4 | padding-right: 8px; 5 | } 6 | 7 | .pagedtable table { 8 | width: 100%; 9 | max-width: 100%; 10 | margin: 0; 11 | border-bottom: 1px solid #dddddd; 12 | font-weight: 100; 13 | line-height: 24px; 14 | } 15 | 16 | .pagedtable td, .pagedtable th { 17 | padding: 2px 4px 3px 4px; 18 | } 19 | 20 | .pagedtable th { 21 | border: none; 22 | border-bottom: 1px solid #dddddd; 23 | 24 | min-width: 45px; 25 | font-weight: normal; 26 | } 27 | 28 | .pagedtable-empty th { 29 | display: none; 30 | } 31 | 32 | .pagedtable td { 33 | white-space: nowrap; 34 | overflow: hidden; 35 | text-overflow: ellipsis; 36 | } 37 | 38 | .pagedtable .even { 39 | background-color: #fafafa; 40 | } 41 | 42 | .pagedtable-padding-col { 43 | display: none; 44 | } 45 | 46 | .pagedtable a { 47 | -webkit-touch-callout: none; 48 | -webkit-user-select: none; 49 | -khtml-user-select: none; 50 | -moz-user-select: none; 51 | -ms-user-select: none; 52 | user-select: none; 53 | } 54 | 55 | .pagedtable-index-nav { 56 | cursor: pointer; 57 | padding: 0 5px 0 5px; 58 | float: right; 59 | border: 0; 60 | } 61 | 62 | .pagedtable-index-nav-disabled { 63 | cursor: default; 64 | text-decoration: none; 65 | color: #999; 66 | } 67 | 68 | a.pagedtable-index-nav-disabled:hover { 69 | text-decoration: none; 70 | color: #999; 71 | } 72 | 73 | .pagedtable-indexes { 74 | cursor: pointer; 75 | float: right; 76 | border: 0; 77 | } 78 | 79 | .pagedtable-index-current { 80 | cursor: default; 81 | text-decoration: none; 82 | color: #333; 83 | border: 0; 84 | } 85 | 86 | a.pagedtable-index-current:hover { 87 | text-decoration: none; 88 | color: #333; 89 | } 90 | 91 | .pagedtable-index { 92 | width: 30px; 93 | display: inline-block; 94 | text-align: center; 95 | border: 0; 96 | } 97 | 98 | .pagedtable-index-separator-left { 99 | display: inline-block; 100 | color: #333; 101 | font-size: 9px; 102 | padding: 0 0 0 0; 103 | cursor: default; 104 | } 105 | 106 | .pagedtable-index-separator-right { 107 | display: inline-block; 108 | color: #333; 109 | font-size: 9px; 110 | padding: 0 4px 0 0; 111 | cursor: default; 112 | } 113 | 114 | .pagedtable-footer { 115 | padding-top: 6px; 116 | padding-bottom: 5px; 117 | } 118 | 119 | .pagedtable-not-empty .pagedtable-footer { 120 | } 121 | 122 | .pagedtable-info { 123 | overflow: hidden; 124 | color: #999; 125 | white-space: nowrap; 126 | text-overflow: ellipsis; 127 | } 128 | 129 | .pagedtable-header-name { 130 | overflow: hidden; 131 | text-overflow: ellipsis; 132 | } 133 | 134 | .pagedtable-header-type { 135 | color: #999; 136 | height: 0px; 137 | } 138 | 139 | .pagedtable-na-cell { 140 | font-style: italic; 141 | opacity: 0.3; 142 | } 143 | -------------------------------------------------------------------------------- /pins/tests/_snapshots/test_board_pin_write_rsc_index_html/test_rsc_pin.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 1.0,a 3 | 2.0,b 4 | ,c 5 | -------------------------------------------------------------------------------- /pins/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | from 
importlib_resources import files 7 | from pytest import mark as m 8 | 9 | from pins.tests.helpers import ( 10 | BoardBuilder, 11 | DbcBoardBuilder, 12 | RscBoardBuilder, 13 | Snapshot, 14 | rm_env, 15 | ) 16 | 17 | EXAMPLE_REL_PATH = "pins/tests/pins-compat" 18 | PATH_TO_EXAMPLE_BOARD = files("pins") / "tests/pins-compat" 19 | PATH_TO_EXAMPLE_BOARD_DBC = "/Volumes/workshops/my-board/my-volume/test" 20 | PATH_TO_EXAMPLE_VERSION = PATH_TO_EXAMPLE_BOARD / "df_csv/20220214T163720Z-9bfad/" 21 | EXAMPLE_PIN_NAME = "df_csv" 22 | 23 | PATH_TO_MANIFEST_BOARD = files("pins") / "tests/pin-board" 24 | 25 | # parameters that can be used more than once per session 26 | params_safe = [ 27 | pytest.param(lambda: BoardBuilder("file"), id="file", marks=m.fs_file), 28 | pytest.param(lambda: BoardBuilder("s3"), id="s3", marks=m.fs_s3), 29 | pytest.param(lambda: BoardBuilder("gcs"), id="gcs", marks=m.fs_gcs), 30 | pytest.param(lambda: BoardBuilder("abfs"), id="abfs", marks=m.fs_abfs), 31 | pytest.param(lambda: DbcBoardBuilder("dbc"), id="dbc", marks=m.fs_dbc), 32 | ] 33 | 34 | # rsc should only be used once, because users are created at docker setup time 35 | param_rsc = pytest.param(lambda: RscBoardBuilder("rsc"), id="rsc", marks=m.fs_rsc) 36 | 37 | params_backend = [*params_safe, param_rsc] 38 | 39 | 40 | @pytest.fixture(params=params_backend, scope="session") 41 | def backend(request): 42 | backend = request.param() 43 | yield backend 44 | backend.teardown() 45 | 46 | 47 | @pytest.fixture(scope="session") 48 | def http_example_board_path(): 49 | # backend = BoardBuilder("s3") 50 | # yield backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD.absolute())).board 51 | # backend.teardown() 52 | # TODO: could put it in a publicly available bucket folder 53 | return ( 54 | "https://raw.githubusercontent.com/machow/pins-python/main/pins/tests/pins-compat" 55 | ) 56 | 57 | 58 | @pytest.fixture 59 | def snapshot(request): 60 | p_snap = files("pins") / "tests/_snapshots" / request.node.originalname 61 | snap = Snapshot(p_snap, request.config.getoption("--snapshot-update")) 62 | 63 | return snap 64 | 65 | 66 | @pytest.fixture 67 | def df(): 68 | import pandas as pd 69 | 70 | return pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}) 71 | 72 | 73 | @pytest.fixture 74 | def tmp_cache(): 75 | with rm_env("PINS_CACHE_DIR"): 76 | with tempfile.TemporaryDirectory() as tmp_dir: 77 | os.environ["PINS_CACHE_DIR"] = str(tmp_dir) 78 | yield Path(tmp_dir) 79 | 80 | 81 | @pytest.fixture 82 | def tmp_data_dir(): 83 | with rm_env("PINS_DATA_DIR"): 84 | with tempfile.TemporaryDirectory() as tmp_dir: 85 | os.environ["PINS_DATA_DIR"] = str(tmp_dir) 86 | yield Path(tmp_dir) 87 | 88 | 89 | def pytest_addoption(parser): 90 | parser.addoption("--snapshot-update", action="store_true") 91 | -------------------------------------------------------------------------------- /pins/tests/example-bundle/data.txt: -------------------------------------------------------------------------------- 1 | api_version: 1 2 | created: 20220304T153828Z 3 | description: null 4 | file: data_frame.csv 5 | file_size: 15 6 | name: data_frame.csv 7 | pin_hash: c65b0e9785abaa60 8 | title: some title 9 | type: csv 10 | user: {} 11 | -------------------------------------------------------------------------------- /pins/tests/example-bundle/data_frame.csv: -------------------------------------------------------------------------------- 1 | ,x 2 | 0,1 3 | 1,2 4 | 2,3 5 | --------------------------------------------------------------------------------
/pins/tests/example-bundle/index.html: -------------------------------------------------------------------------------- 1 | yo 2 | -------------------------------------------------------------------------------- /pins/tests/example-bundle/manifest.json: -------------------------------------------------------------------------------- 1 | {"version": 1, "local": "en_US", "platform": "3.5.1", "metadata": {"appmode": "static", "primary_rmd": null, "primary_html": "index.html", "content_category": "pin", "has_parameters": false}, "packages": null, "files": ["index.html", "manifest.json", "data_frame.csv", "data.txt"], "users": null} 2 | -------------------------------------------------------------------------------- /pins/tests/pin-board/_pins.yaml: -------------------------------------------------------------------------------- 1 | x: 2 | - x/20221215T180351Z-c3943/ 3 | 'y': 4 | - y/20221215T180357Z-9ae7a/ 5 | - y/20221215T180400Z-b81d5/ 6 | -------------------------------------------------------------------------------- /pins/tests/pin-board/x/20221215T180351Z-c3943/data.txt: -------------------------------------------------------------------------------- 1 | file: x.json 2 | file_size: 23 3 | pin_hash: c3943ca5a9aab2df 4 | type: json 5 | title: 'x: a pinned integer vector' 6 | description: ~ 7 | tags: ~ 8 | created: 20221215T180351Z 9 | api_version: 1.0 10 | -------------------------------------------------------------------------------- /pins/tests/pin-board/x/20221215T180351Z-c3943/x.json: -------------------------------------------------------------------------------- 1 | [1,2,3,4,5,6,7,8,9,10] 2 | -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180357Z-9ae7a/data.txt: -------------------------------------------------------------------------------- 1 | file: y.rds 2 | file_size: 61 3 | pin_hash: 9ae7a970010c84e0 4 | type: rds 5 | title: 'y: a pinned integer vector' 6 | description: ~ 7 | tags: ~ 8 | created: 20221215T180357Z 9 | api_version: 1.0 10 | -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180357Z-9ae7a/y.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pin-board/y/20221215T180357Z-9ae7a/y.rds -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180400Z-b81d5/data.txt: -------------------------------------------------------------------------------- 1 | file: y.json 2 | file_size: 53 3 | pin_hash: b81d5bea9e760608 4 | type: json 5 | title: 'y: a pinned integer vector' 6 | description: ~ 7 | tags: ~ 8 | created: 20221215T180400Z 9 | api_version: 1.0 10 | -------------------------------------------------------------------------------- /pins/tests/pin-board/y/20221215T180400Z-b81d5/y.json: -------------------------------------------------------------------------------- 1 | [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] 2 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_arrow/20220214T163720Z-ad0c1/data.txt: -------------------------------------------------------------------------------- 1 | file: df_arrow.arrow 2 | file_size: 1282 3 | pin_hash: ad0c1a5a64ad7ca7 4 | type: arrow 5 | title: 'df_arrow: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 
1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_arrow/20220214T163720Z-ad0c1/df_arrow.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-compat/df_arrow/20220214T163720Z-ad0c1/df_arrow.arrow -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163718Z-eceac/data.txt: -------------------------------------------------------------------------------- 1 | file: df_csv.csv 2 | file_size: 20 3 | pin_hash: eceac651f7d06360 4 | type: csv 5 | title: 'df_csv: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163718Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163718Z-eceac/df_csv.csv: -------------------------------------------------------------------------------- 1 | "x","y" 2 | 1,"a" 3 | 2,"b" 4 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163720Z-9bfad/data.txt: -------------------------------------------------------------------------------- 1 | file: df_csv.csv 2 | file_size: 28 3 | pin_hash: 9bfad6d1a322a904 4 | type: csv 5 | title: 'df_csv: a pinned 2 x 3 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_csv/20220214T163720Z-9bfad/df_csv.csv: -------------------------------------------------------------------------------- 1 | "x","y","z" 2 | 1,"a",3 3 | 2,"b",4 4 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_rds/20220214T163720Z-35b15/data.txt: -------------------------------------------------------------------------------- 1 | file: df_rds.rds 2 | file_size: 116 3 | pin_hash: 35b1570263448755 4 | type: rds 5 | title: 'df_rds: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_rds/20220214T163720Z-35b15/df_rds.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-compat/df_rds/20220214T163720Z-35b15/df_rds.rds -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_unversioned/20220214T163720Z-35b15/data.txt: -------------------------------------------------------------------------------- 1 | file: df_unversioned.rds 2 | file_size: 116 3 | pin_hash: 35b1570263448755 4 | type: rds 5 | title: 'df_unversioned: a pinned 2 x 2 data frame' 6 | description: ~ 7 | created: 20220214T163720Z 8 | api_version: 1.0 9 | -------------------------------------------------------------------------------- /pins/tests/pins-compat/df_unversioned/20220214T163720Z-35b15/df_unversioned.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-compat/df_unversioned/20220214T163720Z-35b15/df_unversioned.rds 
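# A hedged sketch of reading these pins-compat fixtures back with a local
# folder board — essentially what test_compat.py exercises for the "file"
# backend. The relative path assumes the repository root as the working
# directory.
from pins import board_folder

board = board_folder("pins/tests/pins-compat")

# df_csv has the two versions shown above: ...-eceac (2 x 2) and ...-9bfad (2 x 3)
versions = board.pin_versions("df_csv", as_df=False)
assert [v.version for v in versions] == [
    "20220214T163718Z-eceac",
    "20220214T163720Z-9bfad",
]

# pin_read returns the latest version as a pandas DataFrame
df = board.pin_read("df_csv")
assert df.shape == (2, 3)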
-------------------------------------------------------------------------------- /pins/tests/pins-old-types/a-table/v/data.csv: -------------------------------------------------------------------------------- 1 | "a","b" 2 | 1,"x" 3 | 2,"y" 4 | -------------------------------------------------------------------------------- /pins/tests/pins-old-types/a-table/v/data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rstudio/pins-python/bdb58e1cba26bae899c73c19871366b417edf113/pins/tests/pins-old-types/a-table/v/data.rds -------------------------------------------------------------------------------- /pins/tests/pins-old-types/a-table/v/data.txt: -------------------------------------------------------------------------------- 1 | path: 2 | - data.csv 3 | - data.rds 4 | type: table 5 | rows: 2 6 | cols: 2 7 | columns: 8 | a: integer 9 | b: character 10 | -------------------------------------------------------------------------------- /pins/tests/test_adaptors.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import joblib 4 | import pandas as pd 5 | import pytest 6 | from pandas.testing import assert_frame_equal 7 | 8 | from pins._adaptors import ( 9 | AbstractPandasFrame, 10 | Adaptor, 11 | DFAdaptor, 12 | PandasAdaptor, 13 | create_adaptor, 14 | ) 15 | 16 | 17 | class TestCreateAdaptor: 18 | def test_pandas(self): 19 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 20 | adaptor = create_adaptor(df) 21 | assert isinstance(adaptor, Adaptor) 22 | assert isinstance(adaptor, PandasAdaptor) 23 | 24 | def test_non_df(self): 25 | adaptor = create_adaptor(42) 26 | assert isinstance(adaptor, Adaptor) 27 | assert not isinstance(adaptor, PandasAdaptor) 28 | assert not isinstance(adaptor, DFAdaptor) 29 | 30 | def test_already_adaptor(self): 31 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 32 | adaptor = create_adaptor(df) 33 | assert isinstance(adaptor, PandasAdaptor) 34 | assert create_adaptor(adaptor) is adaptor 35 | 36 | 37 | class TestAdaptor: 38 | def test_write_json(self, tmp_path: Path): 39 | data = {"a": 1, "b": 2} 40 | adaptor = Adaptor(data) 41 | file = tmp_path / "file.json" 42 | adaptor.write_json(file) 43 | assert file.read_text() == '{"a": 1, "b": 2}' 44 | 45 | def test_write_joblib(self, tmp_path: Path): 46 | data = {"a": 1, "b": 2} 47 | adaptor = Adaptor(data) 48 | file = tmp_path / "file.joblib" 49 | adaptor.write_joblib(file) 50 | 51 | # Dump independently and check contents 52 | expected_file = tmp_path / "expected.joblib" 53 | joblib.dump(data, expected_file) 54 | assert expected_file.read_bytes() == file.read_bytes() 55 | 56 | def test_write_csv(self): 57 | with pytest.raises(NotImplementedError): 58 | adaptor = Adaptor(42) 59 | adaptor.write_csv("file.csv") 60 | 61 | def test_write_parquet(self): 62 | with pytest.raises(NotImplementedError): 63 | adaptor = Adaptor(42) 64 | adaptor.write_parquet("file.parquet") 65 | 66 | def test_write_feather(self): 67 | with pytest.raises(NotImplementedError): 68 | adaptor = Adaptor(42) 69 | adaptor.write_feather("file.feather") 70 | 71 | class TestDataPreview: 72 | def test_int(self): 73 | adaptor = Adaptor(42) 74 | assert adaptor.data_preview == "{}" 75 | 76 | def test_dict(self): 77 | data = {"a": 1, "b": 2} 78 | adaptor = Adaptor(data) 79 | assert adaptor.data_preview == "{}" 80 | 81 | def test_default_title(self): 82 | adaptor = Adaptor(42) 83 | assert adaptor.default_title("my_data") == 
"my_data: a pinned int object" 84 | 85 | 86 | class TestPandasAdaptor: 87 | def test_df_type(self): 88 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 89 | adaptor = PandasAdaptor(df) 90 | assert adaptor.df_type == "DataFrame" 91 | 92 | def test_columns(self): 93 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 94 | adaptor = PandasAdaptor(df) 95 | assert isinstance(adaptor, DFAdaptor) 96 | assert isinstance(adaptor, PandasAdaptor) 97 | assert adaptor.columns == ["a", "b"] 98 | 99 | def test_shape(self): 100 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 101 | adaptor = PandasAdaptor(df) 102 | assert isinstance(adaptor, DFAdaptor) 103 | assert isinstance(adaptor, PandasAdaptor) 104 | assert adaptor.shape == (3, 2) 105 | 106 | def test_head(self): 107 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 108 | adaptor = PandasAdaptor(df) 109 | head1_df = pd.DataFrame({"a": [1], "b": [4]}) 110 | expected = create_adaptor(head1_df) 111 | assert isinstance(adaptor, DFAdaptor) 112 | assert isinstance(adaptor.head(1), DFAdaptor) 113 | assert isinstance(adaptor.head(1), PandasAdaptor) 114 | assert_frame_equal(adaptor.head(1)._d, expected._d) 115 | 116 | def test_to_json(self): 117 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 118 | adaptor = PandasAdaptor(df) 119 | assert isinstance(adaptor, DFAdaptor) 120 | assert adaptor.to_json() == """[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]""" 121 | 122 | def test_write_csv(self, tmp_path: Path): 123 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 124 | adaptor = PandasAdaptor(df) 125 | file = tmp_path / "file.csv" 126 | adaptor.write_csv(file) 127 | assert file.read_text() == "a,b\n1,4\n2,5\n3,6\n" 128 | 129 | def test_write_parquet(self, tmp_path: Path): 130 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 131 | adaptor = PandasAdaptor(df) 132 | file = tmp_path / "file.parquet" 133 | adaptor.write_parquet(file) 134 | assert_frame_equal(pd.read_parquet(file), df) 135 | 136 | def test_write_feather(self, tmp_path: Path): 137 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 138 | adaptor = PandasAdaptor(df) 139 | file = tmp_path / "file.feather" 140 | adaptor.write_feather(file) 141 | assert_frame_equal(pd.read_feather(file), df) 142 | 143 | def test_data_preview(self): 144 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 145 | adaptor = PandasAdaptor(df) 146 | expected = ( 147 | '{"data": [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}], ' 148 | '"columns": [{"name": ["a"], "label": ["a"], "align": ["left"], "type": [""]}, ' 149 | '{"name": ["b"], "label": ["b"], "align": ["left"], "type": [""]}]}' 150 | ) 151 | assert adaptor.data_preview == expected 152 | 153 | def test_default_title(self): 154 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 155 | adaptor = PandasAdaptor(df) 156 | assert adaptor.default_title("my_df") == "my_df: a pinned 3 x 2 DataFrame" 157 | 158 | 159 | class TestAbstractBackends: 160 | class TestAbstractPandasFrame: 161 | def test_isinstance(self): 162 | df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) 163 | assert isinstance(df, AbstractPandasFrame) 164 | 165 | def test_not_isinstance(self): 166 | assert not isinstance(42, AbstractPandasFrame) 167 | -------------------------------------------------------------------------------- /pins/tests/test_cache.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pathlib import Path 3 | 4 | import pytest 5 | from fsspec import filesystem 6 | 7 | from pins.cache import ( 8 | 
CachePruner, 9 | PinsCache, 10 | PinsUrlCache, 11 | cache_prune, 12 | touch_access_time, 13 | ) 14 | 15 | # NOTE: windows time.time() implementation appears to have 16 millisecond precision, so 16 | # we need to add a small delay, in order to avoid prune checks appearing to happen at the 17 | # exact same moment something earlier was created / accessed. 18 | # see: https://stackoverflow.com/a/1938096/1144523 19 | 20 | 21 | # Utilities =================================================================== 22 | 23 | 24 | def _sleep(): 25 | # time-based issues keep arising erratically in windows checks, so try to shoot 26 | # well past 27 | time.sleep(0.3) 28 | 29 | 30 | @pytest.fixture 31 | def some_file(tmp_path): 32 | p = tmp_path / "some_file.txt" 33 | p.touch() 34 | return p 35 | 36 | 37 | def test_touch_access_time_manual(some_file): 38 | some_file.stat().st_atime 39 | 40 | access_time = time.time() - 60 * 60 * 24 41 | touch_access_time(some_file, access_time) 42 | 43 | assert some_file.stat().st_atime == access_time 44 | 45 | 46 | def test_touch_access_time_auto(some_file): 47 | orig_access = some_file.stat().st_atime 48 | 49 | _sleep() 50 | new_time = touch_access_time(some_file) 51 | 52 | assert some_file.stat().st_atime == new_time 53 | assert orig_access < new_time 54 | 55 | 56 | # Cache Classes =============================================================== 57 | 58 | # Boards w/ default cache ===================================================== 59 | 60 | 61 | def test_pins_cache_hash_name_preserves(): 62 | cache = PinsCache(fs=filesystem("file"), hash_prefix="") 63 | assert cache.hash_name("a/b/c.txt") == Path("a/b/c.txt") 64 | 65 | 66 | def test_pins_cache_url_hash_name(): 67 | cache = PinsUrlCache(fs=filesystem("file")) 68 | hashed = cache.hash_name("http://example.com/a.txt", True) 69 | 70 | p_hash = Path(hashed) 71 | 72 | # should have form // 73 | assert p_hash.name == "a.txt" 74 | 75 | # count parent dirs, excluding root (e.g. "." 
or "/") 76 | n_parents = len(p_hash.parents) - 1 77 | assert n_parents == 2 78 | 79 | 80 | @pytest.mark.skip("TODO") 81 | def test_pins_cache_open(): 82 | # check that opening works and creates the cached file 83 | pass 84 | 85 | 86 | # Cache pruning =============================================================== 87 | 88 | 89 | @pytest.fixture 90 | def a_cache(tmp_path): 91 | return tmp_path / "board_cache" 92 | 93 | 94 | def create_metadata(p, access_time): 95 | p.mkdir(parents=True, exist_ok=True) 96 | meta = p / "data.txt" 97 | meta.touch() 98 | touch_access_time(meta, access_time) 99 | 100 | 101 | @pytest.fixture 102 | def pin1_v1(a_cache): # current 103 | v1 = a_cache / "a_pin" / "version_1" 104 | create_metadata(v1, time.time()) 105 | 106 | return v1 107 | 108 | 109 | @pytest.fixture 110 | def pin1_v2(a_cache): 111 | v2 = a_cache / "a_pin" / "version_2" 112 | create_metadata(v2, time.time() - 60 * 60 * 24) # one day ago 113 | 114 | return v2 115 | 116 | 117 | @pytest.fixture 118 | def pin2_v3(a_cache): 119 | v3 = a_cache / "other_pin" / "version_3" 120 | create_metadata(v3, time.time() - 60 * 60 * 48) # two days ago 121 | 122 | return v3 123 | 124 | 125 | def test_cache_pruner_old_versions_none(a_cache, pin1_v1): 126 | _sleep() 127 | 128 | pruner = CachePruner(a_cache) 129 | 130 | old = pruner.old_versions(days=1) 131 | 132 | assert len(old) == 0 133 | 134 | 135 | def test_cache_pruner_old_versions_days0(a_cache, pin1_v1): 136 | _sleep() 137 | 138 | pruner = CachePruner(a_cache) 139 | old = pruner.old_versions(days=0) 140 | 141 | assert len(old) == 1 142 | assert old[0] == pin1_v1 143 | 144 | 145 | def test_cache_pruner_old_versions_some(a_cache, pin1_v1, pin1_v2): 146 | _sleep() 147 | 148 | # create: tmp_dir/pin1/version1 149 | 150 | pruner = CachePruner(a_cache) 151 | 152 | old = pruner.old_versions(days=1) 153 | 154 | assert len(old) == 1 155 | assert old[0] == pin1_v2 156 | 157 | 158 | def test_cache_pruner_old_versions_multi_pins(a_cache, pin1_v2, pin2_v3): 159 | _sleep() 160 | 161 | pruner = CachePruner(a_cache) 162 | old = pruner.old_versions(days=1) 163 | 164 | assert len(old) == 2 165 | assert set(old) == {pin1_v2, pin2_v3} 166 | 167 | 168 | def test_cache_prune_prompt(a_cache, pin1_v1, pin2_v3, monkeypatch): 169 | _sleep() 170 | 171 | cache_prune(days=1, cache_root=a_cache.parent, prompt=False) 172 | 173 | versions = list(a_cache.glob("*/*")) 174 | 175 | # pin2_v3 deleted 176 | assert len(versions) == 1 177 | -------------------------------------------------------------------------------- /pins/tests/test_compat.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | 5 | from pins.errors import PinsError 6 | from pins.tests.conftest import ( 7 | PATH_TO_EXAMPLE_BOARD, 8 | PATH_TO_EXAMPLE_BOARD_DBC, 9 | PATH_TO_MANIFEST_BOARD, 10 | ) 11 | from pins.tests.helpers import skip_if_dbc, xfail_fs 12 | 13 | NOT_A_PIN = "not_a_pin_abcdefg" 14 | PIN_CSV = "df_csv" 15 | 16 | # set up board ---- 17 | 18 | 19 | @pytest.fixture(scope="session") 20 | def board(backend): 21 | board = backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD.absolute())) 22 | if board.fs.protocol == "dbc": 23 | board = backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD_DBC)) 24 | yield board 25 | 26 | backend.teardown_board(board) 27 | 28 | 29 | @pytest.fixture(scope="session") 30 | def board_manifest(backend): 31 | # skip on rsconnect, since it can't add a manifest and the pin names 32 | # are too short for use to upload (rsc requires 
names > 3 characters) 33 | if backend.fs_name in ["rsc", "dbc"]: 34 | pytest.skip() 35 | 36 | board = backend.create_tmp_board(str(PATH_TO_MANIFEST_BOARD.absolute())) 37 | 38 | yield board 39 | 40 | backend.teardown_board(board) 41 | 42 | 43 | # pin_list -------------------------------------------------------------------- 44 | 45 | 46 | def test_compat_pin_list(board): 47 | src_sorted = sorted(board.pin_list()) 48 | dst_sorted = ["df_arrow", "df_csv", "df_rds", "df_unversioned"] 49 | 50 | if board.fs.protocol == "rsc": 51 | # rsc backend uses / for full name 52 | dst_sorted = [f"{board.user_name}/{content}" for content in dst_sorted] 53 | if board.fs.protocol == "dbc": 54 | # TODO: update to match when not read-only 55 | dst_sorted = [ 56 | "cool_pin", 57 | "cool_pin2", 58 | "cool_pin3", 59 | "data", 60 | "df_csv", 61 | "reviews", 62 | "reviews2", 63 | "reviews3", 64 | ] 65 | 66 | assert src_sorted == dst_sorted 67 | 68 | 69 | # pin_versions ---------------------------------------------------------------- 70 | 71 | 72 | def test_compat_pin_versions(board): 73 | if board.fs.protocol == "rsc": 74 | pytest.skip("RSC uses bundle ids as pin versions") 75 | versions = board.pin_versions("df_csv", as_df=False) 76 | v_strings = list(v.version for v in versions) 77 | # TODO: update when dbc is not read-only 78 | if board.fs.protocol == "dbc": 79 | assert v_strings == ["20250410T083026Z-a173c"] 80 | else: 81 | assert v_strings == ["20220214T163718Z-eceac", "20220214T163720Z-9bfad"] 82 | 83 | 84 | @pytest.mark.skip("Used to diagnose os listdir ordering") 85 | def test_compat_os_listdir(): 86 | import os 87 | 88 | res = os.listdir(PATH_TO_EXAMPLE_BOARD / "df_csv") 89 | dst = ["20220214T163718Z-eceac", "20220214T163720Z-9bfad"] 90 | 91 | assert res == dst 92 | 93 | 94 | # pin_exists -------------------------------------------------------------------- 95 | 96 | 97 | def test_compat_pin_exists_succeed(board): 98 | assert board.pin_exists(PIN_CSV) 99 | 100 | 101 | def test_compat_pin_exists_fails(board): 102 | assert board.pin_exists(NOT_A_PIN) is False 103 | 104 | 105 | # pin_meta -------------------------------------------------------------------- 106 | 107 | 108 | def test_compat_pin_meta(board): 109 | # Note that this fetches the latest of 2 versions 110 | meta = board.pin_meta(PIN_CSV) 111 | 112 | if board.fs.protocol == "rsc": 113 | # TODO: afaik the bundle id is largely non-deterministic, so not possible 114 | # to test, but should think a bit more about it. 
115 | assert meta.name == "derek/df_csv" 116 | # TODO: update when dbc boards are not read-only 117 | elif board.fs.protocol == "dbc": 118 | assert meta.title == "df_csv: a pinned 3 x 2 DataFrame" 119 | assert meta.description is None 120 | assert meta.created == "20250410T083026Z" 121 | assert meta.file == "df_csv.csv" 122 | assert meta.file_size == 16 123 | assert meta.pin_hash == "a173cd6a53908980" 124 | assert meta.type == "csv" 125 | return 126 | else: 127 | assert meta.version.version == "20220214T163720Z-9bfad" 128 | assert meta.version.created == datetime.datetime(2022, 2, 14, 16, 37, 20) 129 | assert meta.version.hash == "9bfad" 130 | 131 | assert meta.name == "df_csv" 132 | 133 | assert meta.title == "df_csv: a pinned 2 x 3 data frame" 134 | assert meta.description is None 135 | assert meta.created == "20220214T163720Z" 136 | assert meta.file == "df_csv.csv" 137 | assert meta.file_size == 28 138 | assert meta.pin_hash == "9bfad6d1a322a904" 139 | assert meta.type == "csv" 140 | 141 | # TODO(question): coding api_version as a yaml float intentional? 142 | assert meta.api_version == 1.0 143 | assert meta.user == {} 144 | 145 | 146 | def test_compat_pin_meta_pin_missing(board): 147 | with pytest.raises(PinsError) as exc_info: 148 | board.pin_meta(NOT_A_PIN) 149 | 150 | assert f"{NOT_A_PIN} does not exist" in exc_info.value.args[0] 151 | 152 | 153 | @xfail_fs("rsc") 154 | def test_compat_pin_meta_version_arg(board): 155 | # note that in RSConnect the version is the bundle id 156 | # TODO: update when dbc is not read-only 157 | if board.fs.protocol == "dbc": 158 | meta = board.pin_meta(PIN_CSV, "20250410T083026Z-a173c") 159 | assert meta.version.version == "20250410T083026Z-a173c" 160 | assert meta.version.hash == "a173c" 161 | else: 162 | meta = board.pin_meta(PIN_CSV, "20220214T163718Z-eceac") 163 | assert meta.version.version == "20220214T163718Z-eceac" 164 | assert meta.version.hash == "eceac" 165 | 166 | 167 | def test_compat_pin_meta_version_arg_error(board): 168 | bad_version = "123" 169 | with pytest.raises(PinsError) as exc_info: 170 | board.pin_meta(PIN_CSV, bad_version) 171 | 172 | msg = exc_info.value.args[0] 173 | assert PIN_CSV in msg 174 | assert bad_version in msg 175 | 176 | 177 | # pin_read ---- 178 | 179 | 180 | def test_compat_pin_read(board): 181 | import pandas as pd 182 | 183 | p_data = PATH_TO_EXAMPLE_BOARD / "df_csv" / "20220214T163720Z-9bfad" / "df_csv.csv" 184 | 185 | src_df = board.pin_read("df_csv") 186 | 187 | # TODO: update when dbc boards are not read-only 188 | if board.fs.protocol == "dbc": 189 | dst_df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) 190 | else: 191 | dst_df = pd.read_csv(p_data) 192 | 193 | assert isinstance(src_df, pd.DataFrame) 194 | assert src_df.equals(dst_df) 195 | 196 | 197 | @skip_if_dbc 198 | def test_compat_pin_read_supported_rds(board): 199 | pytest.importorskip("rdata") 200 | import pandas as pd 201 | 202 | src_df = board.pin_read("df_rds") 203 | 204 | assert isinstance(src_df, pd.DataFrame) 205 | 206 | 207 | # pin_write ---- 208 | 209 | # manifest ----- 210 | 211 | 212 | def test_board_pin_write_manifest_name_error(board_manifest): 213 | if board_manifest.fs.protocol == "rsc": 214 | pytest.skip() 215 | 216 | with pytest.raises(ValueError) as exc_info: 217 | board_manifest.pin_write([1], "_pins.yaml", type="json") 218 | 219 | assert "name '_pins.yaml' is reserved for internal use." 
in exc_info.value.args[0] 220 | 221 | 222 | def test_board_manifest_pin_list_no_internal_name(board_manifest): 223 | assert set(board_manifest.pin_list()) == {"x", "y"} 224 | 225 | 226 | def test_board_manifest_pin_exist_internal_name_errors(board_manifest): 227 | with pytest.raises(ValueError) as exc_info: 228 | board_manifest.pin_exists("_pins.yaml") 229 | 230 | assert "reserved for internal use." in exc_info.value.args[0] 231 | 232 | 233 | def test_board_manifest_pin_read_internal_errors(board_manifest): 234 | with pytest.raises(ValueError) as exc_info: 235 | board_manifest.pin_read("_pins.yaml") 236 | 237 | assert "reserved for internal use." in exc_info.value.args[0] 238 | 239 | 240 | def test_board_manifest_pin_search(board_manifest): 241 | res = board_manifest.pin_search("x", as_df=False) 242 | 243 | assert len(res) == 1 244 | assert res[0].name == "x" 245 | -------------------------------------------------------------------------------- /pins/tests/test_compat_old_types.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from importlib_resources import files 3 | 4 | from pins import board_folder 5 | 6 | OLD_BOARD = files("pins") / "tests" / "pins-old-types" 7 | DST_DF = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}) 8 | 9 | 10 | def test_compat_old_types_load_table(): 11 | board = board_folder(OLD_BOARD) 12 | src_df = board.pin_read("a-table") 13 | 14 | assert isinstance(src_df, pd.DataFrame) 15 | assert src_df.equals(DST_DF) 16 | -------------------------------------------------------------------------------- /pins/tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from pins import config 6 | from pins.tests.helpers import rm_env 7 | 8 | 9 | @pytest.fixture 10 | def env_unset(): 11 | with rm_env( 12 | config.PINS_ENV_DATA_DIR, 13 | config.PINS_ENV_CACHE_DIR, 14 | config.PINS_ENV_INSECURE_READ, 15 | ): 16 | yield 17 | 18 | 19 | def test_allow_pickle_read_no_env(env_unset): 20 | assert config.get_allow_pickle_read(True) is True 21 | assert config.get_allow_pickle_read(False) is False 22 | 23 | 24 | def test_allow_pickle_read_env_1(env_unset): 25 | os.environ[config.PINS_ENV_INSECURE_READ] = "1" 26 | 27 | assert config.get_allow_pickle_read(True) is True 28 | assert config.get_allow_pickle_read(False) is False 29 | assert config.get_allow_pickle_read(None) is True 30 | 31 | 32 | def test_allow_pickle_read_env_0(env_unset): 33 | os.environ[config.PINS_ENV_INSECURE_READ] = "0" 34 | 35 | assert config.get_allow_pickle_read(True) is True 36 | assert config.get_allow_pickle_read(False) is False 37 | assert config.get_allow_pickle_read(None) is False 38 | -------------------------------------------------------------------------------- /pins/tests/test_constructors.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | import pytest 6 | from pandas.testing import assert_frame_equal 7 | 8 | from pins import constructors as c 9 | from pins.tests.conftest import ( 10 | EXAMPLE_REL_PATH, 11 | PATH_TO_EXAMPLE_BOARD, 12 | PATH_TO_EXAMPLE_VERSION, 13 | ) 14 | from pins.tests.helpers import rm_env, skip_if_dbc 15 | 16 | 17 | @pytest.fixture 18 | def df_csv(): 19 | return pd.read_csv(PATH_TO_EXAMPLE_VERSION / "df_csv.csv") 20 | 21 | 22 | def check_dir_writable(p_dir): 23 | assert p_dir.parent.exists() 24 | assert 
os.access(p_dir.parent.absolute(), os.W_OK) 25 | 26 | 27 | def check_cache_file_path(p_file, p_cache): 28 | rel_path = p_file.relative_to(p_cache) 29 | 30 | # parents has every entry you'd get if you called .parents all the way to some root. 31 | # for a relative path, the root is likely ".", so we subtract 1 to get the number 32 | # of parent directories. 33 | # note this essentially counts slashes, in an inter-OS friendly way. 34 | n_parents = len(rel_path.parents) - 1 35 | assert n_parents == 2 36 | 37 | 38 | def construct_from_board(board): 39 | prot = board.fs.protocol 40 | fs_name = prot if isinstance(prot, str) else prot[0] 41 | 42 | if fs_name in ["file", ("file", "local")]: 43 | board = c.board_folder(board.board) 44 | elif fs_name == "dbc": 45 | board = c.board_databricks(board.board) 46 | elif fs_name == "rsc": 47 | board = c.board_rsconnect( 48 | server_url=board.fs.api.server_url, api_key=board.fs.api.api_key 49 | ) 50 | elif fs_name == "abfs": 51 | board = c.board_azure(board.board) 52 | elif fs_name == "gs": 53 | board = c.board_gcs(board.board) 54 | else: 55 | board = getattr(c, f"board_{fs_name}")(board.board) 56 | 57 | return board 58 | 59 | 60 | # End-to-end constructor tests 61 | 62 | 63 | # there are two facets of boards: reading and writing. 64 | # copied from test_compat 65 | @pytest.mark.skip_on_github 66 | def test_constructor_board_url_data(tmp_cache, http_example_board_path, df_csv): 67 | board = c.board_url( 68 | http_example_board_path, 69 | # could derive from example version path 70 | pin_paths={"df_csv": "df_csv/20220214T163720Z-9bfad/"}, 71 | ) 72 | 73 | df = board.pin_read("df_csv") 74 | 75 | # check data ---- 76 | assert_frame_equal(df, df_csv) 77 | 78 | 79 | @pytest.mark.xfail 80 | @pytest.mark.skip_on_github 81 | def test_constructor_board_url_cache( 82 | tmp_cache, http_example_board_path, df_csv, tmp_path 83 | ): 84 | # TODO: downloading a pin does not put files in the same directory, since 85 | # in this case we are hashing on the full url. 86 | 87 | board = c.board_url( 88 | http_example_board_path, 89 | # could derive from example version path 90 | pin_paths={"df_csv": "df_csv/20220214T163718Z-eceac/"}, 91 | ) 92 | 93 | board.pin_read("df_csv") 94 | 95 | # cannot write or view pin versions 96 | 97 | with pytest.raises(NotImplementedError): 98 | board.pin_write(df_csv) 99 | with pytest.raises(NotImplementedError): 100 | board.pin_versions("df_csv") 101 | with pytest.raises(NotImplementedError): 102 | board.pin_version_delete(name="df_csv", version="20220214T163718Z") 103 | with pytest.raises(NotImplementedError): 104 | df = pd.DataFrame({"x": [1, 2, 3]}) 105 | path = tmp_path / "data.csv" 106 | df.to_csv(path, index=False) 107 | board.pin_upload(path, "cool_pin") 108 | 109 | # check cache ---- 110 | http_dirs = list(tmp_cache.glob("http_*")) 111 | 112 | assert len(http_dirs) == 1 113 | 114 | # there are two files in the flat cache (metadata, and the csv) 115 | parent = http_dirs[0] 116 | res = list(parent.rglob("*")) 117 | assert len(res) == 2 118 | 119 | # validate that it creates an empty metadata file 120 | assert len([x for x in res if str(x).endswith("df_csv.csv")]) == 1 121 | assert len([x for x in res if str(x).endswith("data.txt")]) == 1 122 | 123 | assert len(list(parent.glob("**/*"))) == 2 124 | 125 | 126 | @pytest.mark.skip_on_github 127 | def test_constructor_board_url_file(tmp_cache, http_example_board_path): 128 | # TODO: downloading a pin does not put files in the same directory, since 129 | # in this case we are hashing on the full url.
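# (concretely: board_url caches each downloaded file under a single "http_*"
# directory keyed by the full url, rather than under per-pin, per-version
# subdirectories — see the cache assertions below)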
130 | 131 | board = c.board_url( 132 | http_example_board_path, 133 | # could derive from example version path 134 | pin_paths={"df_csv": "df_csv/20220214T163718Z-eceac/df_csv.csv"}, 135 | ) 136 | 137 | board.pin_download("df_csv") 138 | 139 | # check cache ---- 140 | http_dirs = list(tmp_cache.glob("http_*")) 141 | 142 | assert len(http_dirs) == 1 143 | 144 | # there are two files in the flat cache (metadata, and the csv) 145 | parent = http_dirs[0] 146 | res = list(parent.rglob("*")) 147 | assert len(res) == 1 148 | 149 | assert str(res[0]).endswith("df_csv.csv") 150 | 151 | new_board = eval(c.board_deparse(board), c.__dict__) 152 | assert new_board.pin_list() == board.pin_list() 153 | 154 | 155 | @pytest.mark.skip_on_github 156 | def test_constructor_board_github(tmp_cache, http_example_board_path, df_csv): 157 | board = c.board_github("rstudio", "pins-python", EXAMPLE_REL_PATH) # noqa 158 | 159 | df = board.pin_read("df_csv") 160 | assert_frame_equal(df, df_csv) 161 | 162 | cache_options = list(tmp_cache.glob("github_*")) 163 | assert len(cache_options) == 1 164 | cache_dir = cache_options[0] 165 | 166 | res = list(cache_dir.rglob("**/*.csv")) 167 | assert len(res) == 1 168 | 169 | check_cache_file_path(res[0], cache_dir) 170 | 171 | 172 | @pytest.fixture(scope="function") 173 | def board(backend): 174 | # TODO: copied from test_compat.py 175 | 176 | board = backend.create_tmp_board(str(PATH_TO_EXAMPLE_BOARD.absolute())) 177 | yield board 178 | backend.teardown_board(board) 179 | 180 | 181 | @skip_if_dbc # passes, but skipping since this cannot clean itself up properly 182 | def test_constructor_boards(board, df_csv, tmp_cache): 183 | # TODO: would be nice to have fixtures for each board constructor 184 | # doesn't need to copy over pins-compat content 185 | 186 | # create board from constructor ------------------------------------------- 187 | board = construct_from_board(board) 188 | 189 | # read a pin and check its contents --------------------------------------- 190 | 191 | df = board.pin_read("df_csv") 192 | 193 | # check data 194 | # TODO: update when dbc boards are not read-only 195 | if board.fs.protocol == "dbc": 196 | pass 197 | else: 198 | assert_frame_equal(df, df_csv) 199 | 200 | # check the cache structure ----------------------------------------------- 201 | 202 | # check cache 203 | if board.fs.protocol in ["file", ("file", "local")]: 204 | # no caching for local file boards 205 | pass 206 | else: 207 | # check path structure ---- 208 | 209 | options = list(tmp_cache.glob("*")) 210 | assert len(options) == 1 211 | 212 | cache_dir = options[0] 213 | res = list(cache_dir.rglob("*/*.csv")) 214 | assert len(res) == 1 215 | 216 | check_cache_file_path(res[0], cache_dir) 217 | 218 | # check cache touch on access time ---- 219 | 220 | meta = board.pin_meta("df_csv") 221 | p_cache_meta = ( 222 | Path(board._get_cache_path(meta.name, meta.version.version)) / "data.txt" 223 | ) 224 | orig_access = p_cache_meta.stat().st_atime 225 | 226 | board.pin_meta("df_csv") 227 | 228 | new_access = p_cache_meta.stat().st_atime 229 | 230 | assert orig_access < new_access 231 | 232 | 233 | @pytest.fixture(scope="function") 234 | def board2(backend): 235 | board2 = backend.create_tmp_board() 236 | yield board2 237 | backend.teardown_board(board2) 238 | 239 | 240 | @skip_if_dbc 241 | def test_constructor_boards_multi_user(board2, df_csv, tmp_cache): 242 | prot = board2.fs.protocol 243 | fs_name = prot if isinstance(prot, str) else prot[0] 244 | 245 | if fs_name == "rsc": 246 | # TODO: RSConnect 
writes pin names like /, so would need to 247 | # modify test 248 | pytest.skip() 249 | elif fs_name == "abfs": 250 | fs_name = "azure" 251 | 252 | first = construct_from_board(board2) 253 | 254 | first.pin_write(df_csv, "df_csv", type="csv") 255 | assert first.pin_list() == ["df_csv"] 256 | 257 | second = construct_from_board(board2) 258 | second.pin_write(df_csv, "another_df_csv", type="csv") 259 | 260 | assert sorted(second.pin_list()) == sorted(["df_csv", "another_df_csv"]) 261 | 262 | 263 | # Board particulars =========================================================== 264 | 265 | 266 | @pytest.mark.skip_on_github 267 | def test_board_constructor_local_default_writable(): 268 | with rm_env("PINS_DATA_DIR"): 269 | board = c.board_local() 270 | p_board = Path(board.board) 271 | 272 | check_dir_writable(p_board) 273 | assert p_board.name == "pins-py" 274 | 275 | 276 | def test_board_constructor_temp_writable(): 277 | with rm_env("PINS_DATA_DIR"): 278 | board = c.board_temp() 279 | p_board = Path(board.board) 280 | 281 | check_dir_writable(p_board) 282 | assert len(list(p_board.glob("*"))) == 0 283 | 284 | 285 | def test_board_constructor_folder(tmp_path: Path, df): 286 | board = c.board_folder(str(tmp_path)) 287 | board.pin_write(df, "some_df", type="csv") 288 | 289 | assert (tmp_path / "some_df").exists() 290 | df2 = board.pin_read("some_df") 291 | 292 | assert df.equals(df2) 293 | 294 | 295 | # Deparsing =================================================================== 296 | 297 | 298 | def test_board_deparse(board): 299 | prot = board.fs.protocol 300 | 301 | with rm_env("CONNECT_API_KEY"): 302 | if prot == "rsc": 303 | os.environ["CONNECT_API_KEY"] = board.fs.api.api_key 304 | 305 | new_board = eval(c.board_deparse(board), c.__dict__) 306 | new_board.pin_list() 307 | -------------------------------------------------------------------------------- /pins/tests/test_drivers.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | 5 | import fsspec 6 | import pandas as pd 7 | import pytest 8 | 9 | from pins._adaptors import create_adaptor 10 | from pins.config import PINS_ENV_INSECURE_READ 11 | from pins.drivers import default_title, load_data, load_path, save_data 12 | from pins.errors import PinsInsecureReadError 13 | from pins.meta import MetaRaw 14 | from pins.tests.helpers import rm_env 15 | 16 | 17 | @pytest.fixture 18 | def some_joblib(tmp_path: Path): 19 | import joblib 20 | 21 | p_obj = tmp_path / "some.joblib" 22 | joblib.dump({"a": 1}, p_obj) 23 | 24 | return p_obj 25 | 26 | 27 | # default title --------------------------------------------------------------- 28 | 29 | 30 | class ExC: 31 | class D: 32 | pass 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "obj, dst_title", 37 | [ 38 | (pd.DataFrame({"x": [1, 2]}), "somename: a pinned 2 x 1 DataFrame"), 39 | (pd.DataFrame({"x": [1], "y": [2]}), "somename: a pinned 1 x 2 DataFrame"), 40 | (ExC(), "somename: a pinned ExC object"), 41 | (ExC().D(), "somename: a pinned ExC.D object"), 42 | ([1, 2, 3], "somename: a pinned list object"), 43 | ], 44 | ) 45 | def test_default_title(obj, dst_title): 46 | res = default_title(obj, "somename") 47 | assert res == dst_title 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "type_", 52 | [ 53 | "csv", 54 | "arrow", 55 | "parquet", 56 | "joblib", 57 | ], 58 | ) 59 | def test_driver_roundtrip(tmp_path: Path, type_): 60 | # TODO: I think this test highlights the challenge of getting the flow 61 | # 
between metadata, drivers, and the metafactory right. 62 | # There is the name of the data (relative to the pin directory), and the full 63 | # name of data in its temporary directory. 64 | import pandas as pd 65 | 66 | df = pd.DataFrame({"x": [1, 2, 3]}) 67 | 68 | fname = "some_df" 69 | full_file = f"{fname}.{type_}" 70 | 71 | p_obj = tmp_path / fname 72 | res_fname = save_data(df, p_obj, type_) 73 | 74 | assert Path(res_fname).name == full_file 75 | 76 | meta = MetaRaw(full_file, type_, "my_pin") 77 | obj = load_data(meta, fsspec.filesystem("file"), tmp_path, allow_pickle_read=True) 78 | 79 | assert df.equals(obj) 80 | 81 | 82 | @pytest.mark.parametrize( 83 | "type_", 84 | [ 85 | "json", 86 | ], 87 | ) 88 | def test_driver_roundtrip_json(tmp_path: Path, type_): 89 | df = {"x": [1, 2, 3]} 90 | 91 | fname = "some_df" 92 | full_file = f"{fname}.{type_}" 93 | 94 | p_obj = tmp_path / fname 95 | res_fname = save_data(df, p_obj, type_) 96 | 97 | assert Path(res_fname).name == full_file 98 | 99 | meta = MetaRaw(full_file, type_, "my_pin") 100 | obj = load_data(meta, fsspec.filesystem("file"), tmp_path, allow_pickle_read=True) 101 | 102 | assert df == obj 103 | 104 | 105 | def test_driver_feather_write_error(tmp_path: Path): 106 | import pandas as pd 107 | 108 | df = pd.DataFrame({"x": [1, 2, 3]}) 109 | 110 | fname = "some_df" 111 | 112 | p_obj = tmp_path / fname 113 | 114 | with pytest.raises(NotImplementedError) as exc_info: 115 | save_data(df, p_obj, "feather") 116 | 117 | assert '"feather" no longer supported.' in exc_info.value.args[0] 118 | 119 | 120 | def test_driver_feather_read_backwards_compat(tmp_path: Path): 121 | import pandas as pd 122 | 123 | df = pd.DataFrame({"x": [1, 2, 3]}) 124 | 125 | fname = "some_df" 126 | full_file = f"{fname}.feather" 127 | 128 | df.to_feather(tmp_path / full_file) 129 | 130 | obj = load_data( 131 | MetaRaw(full_file, "feather", "my_pin"), fsspec.filesystem("file"), tmp_path 132 | ) 133 | 134 | assert df.equals(obj) 135 | 136 | 137 | def test_driver_pickle_read_fail_explicit(some_joblib): 138 | meta = MetaRaw(some_joblib.name, "joblib", "my_pin") 139 | with pytest.raises(PinsInsecureReadError): 140 | load_data( 141 | meta, fsspec.filesystem("file"), some_joblib.parent, allow_pickle_read=False 142 | ) 143 | 144 | 145 | def test_driver_pickle_read_fail_default(some_joblib): 146 | meta = MetaRaw(some_joblib.name, "joblib", "my_pin") 147 | with rm_env(PINS_ENV_INSECURE_READ), pytest.raises(PinsInsecureReadError): 148 | load_data( 149 | meta, fsspec.filesystem("file"), some_joblib.parent, allow_pickle_read=False 150 | ) 151 | 152 | 153 | def test_driver_apply_suffix_false(tmp_path: Path): 154 | import pandas as pd 155 | 156 | df = pd.DataFrame({"x": [1, 2, 3]}) 157 | 158 | fname = "some_df" 159 | type_ = "csv" 160 | 161 | p_obj = tmp_path / fname 162 | res_fname = save_data(df, p_obj, type_, apply_suffix=False) 163 | 164 | assert Path(res_fname).name == "some_df" 165 | 166 | 167 | class TestSaveData: 168 | def test_accepts_pandas_df(self, tmp_path: Path): 169 | import pandas as pd 170 | 171 | df = pd.DataFrame({"x": [1, 2, 3]}) 172 | result = save_data(df, tmp_path / "some_df", "csv") 173 | assert Path(result) == tmp_path / "some_df.csv" 174 | 175 | def test_accepts_adaptor(self, tmp_path: Path): 176 | import pandas as pd 177 | 178 | df = pd.DataFrame({"x": [1, 2, 3]}) 179 | adaptor = create_adaptor(df) 180 | result = save_data(adaptor, tmp_path / "some_df", "csv") 181 | assert Path(result) == tmp_path / "some_df.csv" 182 | 183 | 184 | class TestLoadFile: 185 | 
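# (the three tests below pin down load_path's behavior: a plain file name
# passes through unchanged, the legacy "table" type always resolves to
# "data.csv", and a non-None version is prefixed onto the path, e.g. "v1/a")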
def test_str_file(self): 186 | class _MockMetaStrFile: 187 | file: str = "a" 188 | type: str = "csv" 189 | 190 | assert load_path(_MockMetaStrFile().file, None, _MockMetaStrFile().type) == "a" 191 | 192 | def test_table(self): 193 | class _MockMetaTable: 194 | file: str = "a" 195 | type: str = "table" 196 | 197 | assert load_path(_MockMetaTable().file, None, _MockMetaTable().type) == "data.csv" 198 | 199 | def test_version(self): 200 | class _MockMetaTable: 201 | file: str = "a" 202 | type: str = "csv" 203 | 204 | assert load_path(_MockMetaTable().file, "v1", _MockMetaTable().type) == "v1/a" 205 | -------------------------------------------------------------------------------- /pins/tests/test_meta.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from datetime import datetime 3 | from io import StringIO 4 | 5 | import pytest 6 | import yaml 7 | 8 | from pins.meta import Meta, MetaFactory 9 | from pins.versions import Version 10 | 11 | META_DEFAULTS = { 12 | "title": "some title", 13 | "description": "some description", 14 | "file": "some_file.csv", 15 | "file_size": 3, 16 | "pin_hash": "abcdef", 17 | "created": "20001230T124647Z", 18 | "type": "csv", 19 | "api_version": 1, 20 | "version": Version(datetime(2000, 12, 30, 12, 46, 47), "abcdef"), 21 | } 22 | 23 | 24 | @pytest.fixture 25 | def meta(): 26 | return Meta(**META_DEFAULTS) 27 | 28 | 29 | @pytest.mark.xfail 30 | def test_meta_to_dict_is_recursive(meta): 31 | d_meta = meta.to_dict() 32 | assert d_meta["version"] == meta.version.to_dict() 33 | 34 | 35 | def test_meta_to_pin_dict_roundtrip(meta): 36 | d_meta = meta.to_pin_dict() 37 | meta2 = Meta.from_pin_dict(d_meta, meta.name, meta.version) 38 | assert meta == meta2 39 | 40 | 41 | def test_meta_unknown_fields(): 42 | m = Meta(**META_DEFAULTS, unknown_fields={"some_other_field": 1}) 43 | 44 | assert m.some_other_field == 1 45 | 46 | with pytest.raises(AttributeError): 47 | m.should_not_exist_here 48 | 49 | assert "unknown_fields" not in m.to_pin_dict() 50 | assert "some_other_field" not in m.to_pin_dict() 51 | 52 | 53 | def test_meta_factory_create(): 54 | mf = MetaFactory() 55 | with tempfile.TemporaryDirectory() as tmp_dir: 56 | tmp_file = f"{tmp_dir}/some_name" 57 | with open(tmp_file, "wb") as f: 58 | f.write(b"test") 59 | 60 | kwargs = { 61 | "title": "some title", 62 | "description": "some description", 63 | "user": {}, 64 | "type": "csv", 65 | "name": "some_name", 66 | } 67 | 68 | meta = mf.create(tmp_dir, tmp_file, **kwargs) 69 | 70 | # test that kwargs are passed through ---- 71 | for k, v in kwargs.items(): 72 | assert getattr(meta, k) == v 73 | 74 | # test calculated fields ---- 75 | # TODO(compat): should append suffix to name attr (like in R pins)? 76 | # otherwise, will break cross compat? 
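    # i.e., R pins would record the suffixed name (e.g. "some_name.csv"),
    # whereas the assertion below expects the bare name.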
77 | assert meta.file == "some_name" 78 | assert meta.file_size == 4 79 | 80 | 81 | def test_meta_factory_read_yaml_roundtrip(meta): 82 | pin_yaml = meta.to_pin_yaml() 83 | 84 | mf = MetaFactory() 85 | meta2 = mf.read_pin_yaml(StringIO(pin_yaml), meta.name, meta.version) 86 | 87 | assert meta == meta2 88 | 89 | 90 | def test_meta_factory_roundtrip_unknown(meta): 91 | meta_dict = meta.to_pin_dict() 92 | meta_dict["some_other_field"] = 1 93 | 94 | pin_yaml = yaml.dump(meta_dict) 95 | 96 | mf = MetaFactory() 97 | 98 | meta2 = mf.read_pin_yaml(StringIO(pin_yaml), meta.name, meta.version) 99 | 100 | assert meta2 == meta 101 | assert meta2.some_other_field == 1 102 | -------------------------------------------------------------------------------- /pins/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pins.config import pins_options 4 | from pins.utils import inform 5 | 6 | 7 | @pytest.fixture 8 | def quiet(): 9 | orig = pins_options.quiet 10 | pins_options.quiet = True 11 | yield 12 | pins_options.quiet = orig 13 | 14 | 15 | def test_inform(capsys): 16 | msg = "a message" 17 | inform(None, msg) 18 | captured = capsys.readouterr() 19 | assert captured.err == msg + "\n" 20 | 21 | 22 | def test_inform_quiet(quiet, capsys): 23 | inform(None, "a message") 24 | captured = capsys.readouterr() 25 | assert captured.err == "" 26 | -------------------------------------------------------------------------------- /pins/tests/test_versions.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from io import BytesIO 3 | 4 | import pytest 5 | import xxhash 6 | 7 | from pins.errors import PinsVersionError 8 | from pins.versions import Version 9 | 10 | EXAMPLE_DATE = datetime(2021, 1, 2, 13, 58, 59) 11 | 12 | 13 | @pytest.fixture 14 | def bytes_(): 15 | return BytesIO(b"123"), xxhash.xxh64(b"123").hexdigest() 16 | 17 | 18 | def test_version_from_string(): 19 | version = Version.from_string("20220209T220116Z-baf3f") 20 | assert str(version.created) == "2022-02-09 22:01:16" 21 | assert version.hash == "baf3f" 22 | 23 | 24 | def test_version_from_string_too_many_hyphens(): 25 | with pytest.raises( 26 | PinsVersionError, match="version string can only have 1 '-', but contains 2" 27 | ): 28 | Version.from_string("20220209T220116Z-baf3f-") 29 | 30 | 31 | def test_version_from_string_too_few_hyphens(): 32 | with pytest.raises( 33 | PinsVersionError, match="version string can only have 1 '-', but contains 0" 34 | ): 35 | Version.from_string("20220209T220116Zbaf3f") 36 | 37 | 38 | def test_version_from_string_baddate(): 39 | with pytest.raises(PinsVersionError, match="Invalid date part of version: bug"): 40 | Version.from_string("bug-baf3f") 41 | 42 | 43 | def test_version_hash_file(bytes_): 44 | f_bytes, digest = bytes_ 45 | assert Version.hash_file(f_bytes) == digest 46 | 47 | 48 | def test_version_from_files(bytes_): 49 | f_bytes, digest = bytes_ 50 | v = Version.from_files([f_bytes], EXAMPLE_DATE) 51 | 52 | assert v.hash == digest 53 | assert v.created == EXAMPLE_DATE 54 | -------------------------------------------------------------------------------- /pins/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import sys 4 | from functools import update_wrapper 5 | from types import MethodType 6 | from warnings import warn 7 | 8 | from .config import pins_options 9 | 10 | 11 | def inform(log, 
msg): 12 | if log is not None: 13 | log.info(msg) 14 | 15 | if not pins_options.quiet: 16 | print(msg, file=sys.stderr) 17 | 18 | 19 | def warn_deprecated(msg): 20 | warn(msg, DeprecationWarning) 21 | 22 | 23 | def hash_name(path, same_name): 24 | if same_name: 25 | _hash = os.path.basename(path) 26 | else: 27 | _hash = hashlib.sha256(path.encode()).hexdigest() 28 | return _hash 29 | 30 | 31 | class ExtendMethodDoc: 32 | # Note that the indentation assumes these are top-level method docstrings, 33 | # so are indented 8 spaces (after the initial sentence). 34 | template = """\ 35 | {current_doc} 36 | 37 | Parent method documentation: 38 | 39 | {parent_doc} 40 | """ 41 | 42 | def __init__(self, func): 43 | self.func = func 44 | 45 | # allows sphinx to add the method signature to the docs 46 | # this is pretty benign, since it's very hard to call a descriptor 47 | # after class initialization (where __set_name__ is called). 48 | self.__call__ = func 49 | 50 | def __set_name__(self, owner, name): 51 | bound_parent_meth = getattr(super(owner, owner), name) 52 | 53 | self._parent_doc = bound_parent_meth.__doc__ 54 | self._orig_doc = self.func.__doc__ 55 | 56 | if self._orig_doc is not None: 57 | # update the docstring of the subclass method to include parent doc. 58 | self.func.__doc__ = self.template.format( 59 | current_doc=self._orig_doc, parent_doc=self._parent_doc 60 | ) 61 | 62 | # make descriptor look like wrapped function 63 | update_wrapper( 64 | self, self.func, ("__doc__", "__name__", "__module__", "__qualname__") 65 | ) 66 | 67 | def __get__(self, obj, objtype=None): 68 | if obj is None: 69 | # accessing from class, return descriptor itself. 70 | return self 71 | 72 | # accessing from instance 73 | return MethodType(self.func, obj) 74 | 75 | def __call__(self, *args, **kwargs): 76 | # this is defined, so that callable(ExtendMethodDoc(...)) is True, 77 | # which allows all the inspect machinery to give sphinx the __call__ 78 | # attribute we set in __init__. 
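        #
        # A hypothetical usage sketch (illustrative names, not from this
        # module): given a Parent class whose pin_read has a docstring, a
        # subclass can write
        #
        #     class Child(Parent):
        #         @ExtendMethodDoc
        #         def pin_read(self, name):
        #             """Child-specific notes."""
        #             ...
        #
        # after which Child.pin_read.__doc__ contains the child notes
        # followed by the "Parent method documentation:" section rendered
        # from the template above.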
79 | raise NotImplementedError() 80 | 81 | 82 | # based off fsspec.isfilelike 83 | def isfilelike(file) -> bool: 84 | for attr in ["read", "close", "tell"]: 85 | if not hasattr(file, attr): 86 | return False 87 | return True 88 | -------------------------------------------------------------------------------- /pins/versions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from collections.abc import Mapping, Sequence 5 | from dataclasses import asdict, dataclass 6 | from datetime import datetime 7 | from pathlib import Path 8 | 9 | from xxhash import xxh64 10 | 11 | from ._types import IOBase, StrOrFile 12 | from .errors import PinsVersionError 13 | 14 | _log = logging.getLogger(__name__) 15 | 16 | VERSION_TIME_FORMAT = "%Y%m%dT%H%M%SZ" 17 | 18 | 19 | class _VersionBase: 20 | pass 21 | 22 | 23 | @dataclass 24 | class VersionRaw(_VersionBase): 25 | version: str 26 | 27 | def to_dict(self) -> Mapping: 28 | return asdict(self) 29 | 30 | 31 | @dataclass 32 | class Version(_VersionBase): 33 | created: datetime 34 | hash: str 35 | 36 | def to_dict(self) -> Mapping: 37 | # properties not automatically added, so need to handle manually 38 | res = asdict(self) 39 | res["version"] = self.version 40 | 41 | return res 42 | 43 | @property 44 | def version(self) -> str: 45 | date_part = self.created.strftime(VERSION_TIME_FORMAT) 46 | hash_part = self.hash[:5] 47 | return f"{date_part}-{hash_part}" 48 | 49 | @staticmethod 50 | def parse_created(x): 51 | return datetime.strptime(x, VERSION_TIME_FORMAT) 52 | 53 | def render_created(self): 54 | return self.created.strftime(VERSION_TIME_FORMAT) 55 | 56 | @staticmethod 57 | def hash_file(f: IOBase, block_size: int = -1) -> str: 58 | # TODO: what kind of things implement the "buffer API"? 59 | hasher = xxh64() 60 | buf = f.read(block_size) 61 | while len(buf) > 0: 62 | hasher.update(buf) 63 | buf = f.read(block_size) 64 | 65 | return hasher.hexdigest() 66 | 67 | @classmethod 68 | def from_string(cls, version: str) -> Version: 69 | parts = version.split("-") 70 | 71 | if len(parts) != 2: 72 | raise PinsVersionError( 73 | f"version string can only have 1 '-', but contains {len(parts) - 1}" 74 | ) 75 | 76 | dt_string, hash_ = parts 77 | 78 | # TODO: the datetime from pins is not timezone aware, but it looks like 79 | # R pins parses as UTC, then unsets the UTC part? 80 | try: 81 | created = cls.parse_created(dt_string) 82 | except ValueError: 83 | raise PinsVersionError(f"Invalid date part of version: {dt_string}") 84 | 85 | obj = cls(created, hash_) 86 | 87 | if obj.version != version: 88 | raise ValueError( 89 | f"Version parsing failed. Received version string {version}, but " 90 | f"output version is {obj.version}."
91 | ) 92 | 93 | return obj 94 | 95 | @classmethod 96 | def from_files( 97 | cls, files: Sequence[StrOrFile], created: datetime | None = None 98 | ) -> Version: 99 | hashes = [] 100 | for f in files: 101 | hash_ = cls.hash_file(open(f, "rb") if isinstance(f, (str, Path)) else f) 102 | hashes.append(hash_) 103 | 104 | if created is None: 105 | created = datetime.now() 106 | 107 | if len(hashes) > 1: 108 | # Combine the hashes into a single string 109 | combined_hashes = "".join(hashes) 110 | 111 | # Create an xxh64 hash of the combined string 112 | hashes = [xxh64(combined_hashes).hexdigest()] 113 | 114 | return cls(created, hashes[0]) 115 | 116 | @classmethod 117 | def from_meta_fields(cls, created: str, hash: str): 118 | created_dt = cls.parse_created(created) 119 | return cls(created_dt, hash) 120 | 121 | 122 | def guess_version(x: str): 123 | try: 124 | return Version.from_string(x) 125 | except PinsVersionError: 126 | return VersionRaw(x) 127 | 128 | 129 | def version_setup(board, name, new_version, versioned): 130 | if board.pin_exists(name): 131 | versions_df = board.pin_versions(name, as_df=True) 132 | versions = versions_df["version"].to_list() 133 | old_version = versions[-1] 134 | n_versions = len(versions) 135 | 136 | else: 137 | n_versions = 0 138 | 139 | # if versioned was not specified, default to versioned when the pin already has multiple versions; otherwise fall back to the board setting 140 | if versioned is None: 141 | versioned = True if n_versions > 1 else board.versioned 142 | 143 | if versioned or n_versions == 0: 144 | _log.info(f"Creating new version '{new_version}'") 145 | elif n_versions == 1: 146 | _log.info(f"Replacing version '{old_version}' with '{new_version}'") 147 | board.pin_version_delete(name, old_version) 148 | else: 149 | raise PinsVersionError( 150 | "Pin is versioned, but you have requested a write without versions. " 151 | "To un-version a pin, you must delete it." 152 | ) 153 | 154 | return new_version 155 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pins" 3 | description = "Publish data sets, models, and other Python objects, making it easy to share them across projects and with your colleagues."
4 | readme.content-type = "text/markdown" 5 | readme.file = "README.md" 6 | urls.Documentation = "https://rstudio.github.io/pins-python" 7 | urls.Homepage = "https://github.com/rstudio/pins-python" 8 | authors = [{ name = "Michael Chow", email = "michael.chow@posit.co" }] 9 | maintainers = [{ name = "Isabel Zimmerman", email = "isabel.zimmerman@posit.co" }] 10 | keywords = ["data", "tidyverse"] 11 | classifiers = [ 12 | "Programming Language :: Python :: 3 :: Only", 13 | "Programming Language :: Python :: 3.9", 14 | "Programming Language :: Python :: 3.10", 15 | "Programming Language :: Python :: 3.11", 16 | "Programming Language :: Python :: 3.12", 17 | "Programming Language :: Python :: 3.13", 18 | "License :: OSI Approved :: MIT License", 19 | ] 20 | requires-python = ">=3.9" 21 | dynamic = ["version"] 22 | dependencies = [ 23 | "appdirs<2", # Using appdirs rather than platformdirs is deliberate, see https://github.com/rstudio/pins-python/pull/239 24 | "fsspec>=2022.2", 25 | "humanize>=1", 26 | "importlib-metadata>=4.4", 27 | "importlib-resources>=1.3", 28 | "jinja2>=2.10", 29 | "joblib>=0.12", 30 | "pandas>=0.23", 31 | "pyyaml>=3.13", 32 | "requests", 33 | "xxhash>=1", 34 | "databackend>=0.0.3", 35 | "typing_extensions" 36 | ] 37 | 38 | [project.optional-dependencies] 39 | aws = ["s3fs"] 40 | azure = ["adlfs"] 41 | check = [ 42 | "pre-commit", 43 | "pyright==1.1.372", # Pinned; manually sync with .github/workflows/code-checks.yml 44 | "ruff==0.5.4", # Pinned; manually sync with pre-commit-config.yaml 45 | "types-appdirs", 46 | "databricks-sdk" 47 | ] 48 | databricks = ["databricks-sdk"] 49 | doc = [ 50 | "ipykernel", 51 | "ipython<=8.12", 52 | "nbclient", 53 | "nbformat", 54 | "quartodoc", 55 | ] 56 | gcs = ["gcsfs"] 57 | test = [ 58 | "adlfs>=2024.4.1", 59 | "fastparquet", 60 | "gcsfs", 61 | "pip-tools", 62 | "pyarrow", 63 | "pytest==7.1.3", 64 | "pytest-cases", 65 | "pytest-dotenv", 66 | "pytest-parallel", 67 | "s3fs", 68 | "rdata", 69 | "databricks-sdk", 70 | ] 71 | 72 | [build-system] 73 | requires = ["setuptools>=45", "setuptools-scm>=6.2", "wheel"] 74 | build-backend = "setuptools.build_meta" 75 | 76 | [tool.setuptools] 77 | include-package-data = true 78 | 79 | [tool.setuptools.packages] 80 | find = { namespaces = false } 81 | 82 | [tool.setuptools_scm] 83 | 84 | [tool.distutils.bdist_wheel] 85 | universal = 1 86 | 87 | [tool.pytest.ini_options] 88 | markers = [ 89 | "fs_file: mark test to only run on local filesystem", 90 | "fs_s3: mark test to only run on AWS S3 bucket filesystem", 91 | "fs_gcs: mark test to only run on Google Cloud Storage bucket filesystem", 92 | "fs_abfs: mark test to only run on Azure Datalake filesystem", 93 | "fs_rsc: mark test to only run on Posit Connect filesystem", 94 | "fs_dbc: mark test to only run on Databricks Volume filesystem", 95 | "skip_on_github: skip this test if running on GitHub", 96 | ] 97 | testpaths = ["pins"] 98 | addopts = "--doctest-modules" 99 | doctest_optionflags = "NORMALIZE_WHITESPACE" 100 | 101 | [tool.pyright] 102 | include = ["pins"] 103 | exclude = ["**/__pycache__"] 104 | ignore = ["pins/tests"] 105 | pythonVersion = "3.12" # Use the maximum version supported by python-pins 106 | pythonPlatform = "Linux" 107 | 108 | # Tracking compliance with these rules at https://github.com/rstudio/pins-python/issues/272 109 | reportArgumentType = false 110 | reportAttributeAccessIssue = false 111 | reportCallIssue = false 112 | reportIncompatibleMethodOverride = false 113 | reportMissingTypeStubs = false 114 | reportOptionalMemberAccess 
= false 115 | reportOptionalSubscript = false 116 | reportPossiblyUnboundVariable = false 117 | reportReturnType = false 118 | 119 | [tool.ruff] 120 | line-length = 90 121 | extend-exclude = ["docs"] 122 | 123 | [tool.ruff.lint] 124 | select = [ 125 | "E", # Style 126 | "F", # Errors 127 | "FA", # Use from __future__ import annotations for cleaner type hints 128 | "I", # Import sorting 129 | "UP", # Upgrade to latest supported Python syntax 130 | "W", # Style 131 | "A", # Don't shadow built-ins 132 | ] 133 | ignore = [ 134 | "E501", # Line too long 135 | "A002", # The pins interface includes builtin names in args, e.g. hash, id, etc. 136 | ] 137 | 138 | [tool.codespell] 139 | skip = ["*.js"] 140 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --extra=check --extra=doc --extra=test --output-file=- --strip-extras pyproject.toml 6 | # 7 | adlfs==2024.12.0 8 | # via pins (pyproject.toml) 9 | aiobotocore==2.22.0 10 | # via s3fs 11 | aiohappyeyeballs==2.6.1 12 | # via aiohttp 13 | aiohttp==3.12.7 14 | # via 15 | # adlfs 16 | # aiobotocore 17 | # gcsfs 18 | # s3fs 19 | aioitertools==0.12.0 20 | # via aiobotocore 21 | aiosignal==1.3.2 22 | # via aiohttp 23 | annotated-types==0.7.0 24 | # via pydantic 25 | appdirs==1.4.4 26 | # via pins (pyproject.toml) 27 | appnope==0.1.4 28 | # via 29 | # ipykernel 30 | # ipython 31 | asttokens==3.0.0 32 | # via stack-data 33 | attrs==25.3.0 34 | # via 35 | # aiohttp 36 | # jsonschema 37 | # pytest 38 | # referencing 39 | # sphobjinv 40 | azure-core==1.34.0 41 | # via 42 | # adlfs 43 | # azure-identity 44 | # azure-storage-blob 45 | azure-datalake-store==0.0.53 46 | # via adlfs 47 | azure-identity==1.23.0 48 | # via adlfs 49 | azure-storage-blob==12.25.1 50 | # via adlfs 51 | backcall==0.2.0 52 | # via ipython 53 | beartype==0.21.0 54 | # via plum-dispatch 55 | black==25.1.0 56 | # via quartodoc 57 | botocore==1.37.3 58 | # via aiobotocore 59 | build==1.2.2.post1 60 | # via pip-tools 61 | cachetools==5.5.2 62 | # via google-auth 63 | certifi==2025.4.26 64 | # via 65 | # requests 66 | # sphobjinv 67 | cffi==1.17.1 68 | # via 69 | # azure-datalake-store 70 | # cryptography 71 | cfgv==3.4.0 72 | # via pre-commit 73 | charset-normalizer==3.4.2 74 | # via requests 75 | click==8.2.1 76 | # via 77 | # black 78 | # pip-tools 79 | # quartodoc 80 | colorama==0.4.6 81 | # via griffe 82 | comm==0.2.2 83 | # via ipykernel 84 | cramjam==2.10.0 85 | # via fastparquet 86 | cryptography==45.0.3 87 | # via 88 | # azure-identity 89 | # azure-storage-blob 90 | # msal 91 | # pyjwt 92 | databackend==0.0.3 93 | # via pins (pyproject.toml) 94 | databricks-sdk==0.55.0 95 | # via pins (pyproject.toml) 96 | debugpy==1.8.14 97 | # via ipykernel 98 | decopatch==1.4.10 99 | # via pytest-cases 100 | decorator==5.2.1 101 | # via 102 | # gcsfs 103 | # ipython 104 | distlib==0.3.9 105 | # via virtualenv 106 | executing==2.2.0 107 | # via stack-data 108 | fastjsonschema==2.21.1 109 | # via nbformat 110 | fastparquet==2024.11.0 111 | # via pins (pyproject.toml) 112 | filelock==3.18.0 113 | # via virtualenv 114 | frozenlist==1.6.2 115 | # via 116 | # aiohttp 117 | # aiosignal 118 | fsspec==2025.5.1 119 | # via 120 | # adlfs 121 | # fastparquet 122 | # gcsfs 123 | # pins (pyproject.toml) 124 | # s3fs 125 | gcsfs==2025.5.1 126 | # via pins 
(pyproject.toml) 127 | google-api-core==2.25.0 128 | # via 129 | # google-cloud-core 130 | # google-cloud-storage 131 | google-auth==2.40.2 132 | # via 133 | # databricks-sdk 134 | # gcsfs 135 | # google-api-core 136 | # google-auth-oauthlib 137 | # google-cloud-core 138 | # google-cloud-storage 139 | google-auth-oauthlib==1.2.2 140 | # via gcsfs 141 | google-cloud-core==2.4.3 142 | # via google-cloud-storage 143 | google-cloud-storage==3.1.0 144 | # via gcsfs 145 | google-crc32c==1.7.1 146 | # via 147 | # google-cloud-storage 148 | # google-resumable-media 149 | google-resumable-media==2.7.2 150 | # via google-cloud-storage 151 | googleapis-common-protos==1.70.0 152 | # via google-api-core 153 | griffe==1.7.3 154 | # via quartodoc 155 | humanize==4.12.3 156 | # via pins (pyproject.toml) 157 | identify==2.6.12 158 | # via pre-commit 159 | idna==3.10 160 | # via 161 | # requests 162 | # yarl 163 | importlib-metadata==8.7.0 164 | # via 165 | # pins (pyproject.toml) 166 | # quartodoc 167 | importlib-resources==6.5.2 168 | # via 169 | # pins (pyproject.toml) 170 | # quartodoc 171 | iniconfig==2.1.0 172 | # via pytest 173 | ipykernel==6.29.5 174 | # via pins (pyproject.toml) 175 | ipython==8.12.0 176 | # via 177 | # ipykernel 178 | # pins (pyproject.toml) 179 | isodate==0.7.2 180 | # via azure-storage-blob 181 | jedi==0.19.2 182 | # via ipython 183 | jinja2==3.1.6 184 | # via pins (pyproject.toml) 185 | jmespath==1.0.1 186 | # via 187 | # aiobotocore 188 | # botocore 189 | joblib==1.5.1 190 | # via pins (pyproject.toml) 191 | jsonschema==4.24.0 192 | # via 193 | # nbformat 194 | # sphobjinv 195 | jsonschema-specifications==2025.4.1 196 | # via jsonschema 197 | jupyter-client==8.6.3 198 | # via 199 | # ipykernel 200 | # nbclient 201 | jupyter-core==5.8.1 202 | # via 203 | # ipykernel 204 | # jupyter-client 205 | # nbclient 206 | # nbformat 207 | makefun==1.16.0 208 | # via 209 | # decopatch 210 | # pytest-cases 211 | markdown-it-py==3.0.0 212 | # via rich 213 | markupsafe==3.0.2 214 | # via jinja2 215 | matplotlib-inline==0.1.7 216 | # via 217 | # ipykernel 218 | # ipython 219 | mdurl==0.1.2 220 | # via markdown-it-py 221 | msal==1.32.3 222 | # via 223 | # azure-datalake-store 224 | # azure-identity 225 | # msal-extensions 226 | msal-extensions==1.3.1 227 | # via azure-identity 228 | multidict==6.4.4 229 | # via 230 | # aiobotocore 231 | # aiohttp 232 | # yarl 233 | mypy-extensions==1.1.0 234 | # via black 235 | nbclient==0.10.2 236 | # via pins (pyproject.toml) 237 | nbformat==5.10.4 238 | # via 239 | # nbclient 240 | # pins (pyproject.toml) 241 | nest-asyncio==1.6.0 242 | # via ipykernel 243 | nodeenv==1.9.1 244 | # via 245 | # pre-commit 246 | # pyright 247 | numpy==2.2.6 248 | # via 249 | # fastparquet 250 | # pandas 251 | # rdata 252 | # xarray 253 | oauthlib==3.2.2 254 | # via requests-oauthlib 255 | packaging==25.0 256 | # via 257 | # black 258 | # build 259 | # fastparquet 260 | # ipykernel 261 | # pytest 262 | # pytest-cases 263 | # xarray 264 | pandas==2.2.3 265 | # via 266 | # fastparquet 267 | # pins (pyproject.toml) 268 | # rdata 269 | # xarray 270 | parso==0.8.4 271 | # via jedi 272 | pathspec==0.12.1 273 | # via black 274 | pexpect==4.9.0 275 | # via ipython 276 | pickleshare==0.7.5 277 | # via ipython 278 | pip-tools==7.4.1 279 | # via pins (pyproject.toml) 280 | platformdirs==4.3.8 281 | # via 282 | # black 283 | # jupyter-core 284 | # virtualenv 285 | pluggy==1.6.0 286 | # via pytest 287 | plum-dispatch==2.5.7 288 | # via quartodoc 289 | pre-commit==4.2.0 290 | # via pins 
(pyproject.toml) 291 | prompt-toolkit==3.0.51 292 | # via ipython 293 | propcache==0.3.1 294 | # via 295 | # aiohttp 296 | # yarl 297 | proto-plus==1.26.1 298 | # via google-api-core 299 | protobuf==6.31.1 300 | # via 301 | # google-api-core 302 | # googleapis-common-protos 303 | # proto-plus 304 | psutil==7.0.0 305 | # via ipykernel 306 | ptyprocess==0.7.0 307 | # via pexpect 308 | pure-eval==0.2.3 309 | # via stack-data 310 | py==1.11.0 311 | # via pytest 312 | pyarrow==20.0.0 313 | # via pins (pyproject.toml) 314 | pyasn1==0.6.1 315 | # via 316 | # pyasn1-modules 317 | # rsa 318 | pyasn1-modules==0.4.2 319 | # via google-auth 320 | pycparser==2.22 321 | # via cffi 322 | pydantic==2.11.5 323 | # via quartodoc 324 | pydantic-core==2.33.2 325 | # via pydantic 326 | pygments==2.19.1 327 | # via 328 | # ipython 329 | # rich 330 | pyjwt==2.10.1 331 | # via 332 | # msal 333 | # pyjwt 334 | pyproject-hooks==1.2.0 335 | # via 336 | # build 337 | # pip-tools 338 | pyright==1.1.372 339 | # via pins (pyproject.toml) 340 | pytest==7.1.3 341 | # via 342 | # pins (pyproject.toml) 343 | # pytest-dotenv 344 | # pytest-parallel 345 | pytest-cases==3.8.6 346 | # via pins (pyproject.toml) 347 | pytest-dotenv==0.5.2 348 | # via pins (pyproject.toml) 349 | pytest-parallel==0.1.1 350 | # via pins (pyproject.toml) 351 | python-dateutil==2.9.0.post0 352 | # via 353 | # aiobotocore 354 | # botocore 355 | # jupyter-client 356 | # pandas 357 | python-dotenv==1.1.0 358 | # via pytest-dotenv 359 | pytz==2025.2 360 | # via pandas 361 | pyyaml==6.0.2 362 | # via 363 | # pins (pyproject.toml) 364 | # pre-commit 365 | # quartodoc 366 | pyzmq==26.4.0 367 | # via 368 | # ipykernel 369 | # jupyter-client 370 | quartodoc==0.10.0 371 | # via pins (pyproject.toml) 372 | rdata==0.11.2 373 | # via pins (pyproject.toml) 374 | referencing==0.36.2 375 | # via 376 | # jsonschema 377 | # jsonschema-specifications 378 | requests==2.32.3 379 | # via 380 | # azure-core 381 | # azure-datalake-store 382 | # databricks-sdk 383 | # gcsfs 384 | # google-api-core 385 | # google-cloud-storage 386 | # msal 387 | # pins (pyproject.toml) 388 | # quartodoc 389 | # requests-oauthlib 390 | requests-oauthlib==2.0.0 391 | # via google-auth-oauthlib 392 | rich==14.0.0 393 | # via plum-dispatch 394 | rpds-py==0.25.1 395 | # via 396 | # jsonschema 397 | # referencing 398 | rsa==4.9.1 399 | # via google-auth 400 | ruff==0.5.4 401 | # via pins (pyproject.toml) 402 | s3fs==2025.5.1 403 | # via pins (pyproject.toml) 404 | six==1.17.0 405 | # via 406 | # azure-core 407 | # python-dateutil 408 | sphobjinv==2.3.1.3 409 | # via quartodoc 410 | stack-data==0.6.3 411 | # via ipython 412 | tabulate==0.9.0 413 | # via quartodoc 414 | tblib==3.1.0 415 | # via pytest-parallel 416 | tomli==2.2.1 417 | # via pytest 418 | tornado==6.5.1 419 | # via 420 | # ipykernel 421 | # jupyter-client 422 | traitlets==5.14.3 423 | # via 424 | # comm 425 | # ipykernel 426 | # ipython 427 | # jupyter-client 428 | # jupyter-core 429 | # matplotlib-inline 430 | # nbclient 431 | # nbformat 432 | types-appdirs==1.4.3.5 433 | # via pins (pyproject.toml) 434 | typing-extensions==4.14.0 435 | # via 436 | # azure-core 437 | # azure-identity 438 | # azure-storage-blob 439 | # pins (pyproject.toml) 440 | # plum-dispatch 441 | # pydantic 442 | # pydantic-core 443 | # quartodoc 444 | # rdata 445 | # referencing 446 | # typing-inspection 447 | typing-inspection==0.4.1 448 | # via pydantic 449 | tzdata==2025.2 450 | # via pandas 451 | urllib3==2.4.0 452 | # via 453 | # botocore 454 | # requests 455 
| virtualenv==20.31.2 456 | # via pre-commit 457 | watchdog==6.0.0 458 | # via quartodoc 459 | wcwidth==0.2.13 460 | # via prompt-toolkit 461 | wheel==0.45.1 462 | # via pip-tools 463 | wrapt==1.17.2 464 | # via aiobotocore 465 | xarray==2025.4.0 466 | # via rdata 467 | xxhash==3.5.0 468 | # via pins (pyproject.toml) 469 | yarl==1.20.0 470 | # via aiohttp 471 | zipp==3.22.0 472 | # via importlib-metadata 473 | 474 | # The following packages are considered to be unsafe in a requirements file: 475 | # pip 476 | # setuptools 477 | -------------------------------------------------------------------------------- /requirements/minimum.txt: -------------------------------------------------------------------------------- 1 | fsspec==2022.2.0 2 | xxhash==1.0.0 3 | pandas==0.23.0 4 | jinja2==2.10.0 5 | joblib==0.12.0 6 | importlib-metadata==4.4 7 | importlib-resources==1.3 8 | appdirs<2.0.0 9 | humanize==1.0.0 10 | databackend==0.0.3 11 | -------------------------------------------------------------------------------- /script/ci-compat-check/.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | -------------------------------------------------------------------------------- /script/ci-compat-check/Makefile: -------------------------------------------------------------------------------- 1 | BOARD_BASE_DIR=tmp 2 | BOARD_PY=$(BOARD_BASE_DIR)/board-py 3 | BOARD_R=$(BOARD_BASE_DIR)/board-r 4 | 5 | all: validate 6 | 7 | clean: 8 | rm -r $(BOARD_PY) $(BOARD_R) 9 | 10 | validate: $(BOARD_PY) $(BOARD_R) 11 | @echo "\n\nRUNNING R PINS ---\n" 12 | Rscript validate_py_to_r.R $(BOARD_PY) $(BOARD_R) 13 | @echo "\n\nRUNNING PYTHON PINS ---\n" 14 | python validate_r_to_py.py $(BOARD_PY) $(BOARD_R) 15 | 16 | $(BOARD_PY): dump_py_pins.py 17 | python dump_py_pins.py $@ 18 | 19 | $(BOARD_R): dump_r_pins.R 20 | Rscript dump_r_pins.R $@ 21 | -------------------------------------------------------------------------------- /script/ci-compat-check/dump_py_pins.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pins import board_folder 4 | from pins.data import mtcars 5 | 6 | if len(sys.argv) < 2: 7 | raise ValueError("must pass board location as command-line argument") 8 | else: 9 | BOARD_PATH = sys.argv[1] 10 | 11 | board = board_folder(BOARD_PATH) 12 | board.pin_write(mtcars, "mtcars", type="csv") 13 | -------------------------------------------------------------------------------- /script/ci-compat-check/dump_r_pins.R: -------------------------------------------------------------------------------- 1 | library(pins) 2 | args <- commandArgs(trailingOnly=TRUE) 3 | 4 | board <- board_folder(args[1]) 5 | board %>% pin_write(mtcars, "mtcars", type="csv") 6 | -------------------------------------------------------------------------------- /script/ci-compat-check/validate_py_to_r.R: -------------------------------------------------------------------------------- 1 | library(pins) 2 | 3 | args <- commandArgs(trailingOnly=TRUE) 4 | 5 | 6 | # create board ---- 7 | 8 | board_py <- board_folder(args[1]) 9 | board_r <- board_folder(args[2]) 10 | 11 | 12 | # check pins ---- 13 | 14 | cat("Checking mtcars pin\n") 15 | 16 | res_mtcars <- board_py %>% pin_read("mtcars") 17 | stopifnot(all.equal(res_mtcars, datasets::mtcars, check.attributes=FALSE)) 18 | 19 | meta_mtcars_py <- board_py %>% pin_meta("mtcars") 20 | cat("\nPython meta:\n\n") 21 | print(meta_mtcars_py) 22 | 23 | meta_mtcars_r <- board_r %>% pin_meta("mtcars") 24 | 
cat("\nR meta:\n\n") 25 | print(meta_mtcars_r) 26 | -------------------------------------------------------------------------------- /script/ci-compat-check/validate_r_to_py.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pins import board_folder, data 4 | 5 | path_py, path_r = sys.argv[1], sys.argv[2] 6 | 7 | # create board ---- 8 | 9 | board_py = board_folder(path_py) 10 | board_r = board_folder(path_r) 11 | 12 | 13 | # check pins ---- 14 | 15 | print("Checking mtcars pin") 16 | 17 | res_mtcars = board_r.pin_read("mtcars") 18 | assert res_mtcars.equals(data.mtcars) 19 | 20 | meta_mtcars_py = board_py.pin_meta("mtcars") 21 | print("\nPython meta:\n") 22 | print(meta_mtcars_py) 23 | 24 | meta_mtcars_r = board_r.pin_meta("mtcars") 25 | print("\nR meta:\n") 26 | print(meta_mtcars_r) 27 | -------------------------------------------------------------------------------- /script/setup-rsconnect/add-users.sh: -------------------------------------------------------------------------------- 1 | awk ' { system("useradd -m -s /bin/bash "$1); system("echo \""$1":"$2"\" | chpasswd"); system("id "$1) } ' /etc/users.txt 2 | -------------------------------------------------------------------------------- /script/setup-rsconnect/dump_api_keys.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | from pins.rsconnect.api import _HackyConnect 5 | 6 | OUT_FILE = sys.argv[1] 7 | 8 | 9 | def get_api_key(user, password, email): 10 | rsc = _HackyConnect("http://localhost:3939") 11 | 12 | return rsc.create_first_admin(user, password, email).api_key 13 | 14 | 15 | api_keys = { 16 | "admin": get_api_key("admin", "admin0", "admin@example.com"), 17 | "susan": get_api_key("susan", "susan", "susan@example.com"), 18 | "derek": get_api_key("derek", "derek", "derek@example.com"), 19 | } 20 | 21 | json.dump(api_keys, open(OUT_FILE, "w")) 22 | -------------------------------------------------------------------------------- /script/setup-rsconnect/rstudio-connect.gcfg: -------------------------------------------------------------------------------- 1 | [Server] 2 | DataDir = /data 3 | Address = http://localhost:3939 4 | 5 | [HTTP] 6 | Listen = :3939 7 | 8 | [Authentication] 9 | Provider = pam 10 | 11 | [Authorization] 12 | DefaultUserRole = publisher 13 | 14 | [Python] 15 | Enabled = false 16 | 17 | [RPackageRepository "CRAN"] 18 | URL = https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 19 | 20 | [RPackageRepository "RSPM"] 21 | URL = https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 22 | -------------------------------------------------------------------------------- /script/setup-rsconnect/users.txt: -------------------------------------------------------------------------------- 1 | admin admin0 2 | test test 3 | susan susan 4 | derek derek 5 | -------------------------------------------------------------------------------- /script/stage_example_bundle.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | 5 | from pins.meta import MetaFactory 6 | from pins.rsconnect.fs import PinBundleManifest 7 | 8 | p_root = Path("pins/tests/example-bundle") 9 | p_root.parent.mkdir(parents=True, exist_ok=True) 10 | 11 | p_index = p_root / "index.html" 12 | p_index.write_text("yo") 13 | 14 | p_data = p_root / "data_frame.csv" 15 | df = pd.DataFrame({"x": [1, 2, 3]}) 16 | df.to_csv(p_data) 
17 | 18 | p_meta = p_root / "data.txt" 19 | meta = MetaFactory().create(str(p_data), "csv", title="some title", name="data_frame.csv") 20 | meta.to_yaml(p_meta.open("w")) 21 | 22 | # add manifest last, since it enumerates all the files 23 | # this lets you download them individually from rsconnect 24 | PinBundleManifest.add_manifest_to_directory(str(p_root)) 25 | -------------------------------------------------------------------------------- /script/stage_r_pins.R: -------------------------------------------------------------------------------- 1 | library(pins) 2 | 3 | df <- data.frame(x = 1:2, y = c("a", "b")) 4 | df_v2 <- data.frame(x = 1:2, y = c("a", "b"), z = 3:4) 5 | 6 | #board <- board_s3("ci-pins", prefix = "r-pins-test") 7 | board <- board_folder("pins/tests/pins-compat", versioned=TRUE) 8 | 9 | all_pins <- board %>% pin_list() 10 | board %>% pin_delete(all_pins) 11 | 12 | # write two versions of df as CSV ---- 13 | board %>% pin_write(df, "df_csv", type="csv") 14 | Sys.sleep(2) 15 | board %>% pin_write(df_v2, "df_csv", type="csv") 16 | 17 | # write one version of df as arrow ---- 18 | board %>% pin_write(df, "df_arrow", type="arrow") 19 | 20 | # write one version of df as RDS ---- 21 | board %>% pin_write(df, "df_rds", type="rds") 22 | 23 | # write unversioned pin (no type given, so rds by default) 24 | board %>% pin_write(df, "df_unversioned", versioned=FALSE) 25 | -------------------------------------------------------------------------------- /script/stage_r_pins_old_types.R: -------------------------------------------------------------------------------- 1 | cache = tempfile() 2 | board_register_local(cache = cache) 3 | 4 | some_df = data.frame(a = 1:2, b = c("x","y")) 5 | pin(some_df, name="a-table") 6 | 7 | # note that pin automatically changes _ to - 8 | # TODO: for now manually copying into pins/tests/pins-old-types 9 | # Note that a trivial version name, v, is used to check the reading behavior 10 | # since pins v0 does not save versions 11 | # >>> mkdir pins/tests/pins-old-types/a-table/v/ 12 | # >>> cp -r pins/tests/pins-old-types/a-table/v/ 13 | fs::path(cache, "a-table") 14 | --------------------------------------------------------------------------------