├── tests
    ├── __init__.py
    ├── fixtures
    │   ├── empty.txt
    │   ├── tiny_dataset
    │   │   ├── some_text.txt
    │   │   └── test.py
    │   └── cards
    │   │   ├── sample_template.md
    │   │   ├── sample_datasetcard_template.md
    │   │   ├── sample_no_metadata.md
    │   │   ├── sample_invalid_card_data.md
    │   │   ├── sample_simple.md
    │   │   ├── sample_windows_line_breaks.md
    │   │   ├── sample_datasetcard_simple.md
    │   │   ├── sample_invalid_model_index.md
    │   │   └── sample_simple_model_index.md
    ├── test_utils_runtime.py
    ├── README.md
    ├── testing_constants.py
    ├── test_utils_fixes.py
    ├── test_tf_import.py
    ├── test_utils_datetime.py
    ├── test_utils_sha.py
    ├── conftest.py
    ├── test_offline_utils.py
    ├── test_utils_chunks.py
    ├── test_login_utils.py
    ├── test_init_lazy_loading.py
    ├── test_cli.py
    ├── test_utils_git_credentials.py
    ├── test_utils_cli.py
    ├── test_utils_pagination.py
    ├── test_utils_assets.py
    ├── test_utils_paths.py
    ├── test_commit_api.py
    ├── test_fastai_integration.py
    ├── test_utils_tqdm.py
    ├── test_inference_api.py
    ├── test_utils_validators.py
    └── test_utils_http.py
├── contrib
    ├── __init__.py
    ├── timm
    │   ├── __init__.py
    │   ├── requirements.txt
    │   └── test_timm.py
    ├── sentence_transformers
    │   ├── __init__.py
    │   ├── requirements.txt
    │   └── test_sentence_transformers.py
    ├── conftest.py
    ├── utils.py
    └── README.md
├── src
    └── huggingface_hub
    │   ├── py.typed
    │   ├── utils
    │       ├── _typing.py
    │       ├── _fixes.py
    │       ├── sha.py
    │       ├── _hf_folder.py
    │       ├── _pagination.py
    │       ├── _chunk_utils.py
    │       ├── __init__.py
    │       ├── _datetime.py
    │       ├── tqdm.py
    │       ├── _subprocess.py
    │       ├── _paths.py
    │       └── logging.py
    │   ├── commands
    │       ├── __init__.py
    │       ├── env.py
    │       ├── huggingface_cli.py
    │       └── _cli_utils.py
    │   ├── constants.py
    │   └── templates
    │       └── datasetcard_template.md
├── .github
    ├── conda
    │   ├── build.sh
    │   └── meta.yaml
    ├── workflows
    │   ├── delete_doc_comment.yaml
    │   ├── build_documentation.yaml
    │   ├── build_pr_documentation.yaml
    │   ├── python-release.yml
    │   ├── release-conda.yml
    │   ├── python-quality.yml
    │   ├── contrib-tests.yml
    │   └── python-tests.yml
    └── ISSUE_TEMPLATE
    │   ├── config.yml
    │   ├── feature_request.md
    │   └── bug-report.yml
├── docs
    ├── source
    │   ├── package_reference
    │   │   ├── overview.mdx
    │   │   ├── inference_api.mdx
    │   │   ├── login.mdx
    │   │   ├── mixins.mdx
    │   │   ├── community.mdx
    │   │   ├── file_download.mdx
    │   │   ├── cache.mdx
    │   │   ├── repository.mdx
    │   │   ├── cards.mdx
    │   │   ├── hf_api.mdx
    │   │   └── environment_variables.mdx
    │   ├── guides
    │   │   └── overview.mdx
    │   ├── _toctree.yml
    │   ├── index.mdx
    │   ├── how-to-inference.mdx
    │   ├── how-to-downstream.mdx
    │   ├── quick-start.mdx
    │   ├── how-to-discussions-and-pull-requests.mdx
    │   └── installation.mdx
    └── dev
    │   └── release.md
├── MANIFEST.in
├── pyproject.toml
├── codecov.yml
├── .pre-commit-config.yaml
├── setup.cfg
├── Makefile
├── .gitignore
├── README.md
├── setup.py
├── utils
    ├── check_contrib_list.py
    └── check_static_imports.py
└── CODE_OF_CONDUCT.md


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/contrib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/contrib/timm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/empty.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/contrib/sentence_transformers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/tiny_dataset/some_text.txt:
--------------------------------------------------------------------------------
1 | foo
2 | bar
3 | foobar


--------------------------------------------------------------------------------
/.github/conda/build.sh:
--------------------------------------------------------------------------------
1 | $PYTHON setup.py install     # Python command to install the script.
2 | 


--------------------------------------------------------------------------------
/contrib/timm/requirements.txt:
--------------------------------------------------------------------------------
1 | # Timm
2 | git+https://github.com/rwightman/pytorch-image-models.git#egg=timm


--------------------------------------------------------------------------------
/contrib/sentence_transformers/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/UKPLab/sentence-transformers.git#egg=sentence-transformers


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_template.md:
--------------------------------------------------------------------------------
1 | ---
2 | {{card_data}}
3 | ---
4 | 
5 | # {{ model_name | default("MyModelName", true)}}
6 | 
7 | {{ some_data }}
8 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/overview.mdx:
--------------------------------------------------------------------------------
1 | # Overview
2 | 
3 | This section contains an exhaustive and technical description of `huggingface_hub` classes and methods.


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_datasetcard_template.md:
--------------------------------------------------------------------------------
1 | ---
2 | {card_data}
3 | ---
4 | 
5 | # {{ pretty_name | default("Dataset Name", true)}}
6 | 
7 | {{ some_data }}
8 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include src/huggingface_hub/py.typed
2 | include src/huggingface_hub/templates/modelcard_template.md
3 | include src/huggingface_hub/templates/datasetcard_template.md
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_no_metadata.md:
--------------------------------------------------------------------------------
1 | # MyCoolModel
2 | 
3 | In this example, we don't have any metadata at the top of the file. In cases like these, `CardData` should be instantiated as empty.
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_invalid_card_data.md:
--------------------------------------------------------------------------------
1 | ---
2 | []
3 | ---
4 | 
5 | # invalid-card-data
6 | 
7 | This card should fail when trying to load it in because the card data between the `---` is a list instead of a dict.
8 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 88
3 | target_version = ['py37', 'py38', 'py39', 'py310']
4 | preview = true
5 | 
6 | [tool.mypy]
7 | ignore_missing_imports = true
8 | no_implicit_optional = true
9 | scripts_are_modules = true


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment:
2 |   # https://docs.codecov.com/docs/pull-request-comments#requiring-changes
3 |   require_changes: true
4 |   # https://docs.codecov.com/docs/pull-request-comments#after_n_builds
5 |   after_n_builds: 11
6 | 
7 | github_checks:
8 |   annotations: false
9 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/inference_api.mdx:
--------------------------------------------------------------------------------
1 | # Inference API
2 | 
3 | The `huggingface_hub` library allows users to programmatically access the Inference API. For more information about the Accelerated Inference API, please refer to the documentation [here](https://huggingface.co/docs/api-inference/index).
4 | 
5 | [[autodoc]] InferenceApi


--------------------------------------------------------------------------------
/.github/workflows/delete_doc_comment.yaml:
--------------------------------------------------------------------------------
 1 | name: Delete dev documentation
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     types: [ closed ]
 6 | 
 7 | 
 8 | jobs:
 9 |   delete:
10 |     uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main
11 |     with:
12 |       pr_number: ${{ github.event.number }}
13 |       package: huggingface_hub


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_simple.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language:
 3 | - en
 4 | license: mit
 5 | library_name: pytorch-lightning
 6 | tags:
 7 | - pytorch
 8 | - image-classification
 9 | datasets:
10 | - beans
11 | metrics:
12 | - acc
13 | ---
14 | 
15 | # my-cool-model
16 | 
17 | ## Model description
18 | 
19 | You can embed local or remote images using `![](...)`
20 | 


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_windows_line_breaks.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | license: mit
 3 | language: eo
 4 | thumbnail: https://huggingface.co/blog/assets/01_how-to-train/EsperBERTo-thumbnail-v2.png
 5 | widget:
 6 | - text: "Jen la komenco de bela <mask>."
 7 | - text: "Uno du <mask>"
 8 | - text: "Jen finiĝas bela <mask>."
 9 | ---
10 | 
11 | # Hello old Windows line breaks
12 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/login.mdx:
--------------------------------------------------------------------------------
 1 | # Login and logout
 2 | 
 3 | The `huggingface_hub` library allows users to programmatically log their machine in to
 4 | and out of the Hub.
 5 | 
 6 | ## login
 7 | 
 8 | [[autodoc]] login
 9 | 
10 | ## interpreter_login
11 | 
12 | [[autodoc]] interpreter_login
13 | 
14 | ## notebook_login
15 | 
16 | [[autodoc]] notebook_login
17 | 
18 | ## logout
19 | 
20 | [[autodoc]] logout
21 | 


--------------------------------------------------------------------------------
/.github/workflows/build_documentation.yaml:
--------------------------------------------------------------------------------
 1 | name: Build documentation
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |       - doc-builder*
 8 |       - v*-release
 9 | 
10 | jobs:
11 |    build:
12 |     uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
13 |     with:
14 |       commit_sha: ${{ github.sha }}
15 |       package: huggingface_hub
16 |     secrets:
17 |       token: ${{ secrets.HUGGINGFACE_PUSH }}


--------------------------------------------------------------------------------
/tests/test_utils_runtime.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from huggingface_hub.utils._runtime import is_google_colab, is_notebook
 4 | 
 5 | 
 6 | class TestRuntimeUtils(unittest.TestCase):
 7 |     def test_is_notebook(self) -> None:
 8 |         """Test `is_notebook`."""
 9 |         self.assertFalse(is_notebook())
10 | 
11 |     def test_is_google_colab(self) -> None:
12 |         """Test `is_google_colab`."""
13 |         self.assertFalse(is_google_colab())
14 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # Running Tests
 2 | 
 3 | To run the test suite, please perform the following from the root directory of this repository:
 4 | 
 5 | 1. `pip install -e .[testing]`
 6 | 
 7 |       This will install all the testing requirements.
 8 | 2. `sudo apt-get update; sudo apt-get install git-lfs -y`
 9 | 
10 |       We need git-lfs on our system to run some of the tests
11 |     
12 | 3. `pytest ./tests/`
13 |     
14 |       We need to set an environment variable to make sure the private API tests can run.


--------------------------------------------------------------------------------
/.github/workflows/build_pr_documentation.yaml:
--------------------------------------------------------------------------------
 1 | name: Build PR Documentation
 2 | 
 3 | on:
 4 |   pull_request
 5 | 
 6 | concurrency:
 7 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
 8 |   cancel-in-progress: true
 9 | 
10 | jobs:
11 |   build:
12 |     uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
13 |     with:
14 |       commit_sha: ${{ github.event.pull_request.head.sha }}
15 |       pr_number: ${{ github.event.number }}
16 |       package: huggingface_hub


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_datasetcard_simple.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language:
 3 | - en
 4 | license:
 5 | - bsd-3-clause
 6 | annotations_creators:
 7 | - crowdsourced
 8 | - expert-generated
 9 | language_creators:
10 | - found
11 | multilinguality:
12 | - monolingual
13 | size_categories:
14 | - n<1K
15 | task_categories:
16 | - image-segmentation
17 | task_ids:
18 | - semantic-segmentation
19 | pretty_name: Sample Segmentation
20 | ---
21 | 
22 | # Dataset Card for Sample Segmentation
23 | 
24 | This is a sample dataset card for a semantic segmentation dataset.


--------------------------------------------------------------------------------
/tests/testing_constants.py:
--------------------------------------------------------------------------------
 1 | USER = "__DUMMY_TRANSFORMERS_USER__"
 2 | FULL_NAME = "Dummy User"
 3 | PASS = "__DUMMY_TRANSFORMERS_PASS__"
 4 | 
 5 | # Not critical, only usable on the sandboxed CI instance.
 6 | TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL"
 7 | 
 8 | ENDPOINT_PRODUCTION = "https://huggingface.co"
 9 | ENDPOINT_STAGING = "https://hub-ci.huggingface.co"
10 | ENDPOINT_STAGING_BASIC_AUTH = f"https://{USER}:{PASS}@hub-ci.huggingface.co"
11 | 
12 | ENDPOINT_PRODUCTION_URL_SCHEME = (
13 |     ENDPOINT_PRODUCTION + "/{repo_id}/resolve/{revision}/{filename}"
14 | )
15 | 


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_invalid_model_index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language: en
 3 | license: mit
 4 | library_name: timm
 5 | tags:
 6 | - pytorch
 7 | - image-classification
 8 | datasets:
 9 | - beans
10 | metrics:
11 | - acc
12 | model-index:
13 | - name: my-cool-model
14 |   results:
15 |   - task:
16 |       type: image-classification
17 |     metrics:
18 |     - type: acc
19 |       value: 0.9
20 | ---
21 | 
22 | # Invalid Model Index
23 | 
24 | In this example, the model index does not define a dataset field. In this case, we'll still initialize CardData, but will leave model-index/eval_results out of it.
25 | 


--------------------------------------------------------------------------------
/contrib/timm/test_timm.py:
--------------------------------------------------------------------------------
 1 | import timm
 2 | 
 3 | from ..utils import production_endpoint
 4 | 
 5 | 
 6 | MODEL_ID = "nateraw/timm-resnet50-beans"
 7 | 
 8 | 
 9 | @production_endpoint()
10 | def test_load_from_hub() -> None:
11 |     # Test load only config
12 |     _ = timm.models.hub.load_model_config_from_hf(MODEL_ID)
13 | 
14 |     # Load entire model from Hub
15 |     _ = timm.create_model("hf_hub:" + MODEL_ID, pretrained=True)
16 | 
17 | 
def test_push_to_hub(repo_name: str, cleanup_repo: None) -> None:
    """Create a `resnet18` model with timm and push it to `repo_name`."""
    # NOTE(review): `cleanup_repo` is never read in the body; presumably a fixture
    # requested only for its side effect -- confirm against conftest.
    resnet = timm.create_model("resnet18")
    timm.models.hub.push_to_hf_hub(resnet, repo_name)
21 | 


--------------------------------------------------------------------------------
/tests/test_utils_fixes.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from huggingface_hub.utils import yaml_dump
 4 | 
 5 | 
 6 | class TestYamlDump(unittest.TestCase):
 7 |     def test_yaml_dump_emoji(self) -> None:
 8 |         self.assertEqual(yaml_dump({"emoji": "👀"}), "emoji: 👀\n")
 9 | 
10 |     def test_yaml_dump_japanese_characters(self) -> None:
11 |         self.assertEqual(yaml_dump({"some unicode": "日本か"}), "some unicode: 日本か\n")
12 | 
13 |     def test_yaml_dump_explicit_no_unicode(self) -> None:
14 |         self.assertEqual(
15 |             yaml_dump({"emoji": "👀"}, allow_unicode=False), 'emoji: "\\U0001F440"\n'
16 |         )
17 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
 1 | blank_issues_enabled: true
 2 | contact_links:
 3 |   - name: api-inference-community
 4 |     url: https://github.com/huggingface/api-inference-community/issues
 5 |     about: For all issues related to the inference API
 6 |   - name: Website Related
 7 |     url: https://github.com/huggingface/hub-docs/issues
 8 |     about: Feature requests and bug reports related to the website
 9 |   - name: Forum
10 |     url: https://discuss.huggingface.co/
11 |     about: General usage questions and community discussions
12 |   - name: Blank issue
13 |     url: https://github.com/huggingface/huggingface_hub/issues/new
14 |     about: Please note that the Forum is in most places the right place for discussions
15 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "\U0001F680 Feature request"
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/mixins.mdx:
--------------------------------------------------------------------------------
 1 | # Mixins & serialization methods
 2 | 
 3 | ## Mixins
 4 | 
 5 | The `huggingface_hub` library offers a range of mixins that can be used as a parent class for your
 6 | objects, in order to provide simple uploading and downloading functions.
 7 | 
 8 | ### Generic
 9 | 
10 | [[autodoc]] ModelHubMixin
11 |     - all
12 |     - _save_pretrained
13 |     - _from_pretrained
14 | 
15 | ### PyTorch
16 | 
17 | [[autodoc]] PyTorchModelHubMixin
18 | 
19 | ### Keras
20 | 
21 | [[autodoc]] KerasModelHubMixin
22 | 
23 | [[autodoc]] from_pretrained_keras
24 | 
25 | [[autodoc]] push_to_hub_keras
26 | 
27 | [[autodoc]] save_pretrained_keras
28 | 
29 | ### Fastai
30 | 
31 | [[autodoc]] from_pretrained_fastai
32 | 
33 | [[autodoc]] push_to_hub_fastai
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/.github/workflows/python-release.yml:
--------------------------------------------------------------------------------
 1 | name: Python release
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - v*
 7 | 
 8 | env:
 9 |   PYPI_TOKEN: ${{ secrets.PYPI_TOKEN_DIST }}
10 | 
11 | jobs:
12 |   python_release:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v2
19 |       with:
20 |         python-version: 3.9
21 |     - name: Install dependencies
22 |       run: |
23 |         pip install --upgrade pip
24 |         pip install setuptools wheel
25 | 
26 |     - run: python setup.py sdist bdist_wheel
27 | 
28 |     - run: |
29 |         pip install twine
30 | 
31 |     - name: Upload to PyPi
32 |       run: |
33 |           twine upload dist/* -u __token__ -p "$PYPI_TOKEN"
34 | 


--------------------------------------------------------------------------------
/tests/test_tf_import.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import unittest
 3 | 
 4 | from huggingface_hub.utils import is_tf_available
 5 | 
 6 | 
 7 | def require_tf(test_case):
 8 |     """
 9 |     Decorator marking a test that requires TensorFlow.
10 | 
11 |     These tests are skipped when TensorFlow is not installed.
12 | 
13 |     """
14 |     if not is_tf_available():
15 |         return unittest.skip("test requires Tensorflow")(test_case)
16 |     else:
17 |         return test_case
18 | 
19 | 
@require_tf
def test_import_huggingface_hub_doesnt_import_tensorfow():
    """Importing `huggingface_hub` must not pull `tensorflow` into `sys.modules`."""
    # Redundant with the `huggingface_hub` import at the top of this file, but kept
    # explicit here anyway just in case.
    import huggingface_hub  # noqa

    loaded_modules = sys.modules
    assert "tensorflow" not in loaded_modules
27 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/community.mdx:
--------------------------------------------------------------------------------
 1 | # Interacting with Discussions and Pull Requests
 2 | 
 3 | Check the [`HfApi`] documentation page for the reference of methods enabling
 4 | interaction with Pull Requests and Discussions on the Hub.
 5 | 
 6 | - [`get_repo_discussions`]
 7 | - [`get_discussion_details`]
 8 | - [`create_discussion`]
 9 | - [`create_pull_request`]
10 | - [`rename_discussion`]
11 | - [`comment_discussion`]
12 | - [`edit_discussion_comment`]
13 | - [`change_discussion_status`]
14 | - [`merge_pull_request`]
15 | 
16 | ## Data structures
17 | 
18 | [[autodoc]] Discussion
19 | 
20 | [[autodoc]] DiscussionWithDetails
21 | 
22 | [[autodoc]] DiscussionEvent
23 | 
24 | [[autodoc]] DiscussionComment
25 | 
26 | [[autodoc]] DiscussionStatusChange
27 | 
28 | [[autodoc]] DiscussionCommit
29 | 
30 | [[autodoc]] DiscussionTitleChange
31 | 


--------------------------------------------------------------------------------
/.github/conda/meta.yaml:
--------------------------------------------------------------------------------
 1 | {% set name = "huggingface_hub" %}
 2 | 
 3 | package:
 4 |   name: "{{ name|lower }}"
 5 |   version: "{{ HUB_VERSION }}"
 6 | 
 7 | source:
 8 |   path: ../../
 9 | 
10 | build:
11 |   noarch: python
12 | 
13 | requirements:
14 |   host:
15 |     - python
16 |     - pip
17 |     - filelock
18 |     - requests
19 |     - tqdm
20 |     - typing-extensions
21 |     - packaging
22 |     - pyyaml
23 |   run:
24 |     - python
25 |     - pip
26 |     - filelock
27 |     - requests
28 |     - tqdm
29 |     - typing-extensions
30 |     - packaging
31 |     - pyyaml
32 | 
33 | test:
34 |   imports:
35 |     - huggingface_hub
36 | 
37 | about:
38 |   home: https://huggingface.co
39 |   license: Apache License 2.0
40 |   license_file: LICENSE
41 |   summary: "Client library to download and publish models and other files on the huggingface.co hub"
42 | 


--------------------------------------------------------------------------------
/docs/source/guides/overview.mdx:
--------------------------------------------------------------------------------
 1 | # How-to guides
 2 | 
 3 | In this section, you will find practical guides to help you achieve a specific goal.
 4 | Take a look at these guides to learn how to use huggingface_hub to solve real-world problems:
 5 | 
 6 | - [Download an entire repository and use regex matching to filter and download specific
 7 |   files](../how-to-downstream).
 8 | - [Delete and clone a repository, and create and update a branch](../how-to-manage).
 9 | - [Upload your files with a context manager or use a helper to push files to a remote
10 |   repository](../how-to-upstream).
11 | - [Search thousands of models and datasets on the Hub with specific filters and
12 |   parameters to only return the best results](../searching-the-hub).
13 | - [Access the Inference API for accelerated inference](../how-to-inference).
14 | - [Interact with Discussions and Pull Requests](../how-to-discussions-and-pull-requests).


--------------------------------------------------------------------------------
/docs/source/package_reference/file_download.mdx:
--------------------------------------------------------------------------------
 1 | # Downloading files
 2 | 
 3 | ## Download a single file
 4 | 
 5 | ### hf_hub_download
 6 | 
 7 | [[autodoc]] huggingface_hub.hf_hub_download
 8 | 
 9 | ### hf_hub_url
10 | 
11 | [[autodoc]] huggingface_hub.hf_hub_url
12 | 
13 | ## Download a snapshot of the repo
14 | 
15 | [[autodoc]] huggingface_hub.snapshot_download
16 | 
17 | ## Get metadata about a file
18 | 
19 | ### get_hf_file_metadata
20 | 
21 | [[autodoc]] huggingface_hub.get_hf_file_metadata
22 | 
23 | ### HfFileMetadata
24 | 
25 | [[autodoc]] huggingface_hub.HfFileMetadata
26 | 
27 | ## Caching
28 | 
29 | The methods displayed above are designed to work with a caching system that prevents
30 | re-downloading files. The caching system was updated in v0.8.0 to become the central
31 | cache-system shared across libraries that depend on the Hub.
32 | 
33 | Read the [cache-system guide](../how-to-cache) for a detailed presentation of caching
34 | at HF.
35 | 


--------------------------------------------------------------------------------
/tests/fixtures/cards/sample_simple_model_index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language: en
 3 | license: mit
 4 | library_name: timm
 5 | tags:
 6 | - pytorch
 7 | - image-classification
 8 | datasets:
 9 | - beans
10 | metrics:
11 | - accuracy
12 | model-index:
13 | - name: my-cool-model
14 |   results:
15 |   - task:
16 |       type: image-classification
17 |     dataset:
18 |       type: beans
19 |       name: Beans
20 |     metrics:
21 |     - type: accuracy
22 |       value: 0.9
23 |   - task:
24 |       type: image-classification
25 |     dataset:
26 |       type: beans
27 |       name: Beans
28 |       config: default
29 |       split: test
30 |       revision: 5503434ddd753f426f4b38109466949a1217c2bb
31 |       args:
32 |         date: 20220120
33 |     metrics:
34 |     - type: f1
35 |       value: 0.66
36 | ---
37 | 
38 | # my-cool-model
39 | 
40 | ## Model description
41 | 
42 | This is a test model card with multiple evaluations across different (dataset, metric) configurations.
43 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_typing.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022-present, the HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Handle typing imports based on system compatibility."""
16 | import sys
17 | 
18 | 
if sys.version_info < (3, 8):
    # `Literal` and `TypedDict` only joined the standard `typing` module in
    # Python 3.8; older interpreters take them from the backport package.
    from typing_extensions import Literal, TypedDict  # noqa: F401
else:
    from typing import Literal, TypedDict

# Closed set of HTTP method names.
HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
25 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | from argparse import _SubParsersAction
17 | 
18 | 
class BaseHuggingfaceCLICommand(ABC):
    """Abstract interface for a CLI command.

    Concrete subclasses must override both `register_subcommand` and `run`;
    the base implementations only raise `NotImplementedError`.
    """

    @staticmethod
    @abstractmethod
    def register_subcommand(parser: _SubParsersAction):
        """Hook receiving the sub-parsers action `parser`; must be overridden."""
        raise NotImplementedError

    @abstractmethod
    def run(self):
        """Hook executing the command; must be overridden."""
        raise NotImplementedError
28 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_fixes.py:
--------------------------------------------------------------------------------
 1 | # JSONDecodeError was introduced in requests=2.27 released in 2022.
 2 | # This allows us to support older requests for users
 3 | # More information: https://github.com/psf/requests/pull/5856
 4 | try:
 5 |     from requests import JSONDecodeError  # type: ignore  # noqa: F401
 6 | except ImportError:
 7 |     try:
 8 |         from simplejson import JSONDecodeError  # type: ignore # noqa: F401
 9 |     except ImportError:
10 |         from json import JSONDecodeError  # type: ignore  # noqa: F401
11 | 
12 | from functools import partial
13 | from typing import Callable
14 | 
15 | import yaml
16 | 
17 | 
18 | # Wrap `yaml.dump` to set `allow_unicode=True` by default.
19 | # `stream=None` is bound as well; the `Callable[..., str]` annotation reflects that
20 | # the wrapper is expected to return the dumped YAML as a string.
21 | #
22 | # Example:
23 | # ```py
24 | # >>> yaml.dump({"emoji": "👀", "some unicode": "日本か"})
25 | # 'emoji: "\\U0001F440"\nsome unicode: "\\u65E5\\u672C\\u304B"\n'
26 | #
27 | # >>> yaml_dump({"emoji": "👀", "some unicode": "日本か"})
28 | # 'emoji: "👀"\nsome unicode: "日本か"\n'
29 | # ```
30 | yaml_dump: Callable[..., str] = partial(  # type: ignore
31 |     yaml.dump, stream=None, allow_unicode=True
32 | )
31 | 


--------------------------------------------------------------------------------
/tests/test_utils_datetime.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from datetime import datetime, timezone
 3 | 
 4 | import pytest
 5 | 
 6 | from huggingface_hub.utils import parse_datetime
 7 | 
 8 | 
 9 | class TestDatetimeUtils(unittest.TestCase):
10 |     def test_parse_datetime(self):
11 |         """Test `parse_datetime` works correctly on datetimes returned by server."""
12 |         # Accepted: milliseconds + trailing "Z", expected to be parsed as UTC-aware.
13 |         self.assertEqual(
14 |             parse_datetime("2022-08-19T07:19:38.123Z"),
15 |             datetime(2022, 8, 19, 7, 19, 38, 123000, tzinfo=timezone.utc),
16 |         )
17 | 
18 |         # Rejected: no fractional seconds and no trailing "Z".
19 |         with pytest.raises(
20 |             ValueError, match=r".*Cannot parse '2022-08-19T07:19:38' as a datetime.*"
21 |         ):
22 |             parse_datetime("2022-08-19T07:19:38")
23 | 
24 |         # Rejected: fractional seconds but no trailing "Z".
25 |         with pytest.raises(
26 |             ValueError,
27 |             match=r".*Cannot parse '2022-08-19T07:19:38.123' as a datetime.*",
28 |         ):
29 |             parse_datetime("2022-08-19T07:19:38.123")
30 | 
31 |         # Rejected: space separator and an extra offset after the "Z".
32 |         # (`+` is escaped in the pattern because `match` is a regex.)
33 |         with pytest.raises(
34 |             ValueError,
35 |             match=r".*Cannot parse '2022-08-19 07:19:38.123Z\+6:00' as a datetime.*",
36 |         ):
37 |             parse_datetime("2022-08-19 07:19:38.123Z+6:00")
33 | 


--------------------------------------------------------------------------------
/.github/workflows/release-conda.yml:
--------------------------------------------------------------------------------
 1 | name: Release Conda
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - v*
 7 |     branches:
 8 |       - conda_*
 9 | 
10 | env:
11 |   ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }}
12 | 
13 | jobs:
14 |   build_and_package:
15 |     runs-on: ubuntu-latest
16 |     defaults:
17 |       run:
18 |         shell: bash -l {0}
19 | 
20 |     steps:
21 |       - name: Checkout repository
22 |         uses: actions/checkout@v1
23 | 
24 |       - name: Install miniconda
25 |         uses: conda-incubator/setup-miniconda@v2
26 |         with:
27 |           auto-update-conda: true
28 |           auto-activate-base: false
29 |           python-version: 3.8
30 |           activate-environment: "build-hub"
31 | 
32 |       - name: Setup conda env
33 |         run: |
34 |           conda install -c defaults anaconda-client conda-build
35 | 
36 |       - name: Extract version
37 |         run: echo "HUB_VERSION=`python setup.py --version`" >> $GITHUB_ENV
38 | 
39 |       - name: Build conda packages
40 |         run: |
41 |           conda info
42 |           conda-build .github/conda
43 | 
44 |       - name: Upload to Anaconda
45 |         run: |
46 |           anaconda upload `conda-build .github/conda --output` --force
47 | 


--------------------------------------------------------------------------------
/.github/workflows/python-quality.yml:
--------------------------------------------------------------------------------
 1 | name: Python quality
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |     paths-ignore:
 8 |       - "js/**"
 9 |       - "api-inference-community/**"
10 |   pull_request:
11 |     types: [assigned, opened, synchronize, reopened]
12 |     paths-ignore:
13 |       - "js/**"
14 |       - "api-inference-community/**"
15 | 
16 | jobs:
17 |   check_code_quality:
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |       - uses: actions/checkout@v2
22 |       - name: Set up Python
23 |         uses: actions/setup-python@v2
24 |         with:
25 |           python-version: 3.9
26 |       - name: Install dependencies
27 |         run: |
28 |           pip install --upgrade pip
29 |           pip install .[dev]
30 |       - run: black --check tests src
31 |       - run: isort --check-only tests src
32 |       - run: flake8 tests src
33 |       - run: python utils/check_contrib_list.py
34 |       - run: python utils/check_static_imports.py
35 | 
36 |       # Run type checking at least on huggingface_hub root file to check all modules
37 |       # that can be lazy-loaded actually exist.
38 |       - run: mypy src/huggingface_hub/__init__.py --follow-imports=silent
39 | 
40 |       # Run mypy on full package
41 |       - run: mypy src
42 | 


--------------------------------------------------------------------------------
/contrib/sentence_transformers/test_sentence_transformers.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from sentence_transformers import SentenceTransformer, util
 4 | 
 5 | from ..utils import production_endpoint
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def multi_qa_model() -> SentenceTransformer:
10 |     """Load `multi-qa-MiniLM-L6-cos-v1` once per module, inside `production_endpoint()`."""
11 |     with production_endpoint():
12 |         return SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
12 | 
13 | 
14 | def test_from_pretrained(multi_qa_model: SentenceTransformer) -> None:
15 |     """Smoke test: encode a query and two passages, then print their dot-score.
16 | 
17 |     Nothing is asserted on the scores; the test only checks that the calls succeed.
18 |     """
19 |     # Example taken from https://www.sbert.net/docs/hugging_face.html#using-hugging-face-models.
20 |     query_embedding = multi_qa_model.encode("How big is London")
21 |     passage_embedding = multi_qa_model.encode(
22 |         [
23 |             "London has 9,787,426 inhabitants at the 2011 census",
24 |             "London is known for its financial district",
25 |         ]
26 |     )
27 |     print("Similarity:", util.dot_score(query_embedding, passage_embedding))
24 | 
25 | 
26 | @pytest.mark.xfail(
27 |     reason=(
28 |         "Production endpoint is hardcoded in sentence_transformers when pushing to Hub."
29 |     )
30 | )
31 | def test_push_to_hub(
32 |     multi_qa_model: SentenceTransformer, repo_name: str, cleanup_repo: None
33 | ) -> None:
34 |     """Push the loaded model to the Hub under `repo_name` (currently marked `xfail`).
35 | 
36 |     `cleanup_repo` is requested only for its teardown; its value is not used.
37 |     """
38 |     multi_qa_model.save_to_hub(repo_name)
35 | 


--------------------------------------------------------------------------------
/.github/workflows/contrib-tests.yml:
--------------------------------------------------------------------------------
 1 | name: Contrib tests
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   schedule:
 6 |   - cron:  '0 0 * * 6' # Run once a week, Saturday midnight
 7 |   push:
 8 |     branches:
 9 |       - ci_contrib_*
10 | 
11 | jobs:
12 |   build:
13 |     runs-on: ubuntu-latest
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         contrib: [
18 |           "sentence_transformers",
19 |           "timm",
20 |         ]
21 | 
22 |     steps:
23 |       - uses: actions/checkout@v2
24 |       - name: Set up Python 3.8
25 |         uses: actions/setup-python@v2
26 |         with:
27 |           python-version: 3.8
28 | 
29 |       # Install pip
30 |       - name: Install pip
31 |         run: pip install --upgrade pip
32 | 
33 |       # Install downstream library and its specific dependencies
34 |       - name: Install ${{ matrix.contrib }}
35 |         run: pip install -r contrib/${{ matrix.contrib }}/requirements.txt
36 | 
37 |       # Install huggingface_hub from source code + testing extras
38 |       - name: Install `huggingface_hub`
39 |         run: |
40 |           pip uninstall -y huggingface_hub
41 |           pip install .[testing]
42 | 
43 |       # Run tests
44 |       - name: Run tests
45 |         run: pytest contrib/${{ matrix.contrib }}
46 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/cache.mdx:
--------------------------------------------------------------------------------
 1 | # Cache-system reference
 2 | 
 3 | The caching system was updated in v0.8.0 to become the central cache-system shared
 4 | across libraries that depend on the Hub. Read the [cache-system guide](../how-to-cache)
 5 | for a detailed presentation of caching at HF.
 6 | 
 7 | ## Helpers
 8 | 
 9 | ### cached_assets_path
10 | 
11 | [[autodoc]] huggingface_hub.cached_assets_path
12 | 
13 | ### scan_cache_dir
14 | 
15 | [[autodoc]] huggingface_hub.scan_cache_dir
16 | 
17 | ## Data structures
18 | 
19 | All structures are built and returned by [`scan_cache_dir`] and are immutable.
20 | 
21 | ### HFCacheInfo
22 | 
23 | [[autodoc]] huggingface_hub.HFCacheInfo
24 | 
25 | ### CachedRepoInfo
26 | 
27 | [[autodoc]] huggingface_hub.CachedRepoInfo
28 |     - size_on_disk_str
29 |     - refs
30 | 
31 | ### CachedRevisionInfo
32 | 
33 | [[autodoc]] huggingface_hub.CachedRevisionInfo
34 |     - size_on_disk_str
35 |     - nb_files
36 | 
37 | ### CachedFileInfo
38 | 
39 | [[autodoc]] huggingface_hub.CachedFileInfo
40 |     - size_on_disk_str
41 | 
42 | ### DeleteCacheStrategy
43 | 
44 | [[autodoc]] huggingface_hub.DeleteCacheStrategy
45 |     - expected_freed_size_str
46 | 
47 | ## Exceptions
48 | 
49 | ### CorruptedCacheException
50 | 
51 | [[autodoc]] huggingface_hub.CorruptedCacheException


--------------------------------------------------------------------------------
/tests/test_utils_sha.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import unittest
 3 | from hashlib import sha256
 4 | from io import BytesIO
 5 | from tempfile import TemporaryDirectory
 6 | 
 7 | from huggingface_hub.utils.sha import sha_fileobj
 8 | 
 9 | 
10 | class TestShaUtils(unittest.TestCase):
11 |     def test_sha_fileobj(self):
12 |         """`sha_fileobj` must return the sha256 digest whatever `chunk_size` is given."""
13 |         with TemporaryDirectory() as tmpdir:
14 |             content = b"Random content" * 1000
15 |             sha = sha256(content).digest()
16 | 
17 |             # Test with file object
18 |             filepath = os.path.join(tmpdir, "file.bin")
19 |             with open(filepath, "wb+") as file:
20 |                 file.write(content)
21 | 
22 |             # Chunk sizes: default (None), smaller than the content, larger than the content.
23 |             # The file is reopened each time so that every call starts at offset 0.
24 |             with open(filepath, "rb") as fileobj:
25 |                 self.assertEqual(sha_fileobj(fileobj, None), sha)
26 |             with open(filepath, "rb") as fileobj:
27 |                 self.assertEqual(sha_fileobj(fileobj, 50), sha)
28 |             with open(filepath, "rb") as fileobj:
29 |                 self.assertEqual(sha_fileobj(fileobj, 50_000), sha)
30 | 
31 |             # Test with in-memory file object
32 |             self.assertEqual(sha_fileobj(BytesIO(content), None), sha)
33 |             self.assertEqual(sha_fileobj(BytesIO(content), 50), sha)
34 |             self.assertEqual(sha_fileobj(BytesIO(content), 50_000), sha)
32 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v4.1.0
 4 |     hooks:
 5 |       - id: check-yaml
 6 |         exclude: .github/conda/meta.yaml
 7 |       - id: end-of-file-fixer
 8 |       - id: trailing-whitespace
 9 |       - id: check-case-conflict
10 |       - id: check-merge-conflict
11 |   - repo: https://github.com/psf/black
12 |     rev: 22.3.0
13 |     hooks:
14 |       - id: black
15 |   - repo: https://github.com/pycqa/flake8
16 |     rev: 4.0.1
17 |     hooks:
18 |       - id: flake8
19 |         types: [file, python]
20 |   - repo: https://github.com/PyCQA/isort
21 |     rev: 5.10.1
22 |     hooks:
23 |       - id: isort
24 |   - repo: https://github.com/pre-commit/mirrors-mypy
25 |     rev: v0.981
26 |     hooks:
27 |       - id: mypy
28 |         # taken from https://github.com/pre-commit/mirrors-mypy/issues/33#issuecomment-735449356
29 |         args: [src, --config-file=pyproject.toml]
30 |         pass_filenames: false
31 |         # Same list of dependencies as in `setup.py`
32 |         additional_dependencies:
33 |           [
34 |             "types-PyYAML",
35 |             "types-requests",
36 |             "types-simplejson",
37 |             "types-toml",
38 |             "types-tqdm",
39 |             "types-urllib3",
40 |           ]
41 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/sha.py:
--------------------------------------------------------------------------------
 1 | """Utilities to efficiently compute the SHA 256 hash of a bunch of bytes"""
 2 | 
 3 | from functools import partial
 4 | from hashlib import sha256
 5 | from typing import BinaryIO, Iterable, Optional
 6 | 
 7 | 
 8 | def iter_fileobj(
 9 |     fileobj: BinaryIO, chunk_size: Optional[int] = None
10 | ) -> Iterable[bytes]:
11 |     """Returns an iterator over the content of ``fileobj`` in chunks of ``chunk_size``"""
12 |     chunk_size = chunk_size or -1
13 |     return iter(partial(fileobj.read, chunk_size), b"")
14 | 
15 | 
16 | def sha_iter(iterable: Iterable[bytes]):
17 |     sha = sha256()
18 |     for chunk in iterable:
19 |         sha.update(chunk)
20 |     return sha.digest()
21 | 
22 | 
23 | def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
24 |     """
25 |     Computes the sha256 hash of the given file object, by chunks of size `chunk_size`.
26 | 
27 |     Args:
28 |         fileobj (file-like object):
29 |             The File object to compute sha256 for, typically obtained with `open(path, "rb")`
30 |         chunk_size (`int`, *optional*):
31 |             The number of bytes to read from `fileobj` at once, defaults to 512
32 | 
33 |     Returns:
34 |         `bytes`: `fileobj`'s sha256 hash as bytes
35 |     """
36 |     chunk_size = chunk_size if chunk_size is not None else 512
37 |     return sha_iter(iter_fileobj(fileobj))
38 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from tempfile import TemporaryDirectory
 3 | from typing import Generator
 4 | 
 5 | import pytest
 6 | 
 7 | from _pytest.fixtures import SubRequest
 8 | from huggingface_hub import HfFolder
 9 | 
10 | 
11 | @pytest.fixture
12 | def fx_cache_dir(request: SubRequest) -> Generator[None, None, None]:
13 |     """Add a `cache_dir` attribute pointing to a temporary directory in tests.
14 | 
15 |     Example:
16 |     ```py
17 |     @pytest.mark.usefixtures("fx_cache_dir")
18 |     class TestWithCache(unittest.TestCase):
19 |         cache_dir: Path
20 | 
21 |         def test_cache_dir(self) -> None:
22 |             self.assertTrue(self.cache_dir.is_dir())
23 |     ```
24 |     """
25 |     with TemporaryDirectory() as cache_dir:
26 |         # The attribute is set on the test class itself (`request.cls`)
27 |         request.cls.cache_dir = Path(cache_dir).resolve()
28 |         yield  # test runs here; the directory is removed when the `with` block exits
28 | 
29 | 
30 | @pytest.fixture(autouse=True, scope="session")
31 | def clean_hf_folder_token_for_tests() -> Generator:
32 |     """Clean token stored on machine before all tests and reset it back at the end.
33 | 
34 |     Useful to avoid token deletion when running tests locally.
35 |     """
36 |     # Back up the registered token (if any), then remove it
37 |     token = HfFolder().get_token()
38 |     HfFolder().delete_token()
39 | 
40 |     yield  # Run all tests
41 | 
42 |     # Set back token once all tests have run (nothing to restore if none was registered)
43 |     if token is not None:
44 |         HfFolder().save_token(token)
45 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/commands/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Contains command to print information about the environment.
15 | 
16 | Usage:
17 |     huggingface-cli env
18 | """
19 | from argparse import _SubParsersAction
20 | 
21 | from ..utils import dump_environment_info
22 | from . import BaseHuggingfaceCLICommand
23 | 
24 | 
25 | class EnvironmentCommand(BaseHuggingfaceCLICommand):
26 |     """`huggingface-cli env` command: print information about the environment."""
27 | 
28 |     def __init__(self, args):
29 |         # Parsed CLI arguments; stored but not read by `run`
30 |         self.args = args
31 | 
32 |     @staticmethod
33 |     def register_subcommand(parser: _SubParsersAction):
34 |         """Add the `env` sub-parser and set this class as its `func` default."""
35 |         env_parser = parser.add_parser(
36 |             "env", help="Print information about the environment."
37 |         )
38 |         env_parser.set_defaults(func=EnvironmentCommand)
39 | 
40 |     def run(self) -> None:
41 |         """Run the command by calling `dump_environment_info`."""
42 |         dump_environment_info()
38 | 


--------------------------------------------------------------------------------
/tests/test_offline_utils.py:
--------------------------------------------------------------------------------
 1 | from io import BytesIO
 2 | 
 3 | import pytest
 4 | 
 5 | import requests
 6 | from huggingface_hub.file_download import http_get
 7 | 
 8 | from .testing_utils import (
 9 |     OfflineSimulationMode,
10 |     RequestWouldHangIndefinitelyError,
11 |     offline,
12 | )
13 | 
14 | 
15 | def test_offline_with_timeout():
16 |     """Check the errors raised under `OfflineSimulationMode.CONNECTION_TIMES_OUT`.
17 | 
18 |     A request without timeout must raise `RequestWouldHangIndefinitelyError`; a request
19 |     with a timeout, as well as `http_get`, must raise `ConnectTimeout`.
20 |     """
21 |     with offline(OfflineSimulationMode.CONNECTION_TIMES_OUT):
22 |         with pytest.raises(RequestWouldHangIndefinitelyError):
23 |             requests.request("GET", "https://huggingface.co")
24 |         with pytest.raises(requests.exceptions.ConnectTimeout):
25 |             requests.request("GET", "https://huggingface.co", timeout=1.0)
26 |         with pytest.raises(requests.exceptions.ConnectTimeout):
27 |             http_get("https://huggingface.co", BytesIO())
23 | 
24 | 
25 | def test_offline_with_connection_error():
26 |     """Under `OfflineSimulationMode.CONNECTION_FAILS`, requests raise `ConnectionError`."""
27 |     with offline(OfflineSimulationMode.CONNECTION_FAILS):
28 |         with pytest.raises(requests.exceptions.ConnectionError):
29 |             requests.request("GET", "https://huggingface.co")
30 |         with pytest.raises(requests.exceptions.ConnectionError):
31 |             http_get("https://huggingface.co", BytesIO())
31 | 
32 | 
33 | def test_offline_with_datasets_offline_mode_enabled():
34 |     """Under `OfflineSimulationMode.HF_HUB_OFFLINE_SET_TO_1`, `http_get` raises.
35 | 
36 |     Note that the builtin `ConnectionError` is expected here, not the `requests` one.
37 |     """
38 |     with offline(OfflineSimulationMode.HF_HUB_OFFLINE_SET_TO_1):
39 |         with pytest.raises(ConnectionError):
40 |             http_get("https://huggingface.co", BytesIO())
37 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/repository.mdx:
--------------------------------------------------------------------------------
 1 | # Managing local and online repositories
 2 | 
 3 | The `Repository` class is a helper class that wraps `git` and `git-lfs` commands. It provides tooling adapted
 4 | for managing repositories which can be very large.
 5 | 
 6 | It is the recommended tool as soon as any `git` operation is involved, or when collaboration will be a point
 7 | of focus with the repository itself.
 8 | 
 9 | ## The Repository class
10 | 
11 | [[autodoc]] Repository
12 |     - __init__
13 |     - current_branch
14 |     - all
15 | 
16 | ## Helper methods
17 | 
18 | [[autodoc]] huggingface_hub.repository.is_git_repo
19 | 
20 | [[autodoc]] huggingface_hub.repository.is_local_clone
21 | 
22 | [[autodoc]] huggingface_hub.repository.is_tracked_with_lfs
23 | 
24 | [[autodoc]] huggingface_hub.repository.is_git_ignored
25 | 
26 | [[autodoc]] huggingface_hub.repository.files_to_be_staged
27 | 
28 | [[autodoc]] huggingface_hub.repository.is_tracked_upstream
29 | 
30 | [[autodoc]] huggingface_hub.repository.commits_to_push
31 | 
32 | ## Following asynchronous commands
33 | 
34 | The `Repository` utility offers several methods which can be launched asynchronously:
35 | - `git_push`
36 | - `git_pull`
37 | - `push_to_hub`
38 | - The `commit` context manager
39 | 
40 | See below for utilities to manage such asynchronous methods.
41 | 
42 | [[autodoc]] Repository
43 |     - commands_failed
44 |     - commands_in_progress
45 |     - wait_for_commands
46 | 
47 | [[autodoc]] huggingface_hub.repository.CommandInProgress


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [isort]
 2 | default_section = FIRSTPARTY
 3 | ensure_newline_before_comments = True
 4 | force_grid_wrap = 0
 5 | include_trailing_comma = True
 6 | known_first_party = huggingface_hub
 7 | known_third_party =
 8 |     absl
 9 |     conllu
10 |     datasets
11 |     elasticsearch
12 |     fairseq
13 |     faiss-cpu
14 |     fastprogress
15 |     fire
16 |     fugashi
17 |     git
18 |     graphviz
19 |     h5py
20 |     matplotlib
21 |     nltk
22 |     numpy
23 |     packaging
24 |     pandas
25 |     pydot
26 |     PIL
27 |     psutil
28 |     pytest
29 |     pytorch_lightning
30 |     rouge_score
31 |     sacrebleu
32 |     seqeval
33 |     sklearn
34 |     streamlit
35 |     tensorboardX
36 |     tensorflow
37 |     tensorflow_datasets
38 |     timeout_decorator
39 |     torch
40 |     torchtext
41 |     torchvision
42 |     torch_xla
43 |     tqdm
44 | 
45 | line_length = 88
46 | lines_after_imports = 2
47 | multi_line_output = 3
48 | use_parentheses = True
49 | 
50 | [flake8]
51 | exclude = .git,__pycache__,old,build,dist,.venv*
52 | ignore = E203, E501, E741, W503
53 | max-line-length = 88
54 | 
55 | [tool:pytest]
56 | # -Werror::FutureWarning -> test fails if FutureWarning is thrown
57 | # -s                     -> logs are not captured
58 | # -v                     -> verbose mode
59 | # --log-cli-level=INFO   -> log level
60 | # --durations=0          -> print execution time of each test
61 | addopts = -Werror::FutureWarning --log-cli-level=INFO -sv --durations=0
62 | env =
63 |     HUGGINGFACE_CO_STAGING=1


--------------------------------------------------------------------------------
/tests/test_utils_chunks.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from huggingface_hub.utils._chunk_utils import chunk_iterable
 4 | 
 5 | 
 6 | class TestUtilsCommon(unittest.TestCase):
 7 |     def test_chunk_iterable_non_truncated(self):
 8 |         # Can iterable over any iterable (iterator, list, tuple,...)
 9 |         for iterable in (range(12), list(range(12)), tuple(range(12))):
10 |             # 12 is a multiple of 4 -> last chunk is not truncated
11 |             for chunk, expected_chunk in zip(
12 |                 chunk_iterable(iterable, chunk_size=4),
13 |                 [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]],
14 |             ):
15 |                 self.assertListEqual(list(chunk), expected_chunk)
16 | 
17 |     def test_chunk_iterable_last_chunk_truncated(self):
18 |         # Can iterable over any iterable (iterator, list, tuple,...)
19 |         for iterable in (range(12), list(range(12)), tuple(range(12))):
20 |             # 12 is NOT a multiple of 5 -> last chunk is truncated
21 |             for chunk, expected_chunk in zip(
22 |                 chunk_iterable(iterable, chunk_size=5),
23 |                 [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11]],
24 |             ):
25 |                 self.assertListEqual(list(chunk), expected_chunk)
26 | 
27 |     def test_chunk_iterable_validation(self):
28 |         with self.assertRaises(ValueError):
29 |             next(chunk_iterable(range(128), 0))
30 | 
31 |         with self.assertRaises(ValueError):
32 |             next(chunk_iterable(range(128), -1))
33 | 


--------------------------------------------------------------------------------
/tests/fixtures/tiny_dataset/test.py:
--------------------------------------------------------------------------------
 1 | import datasets
 2 | 
 3 | 
 4 | _CITATION = """\
 5 | """
 6 | 
 7 | _DESCRIPTION = """\
 8 | This is a test dataset.
 9 | """
10 | 
11 | _URLS = {"train": "https://pastebin.com/raw/HvpE1CnA", "dev": "some_text.txt"}
12 | 
13 | 
14 | class Test(datasets.GeneratorBasedBuilder):
15 |     """SQUAD: The Stanford Question Answering Dataset. Version 1.1."""
16 | 
17 |     def _info(self):
18 |         return datasets.DatasetInfo(
19 |             description=_DESCRIPTION,
20 |             features=datasets.Features(
21 |                 {
22 |                     "text": datasets.Value("string"),
23 |                 }
24 |             ),
25 |             supervised_keys=None,
26 |             homepage="https://huggingface.co/datasets/lhoestq/test",
27 |             citation=_CITATION,
28 |         )
29 | 
30 |     def _split_generators(self, dl_manager):
31 |         downloaded_files = dl_manager.download_and_extract(_URLS)
32 | 
33 |         return [
34 |             datasets.SplitGenerator(
35 |                 name=datasets.Split.TRAIN,
36 |                 gen_kwargs={"filepath": downloaded_files["train"]},
37 |             ),
38 |             datasets.SplitGenerator(
39 |                 name=datasets.Split.VALIDATION,
40 |                 gen_kwargs={"filepath": downloaded_files["dev"]},
41 |             ),
42 |         ]
43 | 
44 |     def _generate_examples(self, filepath):
45 |         """This function returns the examples in the raw (text) form."""
46 |         for _id, line in enumerate(open(filepath, encoding="utf-8")):
47 |             yield _id, {"text": line.rstrip()}
48 | 


--------------------------------------------------------------------------------
/tests/test_login_utils.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import unittest
 3 | from typing import Optional
 4 | 
 5 | from huggingface_hub._login import _set_store_as_git_credential_helper_globally
 6 | from huggingface_hub.utils import run_subprocess
 7 | 
 8 | 
 9 | class TestSetGlobalStore(unittest.TestCase):
10 |     # Value of the global `credential.helper` before the test (None if it was unset)
11 |     previous_config: Optional[str]
12 | 
13 |     def setUp(self) -> None:
14 |         """Get current global config value."""
15 |         try:
16 |             self.previous_config = run_subprocess(
17 |                 "git config --global credential.helper"
18 |             ).stdout
19 |         except subprocess.CalledProcessError:
20 |             self.previous_config = None  # Means global credential.helper value not set
21 | 
22 |         run_subprocess("git config --global credential.helper store")
23 | 
24 |     def tearDown(self) -> None:
25 |         """Reset global config value."""
26 |         if self.previous_config is None:
27 |             run_subprocess("git config --global --unset credential.helper")
28 |         else:
29 |             # NOTE(review): `previous_config` is raw stdout and presumably ends with a
30 |             # newline (see the "store\n" assertion below) -- confirm `run_subprocess`
31 |             # copes with it once interpolated into the command.
32 |             run_subprocess(
33 |                 f"git config --global credential.helper {self.previous_config}"
34 |             )
35 | 
36 |     def test_set_store_as_git_credential_helper_globally(self) -> None:
37 |         """Test `_set_store_as_git_credential_helper_globally` works as expected.
38 | 
39 |         Previous value from the machine is restored after the test.
40 |         """
41 |         _set_store_as_git_credential_helper_globally()
42 |         new_config = run_subprocess("git config --global credential.helper").stdout
43 |         self.assertEqual(new_config, "store\n")
40 | 


--------------------------------------------------------------------------------
/contrib/conftest.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import uuid
 3 | from typing import Generator
 4 | 
 5 | import pytest
 6 | 
 7 | from huggingface_hub import HfFolder, delete_repo
 8 | 
 9 | 
10 | @pytest.fixture(scope="session")
11 | def token() -> str:
12 |     """Return the hard-coded token used to log in for the contrib tests."""
13 |     # Not critical, only usable on the sandboxed CI instance.
14 |     return "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL"
14 | 
15 | 
16 | @pytest.fixture(scope="session")
17 | def user() -> str:
18 |     """Return the user name used as namespace when building test repo ids."""
19 |     return "__DUMMY_TRANSFORMERS_USER__"
19 | 
20 | 
21 | @pytest.fixture(autouse=True, scope="session")
22 | def login_as_dummy_user(token: str) -> Generator:
23 |     """Login with dummy user token on machine
24 | 
25 |     Once all tests are completed, set back previous token."""
26 |     # Remove registered token
27 |     old_token = HfFolder().get_token()
28 |     HfFolder().save_token(token)
29 | 
30 |     yield  # Run all tests
31 | 
32 |     # Set back token once all tests have passed
33 |     if old_token is not None:
34 |         HfFolder().save_token(old_token)
35 | 
36 | 
37 | @pytest.fixture
38 | def repo_name(request) -> None:
39 |     """
40 |     Return a readable pseudo-unique repository name for tests.
41 | 
42 |     Example: "repo-2fe93f-16599646671840"
43 |     """
44 |     prefix = request.module.__name__  # example: `test_timm`
45 |     id = uuid.uuid4().hex[:6]
46 |     ts = int(time.time() * 10e3)
47 |     return f"repo-{prefix}-{id}-{ts}"
48 | 
49 | 
50 | @pytest.fixture
51 | def cleanup_repo(user: str, repo_name: str) -> Generator[None, None, None]:
52 |     """Delete the repo at the end of the tests.
53 | 
54 |     TODO: Adapt to handle `repo_type` as well
55 |     """
56 |     yield  # run test
57 |     # Teardown: the repo id is built as `{user}/{repo_name}`
58 |     delete_repo(repo_id=f"{user}/{repo_name}")
58 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/cards.mdx:
--------------------------------------------------------------------------------
 1 | # Repository Cards
 2 | 
 3 | The huggingface_hub library provides a Python interface to create, share, and update Model/Dataset Cards. 
 4 | Visit the [dedicated documentation page](https://huggingface.co/docs/hub/models-cards) for a deeper view of what 
 5 | Model Cards on the Hub are, and how they work under the hood. You can also check out our [Model Cards guide](how-to-model-cards) to 
 6 | get a feel for how you would use these utilities in your own projects.
 7 | 
 8 | ## Repo Card
 9 | 
10 | The `RepoCard` object is the parent class of [`ModelCard`] and [`DatasetCard`].
11 | 
12 | [[autodoc]] huggingface_hub.repocard.RepoCard
13 |     - __init__
14 |     - all
15 | ## Card Data
16 | 
17 | The [`CardData`] object is the parent class of [`ModelCardData`] and [`DatasetCardData`].
18 | 
19 | [[autodoc]] huggingface_hub.repocard_data.CardData
20 | 
21 | ## Model Cards
22 | ### ModelCard
23 | 
24 | [[autodoc]] ModelCard
25 | 
26 | ### ModelCardData
27 | 
28 | [[autodoc]] ModelCardData
29 | 
30 | ## Dataset Cards
31 | 
32 | Dataset cards are also known as Data Cards in the ML Community.
33 | 
34 | ### DatasetCard
35 | 
36 | [[autodoc]] DatasetCard
37 | 
38 | ### DatasetCardData
39 | 
40 | [[autodoc]] DatasetCardData
41 | 
42 | ## Utilities
43 | 
44 | ### EvalResult
45 | 
46 | [[autodoc]] EvalResult
47 | 
48 | ### model_index_to_eval_results
49 | 
50 | [[autodoc]] huggingface_hub.repocard_data.model_index_to_eval_results
51 | 
52 | ### eval_results_to_model_index
53 | 
54 | [[autodoc]] huggingface_hub.repocard_data.eval_results_to_model_index
55 | 
56 | ### metadata_eval_result
57 | 
58 | [[autodoc]] huggingface_hub.repocard.metadata_eval_result
59 | 
60 | ### metadata_update
61 | 
62 | [[autodoc]] huggingface_hub.repocard.metadata_update


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F41B Bug Report"
 2 | description: Report a bug on huggingface_hub
 3 | labels: ["bug"]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         Thanks for taking the time to fill out this bug report!
 9 |   - type: textarea
10 |     id: bug-description
11 |     attributes:
12 |       label: Describe the bug
13 |       description: A clear and concise description of what the bug is. If you intend to submit a pull request for this issue, tell us in the description. Thanks!
14 |       placeholder: Bug description
15 |     validations:
16 |       required: true
17 |   - type: textarea
18 |     id: reproduction
19 |     attributes:
20 |       label: Reproduction
21 |       description: Please provide a minimal reproducible code snippet that we can copy/paste to reproduce the issue.
22 |       placeholder: Reproduction
23 |   - type: textarea
24 |     id: logs
25 |     attributes:
26 |       label: Logs
27 |       description: "Please include the Python logs if you can."
28 |       render: shell
29 |   - type: textarea
30 |     id: system-info
31 |     attributes:
32 |       label: System info
33 |       description: |
34 |         Please dump your environment info by running the following command and copy-paste the result here:
35 |         ```txt
36 |         huggingface-cli env
37 |         ```
38 | 
39 |         If you are working in a notebook, please run it in a code cell:
40 |         ```py
41 |         from huggingface_hub import dump_environment_info
42 | 
43 |         dump_environment_info()
44 |         ```
45 |       render: shell
46 |       placeholder: |
47 |         - huggingface_hub version: 0.11.0.dev0
48 |         - Platform: Linux-5.15.0-52-generic-x86_64-with-glibc2.35
49 |         - Python version: 3.10.6
50 |         ...
51 |     validations:
52 |       required: true
53 | 


--------------------------------------------------------------------------------
/docs/dev/release.md:
--------------------------------------------------------------------------------
 1 | This document covers all steps that need to be done in order to do a release of the `huggingface_hub` library.
 2 | 
 3 | 1. On a clone of the main repo, not your fork, checkout the main branch and pull the latest changes:
 4 | ```
 5 | git checkout main
 6 | git pull
 7 |    ```
 8 | 
 9 | 2. Checkout a new branch with the version that you'd like to release: v<MINOR-VERSION>-release,
10 | for example `v0.5-release`. All patches will be done to that same branch.
11 | 
12 | 3. Update the `__version__` variable in the `src/huggingface_hub/__init__.py` file to point
13 | to the version you're releasing:
14 | ```
15 | __version__ = "<VERSION>"
16 |    ```
17 | 
18 | 4. Make sure that the conda build works correctly by building it locally:
19 | ```
20 | conda install -c defaults anaconda-client conda-build
21 | HUB_VERSION=<VERSION> conda-build .github/conda
22 |    ```
23 | 
24 | 5. Make sure that the pip wheel works correctly by building it locally and installing it:
25 | ```
26 | pip install setuptools wheel
27 | python setup.py sdist bdist_wheel
28 | pip install dist/huggingface_hub-<VERSION>-py3-none-any.whl
29 |    ```
30 | 
31 | 6. Commit, tag, and push the branch:
32 | ```
33 | git commit -am "Release: v<VERSION>"
34 | git tag v<VERSION> -m "Adds tag v<VERSION> for pypi and conda"
35 | git push -u --tags origin v<MINOR-VERSION>-release
36 |    ```
37 | 
38 | 7. Verify that the docs have been built correctly. You can check that on the following link:
39 | https://huggingface.co/docs/huggingface_hub/v<VERSION>
40 | 
41 | 8. Checkout main once again to update the version in the `__init__.py` file:
42 | ```
43 | git checkout main
44 |    ```
45 | 
46 | 9. Update the version to contain the `.dev0` suffix:
47 | ```
48 | __version__ = "<VERSION+1>.dev0"  # For example, after releasing v0.5.0 or v0.5.1: "0.6.0.dev0".
49 |    ```
50 | 
51 | 10. Push the changes!
52 | ```
53 | git push origin main
54 | ```
55 | 


--------------------------------------------------------------------------------
/contrib/utils.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | from typing import Generator
 3 | from unittest.mock import patch
 4 | 
 5 | 
 6 | @contextlib.contextmanager
 7 | def production_endpoint() -> Generator:
 8 |     """Patch huggingface_hub to connect to production server in a context manager.
 9 | 
10 |     Ugly way to patch all constants at once.
11 |     TODO: refactor when https://github.com/huggingface/huggingface_hub/issues/1172 is fixed.
12 | 
13 |     Example:
14 |     ```py
15 |     def test_push_to_hub():
16 |         # Pull from production Hub
17 |         with production_endpoint():
18 |             model = ...from_pretrained("modelname")
19 | 
20 |         # Push to staging Hub
21 |         model.push_to_hub()
22 |     ```
23 |     """
24 |     PROD_ENDPOINT = "https://huggingface.co"
25 |     ENDPOINT_TARGETS = [
26 |         "huggingface_hub.constants",
27 |         "huggingface_hub._commit_api",
28 |         "huggingface_hub.hf_api",
29 |         "huggingface_hub.lfs",
30 |         "huggingface_hub.commands.user",
31 |         "huggingface_hub.utils._git_credential",
32 |     ]
33 | 
34 |     PROD_URL_TEMPLATE = PROD_ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
35 |     URL_TEMPLATE_TARGETS = [
36 |         "huggingface_hub.constants",
37 |         "huggingface_hub.file_download",
38 |     ]
39 | 
40 |     from huggingface_hub.hf_api import api
41 | 
42 |     patchers = (
43 |         [patch(target + ".ENDPOINT", PROD_ENDPOINT) for target in ENDPOINT_TARGETS]
44 |         + [
45 |             patch(target + ".HUGGINGFACE_CO_URL_TEMPLATE", PROD_URL_TEMPLATE)
46 |             for target in URL_TEMPLATE_TARGETS
47 |         ]
48 |         + [patch.object(api, "endpoint", PROD_URL_TEMPLATE)]
49 |     )
50 | 
51 |     # Start all patches
52 |     for patcher in patchers:
53 |         patcher.start()
54 | 
55 |     yield
56 | 
57 |     # Stop all patches
58 |     for patcher in patchers:
59 |         patcher.stop()
60 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/commands/huggingface_cli.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from argparse import ArgumentParser
17 | 
18 | from huggingface_hub.commands.delete_cache import DeleteCacheCommand
19 | from huggingface_hub.commands.env import EnvironmentCommand
20 | from huggingface_hub.commands.lfs import LfsCommands
21 | from huggingface_hub.commands.scan_cache import ScanCacheCommand
22 | from huggingface_hub.commands.user import UserCommands
23 | 
24 | 
def main():
    """Entry point of the `huggingface-cli` command line tool.

    Builds the argument parser, registers every subcommand, parses the command line
    and runs the selected command. The selected subcommand is expected to provide a
    `func` callable on the parsed arguments; `func(args)` returns the object whose
    `run()` method is then executed.

    Raises:
        `SystemExit`: with exit code 1, after printing the help message, when no
            subcommand was selected.
    """
    parser = ArgumentParser(
        "huggingface-cli", usage="huggingface-cli <command> [<args>]"
    )
    commands_parser = parser.add_subparsers(help="huggingface-cli command helpers")

    # Register commands (the order defines the order in the help message)
    for command in (
        EnvironmentCommand,
        UserCommands,
        LfsCommands,
        ScanCacheCommand,
        DeleteCacheCommand,
    ):
        command.register_subcommand(commands_parser)

    # Let's go
    args = parser.parse_args()

    if not hasattr(args, "func"):
        parser.print_help()
        # The `exit()` builtin is injected by the `site` module for interactive use
        # and is not guaranteed to exist; raise `SystemExit` directly instead.
        raise SystemExit(1)

    # Run
    service = args.func(args)
    service.run()
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     main()
52 | 


--------------------------------------------------------------------------------
/tests/test_init_lazy_loading.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | import jedi
 4 | 
 5 | 
 6 | class TestHuggingfaceHubInit(unittest.TestCase):
 7 |     def test_autocomplete_on_root_imports(self) -> None:
 8 |         """Test autocomplete with `huggingface_hub` works with Jedi.
 9 | 
10 |         Not all autocomplete systems are based on Jedi but if this one works we can
11 |         assume others do as well.
12 |         """
13 |         source = """from huggingface_hub import c"""
14 |         script = jedi.Script(source, path="example.py")
15 |         completions = script.complete(1, len(source))
16 | 
17 |         for completion in completions:
18 |             if completion.name == "create_commit":
19 |                 # Assert `create_commit` is suggestion from `huggingface_hub` lib
20 |                 self.assertEqual(completion.module_name, "huggingface_hub")
21 | 
22 |                 # Assert autocomplete knows where `create_commit` lives
23 |                 # It would not be the case with a dynamic import.
24 |                 goto_list = completion.goto()
25 |                 self.assertEqual(len(goto_list), 1)
26 | 
27 |                 # Assert docstring is find. This means autocomplete can also provide
28 |                 # the help section.
29 |                 signature_list = goto_list[0].get_signatures()
30 |                 self.assertEqual(len(signature_list), 1)
31 |                 self.assertTrue(
32 |                     signature_list[0]
33 |                     .docstring()
34 |                     .startswith("create_commit(repo_id: str,")
35 |                 )
36 |                 break
37 |         else:
38 |             self.fail(
39 |                 "Jedi autocomplete did not suggest `create_commit` to complete the"
40 |                 f" line `{source}`. It is most probable that static imports are not"
41 |                 " correct in `./src/huggingface_hub/__init__.py`. Please run `make"
42 |                 " style` to fix this."
43 |             )
44 | 


--------------------------------------------------------------------------------
/docs/source/_toctree.yml:
--------------------------------------------------------------------------------
 1 | - title: "Get started"
 2 |   sections:
 3 |     - local: index
 4 |       title: Home
 5 |     - local: quick-start
 6 |       title: Quickstart
 7 |     - local: installation
 8 |       title: Installation
 9 | - title: "How-to guides"
10 |   sections:
11 |     - local: guides/overview
12 |       title: Overview
13 |     - local: how-to-manage
14 |       title: Create and manage repositories
15 |     - local: how-to-downstream
16 |       title: Download files from the Hub
17 |     - local: how-to-upstream
18 |       title: Upload files to the Hub
19 |     - local: searching-the-hub
20 |       title: Searching the Hub
21 |     - local: how-to-inference
22 |       title: Access the Inference API
23 |     - local: how-to-discussions-and-pull-requests
24 |       title: Interact with Discussions and Pull Requests
25 |     - local: how-to-cache
26 |       title: Manage the Cache
27 |     - local: how-to-model-cards
28 |       title: Create and Share Model Cards
29 | - title: "Reference"
30 |   sections:
31 |     - local: package_reference/overview
32 |       title: Overview
33 |     - local: package_reference/login
34 |       title: Login and logout
35 |     - local: package_reference/environment_variables
36 |       title: Environment variables
37 |     - local: package_reference/repository
38 |       title: Managing local and online repositories
39 |     - local: package_reference/hf_api
40 |       title: Hugging Face Hub API
41 |     - local: package_reference/file_download
42 |       title: Downloading files
43 |     - local: package_reference/mixins
44 |       title: Mixins & serialization methods
45 |     - local: package_reference/inference_api
46 |       title: Inference API
47 |     - local: package_reference/utilities
48 |       title: Utilities
49 |     - local: package_reference/community
50 |       title: Discussions and Pull Requests
51 |     - local: package_reference/cache
52 |       title: Cache-system reference
53 |     - local: package_reference/cards
54 |       title: Repo Cards and Repo Card Data
55 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: contrib quality style test
 2 | 
 3 | 
 4 | check_dirs := contrib src tests utils setup.py
 5 | 
 6 | 
 7 | quality:
 8 | 	black --check $(check_dirs)
 9 | 	isort --check-only $(check_dirs)
10 | 	flake8 $(check_dirs)
11 | 	mypy src
12 | 	python utils/check_contrib_list.py
13 | 	python utils/check_static_imports.py
14 | 
15 | style:
16 | 	black $(check_dirs)
17 | 	isort $(check_dirs)
18 | 	python utils/check_contrib_list.py --update
19 | 	python utils/check_static_imports.py --update
20 | 
21 | test:
22 | 	pytest ./tests/
23 | 
24 | # Taken from https://stackoverflow.com/a/12110773
25 | # Commands:
26 | #	make contrib_setup_timm : setup tests for timm
27 | #	make contrib_test_timm  : run tests for timm
28 | #	make contrib_timm       : setup and run tests for timm
29 | #	make contrib_clear_timm : delete timm virtual env
30 | #
31 | #	make contrib_setup      : setup ALL tests
32 | #	make contrib_test       : run ALL tests
33 | #	make contrib            : setup and run ALL tests
34 | #	make contrib_clear      : delete all virtual envs
35 | # Use -j4 flag to run jobs in parallel.
36 | CONTRIB_LIBS := sentence_transformers timm
37 | CONTRIB_JOBS := $(addprefix contrib_,${CONTRIB_LIBS})
38 | CONTRIB_CLEAR_JOBS := $(addprefix contrib_clear_,${CONTRIB_LIBS})
39 | CONTRIB_SETUP_JOBS := $(addprefix contrib_setup_,${CONTRIB_LIBS})
40 | CONTRIB_TEST_JOBS := $(addprefix contrib_test_,${CONTRIB_LIBS})
41 | 
42 | contrib_clear_%:
43 | 	rm -rf contrib/$*/.venv
44 | 
45 | contrib_setup_%:
46 | 	python3 -m venv contrib/$*/.venv
47 | 	./contrib/$*/.venv/bin/pip install -r contrib/$*/requirements.txt
48 | 	./contrib/$*/.venv/bin/pip uninstall -y huggingface_hub
49 | 	./contrib/$*/.venv/bin/pip install -e .[testing]
50 | 
51 | contrib_test_%:
52 | 	./contrib/$*/.venv/bin/python -m pytest contrib/$*
53 | 
# Use $(MAKE) rather than a bare `make` so that command-line flags (e.g. the -j4
# mentioned above) and the jobserver are passed on to the sub-make.
contrib_%:
	$(MAKE) contrib_setup_$*
	$(MAKE) contrib_test_$*
57 | 
58 | contrib: ${CONTRIB_JOBS};
# Deletes all contrib virtual envs; the message reports the clear, not a test run.
contrib_clear: ${CONTRIB_CLEAR_JOBS}; echo "Successful contrib clear."
60 | contrib_setup: ${CONTRIB_SETUP_JOBS}; echo "Successful contrib setup."
61 | contrib_test: ${CONTRIB_TEST_JOBS}; echo "Successful contrib tests."


--------------------------------------------------------------------------------
/src/huggingface_hub/commands/_cli_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Contains a utility for good-looking prints."""
15 | import os
16 | from typing import List, Union
17 | 
18 | 
class ANSI:
    """
    Small helper to decorate text with ANSI escape codes.

    See en.wikipedia.org/wiki/ANSI_escape_code. Formatting is skipped entirely
    when the `NO_COLOR` environment variable is set to a non-empty value.
    """

    _bold = "\u001b[1m"
    _gray = "\u001b[90m"
    _red = "\u001b[31m"
    _reset = "\u001b[0m"

    @classmethod
    def _format(cls, s: str, code: str) -> str:
        """Wrap `s` between `code` and the reset sequence, unless colors are disabled."""
        # See https://no-color.org/
        colors_disabled = bool(os.environ.get("NO_COLOR"))
        return s if colors_disabled else f"{code}{s}{cls._reset}"

    @classmethod
    def bold(cls, s: str) -> str:
        """Return `s` formatted in bold."""
        return cls._format(s, cls._bold)

    @classmethod
    def gray(cls, s: str) -> str:
        """Return `s` formatted in gray."""
        return cls._format(s, cls._gray)

    @classmethod
    def red(cls, s: str) -> str:
        """Return `s` formatted in bold red."""
        bold_red = cls._bold + cls._red
        return cls._format(s, bold_red)
47 | 
48 | 
def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
    """
    Render `rows` as a plain-text table with a header line and a dashed separator.

    Each column is as wide as its longest cell (header included) and every cell is
    followed by a single space.

    Inspired by:

    - stackoverflow.com/a/8356620/593036
    - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
    """
    # Transpose rows + headers to measure each column.
    widths = []
    for column in zip(*rows, headers):
        widths.append(max(len(str(cell)) for cell in column))

    # One "{:<width>} " placeholder per header.
    template = "{{:{}}} " * len(headers)
    template = template.format(*widths)

    separator = ["-" * width for width in widths]
    table = [template.format(*headers), template.format(*separator)]
    table.extend(template.format(*row) for row in rows)
    return "\n".join(table)
64 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_hf_folder.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022-present, the HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Contain helper class to retrieve/store token from/to local cache."""
16 | import os
17 | from pathlib import Path
18 | from typing import Optional
19 | 
20 | 
class HfFolder:
    """Helper to save, retrieve and delete the user token stored on the local machine."""

    # File in which the token is stored.
    path_token = Path("~/.huggingface/token").expanduser()

    @classmethod
    def save_token(cls, token: str) -> None:
        """
        Save token, creating folder as needed.

        Args:
            token (`str`):
                The token to save to the [`HfFolder`]
        """
        # `parents=True` so that saving also works when `path_token` points to a
        # location whose intermediate folders do not exist yet.
        cls.path_token.parent.mkdir(parents=True, exist_ok=True)
        cls.path_token.write_text(token)

    @classmethod
    def get_token(cls) -> Optional[str]:
        """
        Get token or None if not existent.

        Note that a token can be also provided using the
        `HUGGING_FACE_HUB_TOKEN` environment variable. When set, the environment
        variable takes precedence over the token file.

        A token read from the file is stripped of surrounding whitespace (e.g. a
        trailing newline added by a text editor). An empty file is treated as
        "no token".

        Returns:
            `str` or `None`: The token, `None` if it doesn't exist.
        """
        token: Optional[str] = os.environ.get("HUGGING_FACE_HUB_TOKEN")
        if token is not None:
            return token
        try:
            stored = cls.path_token.read_text()
        except FileNotFoundError:
            return None
        return stored.strip() or None

    @classmethod
    def delete_token(cls) -> None:
        """
        Deletes the token from storage. Does not fail if token does not exist.
        """
        try:
            cls.path_token.unlink()
        except FileNotFoundError:
            pass
65 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_pagination.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022-present, the HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Contains utilities to handle pagination on Huggingface Hub."""
16 | from typing import Dict, Iterable, Optional
17 | 
18 | import requests
19 | 
20 | from . import hf_raise_for_status, logging
21 | 
22 | 
23 | logger = logging.get_logger(__name__)
24 | 
25 | 
def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
    """Fetch a list of models/datasets/spaces and paginate through results.

    Pagination is not mandatory on the Hub for now, but the number of repos per
    page will be limited at some point for performance reasons. Using this helper
    keeps `huggingface_hub` compliant with such future server-side updates.

    The "Link" header is expected in the same format as GitHub.
    See:
    - https://requests.readthedocs.io/en/latest/api/#requests.Response.links
    - https://docs.github.com/en/rest/guides/traversing-with-pagination#link-header

    Args:
        path (`str`):
            URL of the first page.
        params (`Dict`):
            Query parameters, sent with the first request only.
        headers (`Dict`):
            HTTP headers, sent with every request.

    Returns:
        `Iterable`: the items of each page's JSON payload, page after page.
    """
    response = requests.get(path, params=params, headers=headers)
    hf_raise_for_status(response)
    yield from response.json()

    # If pagination is implemented server-side, follow pages.
    # The "next" link already contains the query params.
    while True:
        next_url = _get_next_page(response)
        if next_url is None:
            return
        logger.debug(f"Pagination detected. Requesting next page: {next_url}")
        response = requests.get(next_url, headers=headers)
        hf_raise_for_status(response)
        yield from response.json()
51 | 
52 | 
def _get_next_page(response: requests.Response) -> Optional[str]:
    """Return the URL of the "next" link of `response`, or `None` if there is none."""
    next_link = response.links.get("next", {})
    return next_link.get("url")
55 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_chunk_utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022-present, the HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Contains a utility to iterate by chunks over an iterator."""
16 | import itertools
17 | from typing import Iterable, TypeVar
18 | 
19 | 
T = TypeVar("T")


def chunk_iterable(iterable: Iterable[T], chunk_size: int) -> Iterable[Iterable[T]]:
    """Iterates over an iterator chunk by chunk.

    Taken from https://stackoverflow.com/a/8998040.
    See also https://github.com/huggingface/huggingface_hub/pull/920#discussion_r938793088.

    Args:
        iterable (`Iterable`):
            The iterable on which we want to iterate.
        chunk_size (`int`):
            Size of the chunks. Must be a strictly positive integer (e.g. >0).

    Returns:
        `Iterable[Iterable]`: a generator of chunks. Each chunk is itself a lazy
        iterator (not a list) of at most `chunk_size` items.

    Example:

    ```python
    >>> from huggingface_hub.utils import chunk_iterable

    >>> for items in chunk_iterable(range(17), chunk_size=8):
    ...     print(list(items))
    # [0, 1, 2, 3, 4, 5, 6, 7]
    # [8, 9, 10, 11, 12, 13, 14, 15]
    # [16] # smaller last chunk
    ```

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `chunk_size` <= 0. As this function is a generator, the error is
            raised when the first chunk is requested.

    <Tip warning={true}>
        The last chunk can be smaller than `chunk_size`.
    </Tip>

    <Tip warning={true}>
        All chunks read from the same underlying iterator. Consume a chunk entirely
        before requesting the next one, otherwise its remaining items end up in the
        following chunks.
    </Tip>
    """
    if not isinstance(chunk_size, int) or chunk_size <= 0:
        raise ValueError("`chunk_size` must be a strictly positive integer (>0).")

    iterator = iter(iterable)
    # Pulling the first item eagerly is what detects exhaustion: the loop stops as
    # soon as the underlying iterator has nothing left to start a new chunk with.
    for first_item in iterator:
        remainder = itertools.islice(iterator, chunk_size - 1)
        yield itertools.chain((first_item,), remainder)
65 | 


--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from argparse import ArgumentParser
 3 | 
 4 | from huggingface_hub.commands.delete_cache import DeleteCacheCommand
 5 | from huggingface_hub.commands.scan_cache import ScanCacheCommand
 6 | 
 7 | 
 8 | class TestCLI(unittest.TestCase):
 9 |     def setUp(self) -> None:
10 |         """
11 |         Set up CLI as in `src/huggingface_hub/commands/huggingface_cli.py`.
12 | 
13 |         TODO: add other subcommands.
14 |         """
15 |         self.parser = ArgumentParser(
16 |             "huggingface-cli", usage="huggingface-cli <command> [<args>]"
17 |         )
18 |         commands_parser = self.parser.add_subparsers()
19 |         ScanCacheCommand.register_subcommand(commands_parser)
20 |         DeleteCacheCommand.register_subcommand(commands_parser)
21 | 
22 |     def test_scan_cache_basic(self) -> None:
23 |         """Test `huggingface-cli scan-cache`."""
24 |         args = self.parser.parse_args(["scan-cache"])
25 |         self.assertEqual(args.dir, None)
26 |         self.assertEqual(args.verbose, 0)
27 |         self.assertEqual(args.func, ScanCacheCommand)
28 | 
29 |     def test_scan_cache_verbose(self) -> None:
30 |         """Test `huggingface-cli scan-cache -v`."""
31 |         args = self.parser.parse_args(["scan-cache", "-v"])
32 |         self.assertEqual(args.dir, None)
33 |         self.assertEqual(args.verbose, 1)
34 |         self.assertEqual(args.func, ScanCacheCommand)
35 | 
36 |     def test_scan_cache_with_dir(self) -> None:
37 |         """Test `huggingface-cli scan-cache --dir something`."""
38 |         args = self.parser.parse_args(["scan-cache", "--dir", "something"])
39 |         self.assertEqual(args.dir, "something")
40 |         self.assertEqual(args.verbose, 0)
41 |         self.assertEqual(args.func, ScanCacheCommand)
42 | 
43 |     def test_scan_cache_ultra_verbose(self) -> None:
44 |         """Test `huggingface-cli scan-cache -vvv`."""
45 |         args = self.parser.parse_args(["scan-cache", "-vvv"])
46 |         self.assertEqual(args.dir, None)
47 |         self.assertEqual(args.verbose, 3)
48 |         self.assertEqual(args.func, ScanCacheCommand)
49 | 
50 |     def test_delete_cache_with_dir(self) -> None:
51 |         """Test `huggingface-cli delete-cache --dir something`."""
52 |         args = self.parser.parse_args(["delete-cache", "--dir", "something"])
53 |         self.assertEqual(args.dir, "something")
54 |         self.assertEqual(args.func, DeleteCacheCommand)
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib64/
 18 | parts/
 19 | sdist/
 20 | var/
 21 | wheels/
 22 | pip-wheel-metadata/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | db.sqlite3
 61 | db.sqlite3-journal
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # IPython
 80 | profile_default/
 81 | ipython_config.py
 82 | 
 83 | # pyenv
 84 | .python-version
 85 | 
 86 | # pipenv
 87 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 88 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 89 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 90 | #   install all needed dependencies.
 91 | #Pipfile.lock
 92 | 
 93 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 94 | __pypackages__/
 95 | 
 96 | # Celery stuff
 97 | celerybeat-schedule
 98 | celerybeat.pid
 99 | 
100 | # SageMath parsed files
101 | *.sage.py
102 | 
103 | # Environments
104 | .env
105 | .venv
106 | .venv*
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | .venv*
113 | 
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 | 
118 | # Rope project settings
119 | .ropeproject
120 | 
121 | # mkdocs documentation
122 | /site
123 | 
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 | 
129 | # Pyre type checker
130 | .pyre/
131 | .vscode/
132 | .idea/
133 | 
134 | .DS_Store
135 | 


--------------------------------------------------------------------------------
/tests/test_utils_git_credentials.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import unittest
 3 | from pathlib import Path
 4 | 
 5 | import pytest
 6 | 
 7 | from huggingface_hub.constants import ENDPOINT
 8 | from huggingface_hub.utils import run_interactive_subprocess, run_subprocess
 9 | from huggingface_hub.utils._git_credential import (
10 |     list_credential_helpers,
11 |     set_git_credential,
12 |     unset_git_credential,
13 | )
14 | 
15 | 
16 | STORE_AND_CACHE_HELPERS_CONFIG = """
17 | [credential]
18 |     helper = store
19 |     helper = cache --timeout 30000
20 | """
21 | 
22 | 
@pytest.mark.usefixtures("fx_cache_dir")
class TestGitCredentials(unittest.TestCase):
    # Presumably set by the `fx_cache_dir` fixture -- defined outside this file.
    cache_dir: Path

    def setUp(self):
        """Initialize and configure a local repo.

        Credential helpers are configured in the local repo only, to avoid touching
        the global git configuration of a contributor's machine.
        """
        run_subprocess("git init", folder=self.cache_dir)
        git_config = self.cache_dir / ".git" / "config"
        git_config.write_text(STORE_AND_CACHE_HELPERS_CONFIG)

    def _ask_git(self, command: str, username: str) -> str:
        """Run `command` in the local repo, send it the credential query, return its output."""
        with run_interactive_subprocess(command, folder=self.cache_dir) as (
            stdin,
            stdout,
        ):
            stdin.write(f"url={ENDPOINT}\nusername={username}\n\n")
            stdin.flush()
            return stdout.read()

    def test_list_credential_helpers(self) -> None:
        helpers = list_credential_helpers(folder=self.cache_dir)
        for expected_helper in ("cache", "store"):
            self.assertIn(expected_helper, helpers)

    def test_set_and_unset_git_credential(self) -> None:
        # Timestamp suffix makes the username unique
        username = "hf_test_user_" + str(round(time.time()))

        # Set credentials
        set_git_credential(
            token="hf_test_token", username=username, folder=self.cache_dir
        )

        # Credentials must now be stored
        filled = self._ask_git("git credential fill", username)
        self.assertIn("password=hf_test_token", filled)

        # Unset credentials
        unset_git_credential(username=username, folder=self.cache_dir)

        # Credentials must NOT be stored anymore.
        # `git credential fill` cannot be used here as it would hang forever: only
        # the `store` helper is checked instead.
        stored = self._ask_git("git credential-store get", username)
        self.assertEqual("", stored)
71 | 


--------------------------------------------------------------------------------
/tests/test_utils_cli.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import unittest
 3 | from unittest import mock
 4 | 
 5 | from huggingface_hub.commands._cli_utils import ANSI, tabulate
 6 | 
 7 | 
 8 | class TestCLIUtils(unittest.TestCase):
 9 |     @mock.patch.dict(os.environ, {}, clear=True)
10 |     def test_ansi_utils(self) -> None:
11 |         """Test `ANSI` works as expected."""
12 |         self.assertEqual(
13 |             ANSI.bold("this is bold"),
14 |             "\x1b[1mthis is bold\x1b[0m",
15 |         )
16 | 
17 |         self.assertEqual(
18 |             ANSI.gray("this is gray"),
19 |             "\x1b[90mthis is gray\x1b[0m",
20 |         )
21 | 
22 |         self.assertEqual(
23 |             ANSI.red("this is red"),
24 |             "\x1b[1m\x1b[31mthis is red\x1b[0m",
25 |         )
26 | 
27 |         self.assertEqual(
28 |             ANSI.gray(ANSI.bold("this is bold and grey")),
29 |             "\x1b[90m\x1b[1mthis is bold and grey\x1b[0m\x1b[0m",
30 |         )
31 | 
32 |     @mock.patch.dict(os.environ, {"NO_COLOR": "1"}, clear=True)
33 |     def test_ansi_no_color(self) -> None:
34 |         """Test `ANSI` respects `NO_COLOR` env var."""
35 | 
36 |         self.assertEqual(
37 |             ANSI.bold("this is bold"),
38 |             "this is bold",
39 |         )
40 | 
41 |         self.assertEqual(
42 |             ANSI.gray("this is gray"),
43 |             "this is gray",
44 |         )
45 | 
46 |         self.assertEqual(
47 |             ANSI.red("this is red"),
48 |             "this is red",
49 |         )
50 | 
51 |         self.assertEqual(
52 |             ANSI.gray(ANSI.bold("this is bold and grey")),
53 |             "this is bold and grey",
54 |         )
55 | 
56 |     def test_tabulate_utility(self) -> None:
57 |         """Test `tabulate` works as expected."""
58 |         rows = [[1, 2, 3], ["a very long value", "foo", "bar"], ["", 123, 456]]
59 |         headers = ["Header 1", "something else", "a third column"]
60 |         self.assertEqual(
61 |             tabulate(rows=rows, headers=headers),
62 |             "Header 1          something else a third column \n"
63 |             "----------------- -------------- -------------- \n"
64 |             "                1              2              3 \n"
65 |             "a very long value foo            bar            \n"
66 |             "                             123            456 ",
67 |         )
68 | 
69 |     def test_tabulate_utility_with_too_short_row(self) -> None:
70 |         """
71 |         Test `tabulate` throw IndexError when a row has less values than the header
72 |         list.
73 |         """
74 |         self.assertRaises(
75 |             IndexError,
76 |             tabulate,
77 |             rows=[[1]],
78 |             headers=["Header 1", "Header 2"],
79 |         )
80 | 


--------------------------------------------------------------------------------
/contrib/README.md:
--------------------------------------------------------------------------------
 1 | # Contrib test suite
 2 | 
 3 | The contrib folder contains simple end-to-end scripts to test integration of `huggingface_hub` in downstream libraries. The main goal is to proactively notice breaking changes and deprecation warnings.
 4 | 
 5 | ## Add tests for a new library
 6 | 
 7 | To add another contrib lib, one must:
 8 | 1. Create a subfolder with the lib name. Example: `./contrib/transformers`
 9 | 2. Create a `requirements.txt` file specific to this lib. Example `./contrib/transformers/requirements.txt`
10 | 3. Implement tests for this lib. Example: `./contrib/transformers/test_push_to_hub.py`
11 | 4. Run `make style`. This will edit both `makefile` and `.github/workflows/contrib-tests.yml` to add the lib to the list of libs to test. Make sure changes are accurate before committing.
12 | 
13 | ## Run contrib tests on CI
14 | 
15 | Contrib tests can be [manually triggered in GitHub](https://github.com/huggingface/huggingface_hub/actions) with the `Contrib tests` workflow.
16 | 
17 | Tests are not run in the default test suite (for each PR) as this would slow down the development process. The goal is to notice breaking changes, not to avoid them. In particular, it is useful to trigger them before a release to make sure it will not cause too much friction.
18 | 
19 | ## Run contrib tests locally
20 | 
21 | Tests must be run individually for each dependent library. Here is an example to run
22 | `timm` tests. Tests are separated to avoid conflicts between version dependencies.
23 | 
24 | ### Run all contrib tests
25 | 
26 | Before running tests, a virtual env must be set up for each contrib library. To do so, run:
27 | 
28 | ```sh
29 | # Run setup in parallel to save time 
30 | make contrib_setup -j4
31 | ```
32 | 
33 | Then tests can be run:
34 | 
35 | ```sh
36 | # Optional: -j4 to run in parallel. Output will be messy in that case.
37 | make contrib_test -j4
38 | ```
39 | 
40 | Optionally, it is possible to set up and run all tests in a single command. However, this
41 | takes more time, as you don't need to set up the venv each time you run tests.
42 | 
43 | ```sh
44 | make contrib -j4
45 | ```
46 | 
47 | Finally, it is possible to delete all virtual envs to get a fresh start for contrib tests.
48 | After running this command, `contrib_setup` will have to re-download/re-install all dependencies.
49 | 
50 | ```
51 | make contrib_clear
52 | ```
53 | 
54 | ### Run contrib tests for a single lib
55 | 
56 | Instead of running tests for all contrib libraries, you can run a specific lib:
57 | 
58 | ```sh
59 | # Setup timm tests
60 | make contrib_setup_timm
61 | 
62 | # Run timm tests
63 | make contrib_test_timm
64 | 
65 | # (or) Setup and run timm tests at once
66 | make contrib_timm
67 | 
68 | # Delete timm virtualenv if corrupted
69 | make contrib_clear_timm
70 | ```
71 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/hf_api.mdx:
--------------------------------------------------------------------------------
  1 | # Hugging Face Hub API
  2 | 
  3 | Below is the documentation for the `HfApi` class, which serves as a Python wrapper for the Hugging Face
  4 | Hub's API.
  5 | 
  6 | All methods from the `HfApi` are also accessible from the package's root directly. Both approaches are detailed
  7 | below.
  8 | 
  9 | The following approach uses the method from the root of the package:
 10 | 
 11 | ```python
 12 | from huggingface_hub import list_models
 13 | 
 14 | models = list_models()
 15 | ```
 16 | 
 17 | The following approach uses the `HfApi` class:
 18 | 
 19 | ```python
 20 | from huggingface_hub import HfApi
 21 | 
 22 | hf_api = HfApi()
 23 | models = hf_api.list_models()
 24 | ```
 25 | 
 26 | Using the [`HfApi`] class directly enables you to configure the client. In particular, a
 27 | token can be passed to be authenticated in all API calls. This is different than
 28 | `huggingface-cli login` or [`login`] as the token is not persisted on the machine. One
 29 | can also specify a different endpoint than the Hugging Face's Hub (for example to interact
 30 | with a Private Hub).
 31 | 
 32 | ```py
 33 | from huggingface_hub import HfApi
 34 | 
 35 | hf_api = HfApi(
 36 |     endpoint="https://huggingface.co", # Can be a Private Hub endpoint.
 37 |     token="hf_xxx", # Token is not persisted on the machine.
 38 | )
 39 | ```
 40 | 
 41 | ### HfApi
 42 | 
 43 | [[autodoc]] HfApi
 44 | 
 45 | ### ModelInfo
 46 | 
 47 | [[autodoc]] huggingface_hub.hf_api.ModelInfo
 48 | 
 49 | ### DatasetInfo
 50 | 
 51 | [[autodoc]] huggingface_hub.hf_api.DatasetInfo
 52 | 
 53 | ### SpaceInfo
 54 | 
 55 | [[autodoc]] huggingface_hub.hf_api.SpaceInfo
 56 | 
 57 | ### RepoFile
 58 | 
 59 | [[autodoc]] huggingface_hub.hf_api.RepoFile
 60 | 
 61 | ### CommitInfo
 62 | 
 63 | [[autodoc]] huggingface_hub.hf_api.CommitInfo
 64 | 
 65 | ## `create_commit` API
 66 | 
 67 | Below are the supported values for [`CommitOperation`]:
 68 | 
 69 | [[autodoc]] CommitOperationAdd
 70 | 
 71 | [[autodoc]] CommitOperationDelete
 72 | 
 73 | ## Hugging Face local storage
 74 | 
 75 | `huggingface_hub` stores the authentication information locally so that it may be re-used in subsequent
 76 | methods.
 77 | 
 78 | It does this using the [`HfFolder`] utility, which saves data at the root of the user.
 79 | 
 80 | [[autodoc]] HfFolder
 81 | 
 82 | ## Filtering helpers
 83 | 
 84 | Some helpers to filter repositories on the Hub are available in the `huggingface_hub` package.
 85 | 
 86 | ### DatasetFilter
 87 | 
 88 | [[autodoc]] DatasetFilter
 89 | 
 90 | ### ModelFilter
 91 | 
 92 | [[autodoc]] ModelFilter
 93 | 
 94 | ### DatasetSearchArguments
 95 | 
 96 | [[autodoc]] DatasetSearchArguments
 97 | 
 98 | ### ModelSearchArguments
 99 | 
100 | [[autodoc]] ModelSearchArguments
101 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | #!/usr/bin/env python
 3 | # coding=utf-8
 4 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License
17 | 
18 | from . import tqdm as _tqdm  # _tqdm is the module
19 | from ._cache_assets import cached_assets_path
20 | from ._cache_manager import (
21 |     CachedFileInfo,
22 |     CachedRepoInfo,
23 |     CachedRevisionInfo,
24 |     CacheNotFound,
25 |     CorruptedCacheException,
26 |     DeleteCacheStrategy,
27 |     HFCacheInfo,
28 |     scan_cache_dir,
29 | )
30 | from ._chunk_utils import chunk_iterable
31 | from ._datetime import parse_datetime
32 | from ._errors import (
33 |     BadRequestError,
34 |     EntryNotFoundError,
35 |     GatedRepoError,
36 |     HfHubHTTPError,
37 |     LocalEntryNotFoundError,
38 |     RepositoryNotFoundError,
39 |     RevisionNotFoundError,
40 |     hf_raise_for_status,
41 | )
42 | from ._fixes import yaml_dump
43 | from ._git_credential import (
44 |     erase_from_credential_store,
45 |     list_credential_helpers,
46 |     read_from_credential_store,
47 |     set_git_credential,
48 |     unset_git_credential,
49 |     write_to_credential_store,
50 | )
51 | from ._headers import build_hf_headers, get_token_to_send
52 | from ._hf_folder import HfFolder
53 | from ._http import http_backoff
54 | from ._paths import filter_repo_objects
55 | from ._runtime import (
56 |     dump_environment_info,
57 |     get_fastai_version,
58 |     get_fastcore_version,
59 |     get_graphviz_version,
60 |     get_hf_hub_version,
61 |     get_jinja_version,
62 |     get_pydot_version,
63 |     get_python_version,
64 |     get_tf_version,
65 |     get_torch_version,
66 |     is_fastai_available,
67 |     is_fastcore_available,
68 |     is_google_colab,
69 |     is_graphviz_available,
70 |     is_jinja_available,
71 |     is_notebook,
72 |     is_pydot_available,
73 |     is_tf_available,
74 |     is_torch_available,
75 | )
76 | from ._subprocess import run_interactive_subprocess, run_subprocess
77 | from ._validators import (
78 |     HFValidationError,
79 |     smoothly_deprecate_use_auth_token,
80 |     validate_hf_hub_args,
81 |     validate_repo_id,
82 | )
83 | from .tqdm import (
84 |     are_progress_bars_disabled,
85 |     disable_progress_bars,
86 |     enable_progress_bars,
87 |     tqdm,
88 | )
89 | 


--------------------------------------------------------------------------------
/tests/test_utils_pagination.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import Mock, call, patch
 3 | 
 4 | from huggingface_hub.utils._pagination import paginate
 5 | 
 6 | from .testing_utils import handle_injection_in_test
 7 | 
 8 | 
 9 | class TestPagination(unittest.TestCase):
10 |     @patch("huggingface_hub.utils._pagination.requests.get")
11 |     @patch("huggingface_hub.utils._pagination.hf_raise_for_status")
12 |     @handle_injection_in_test
13 |     def test_mocked_paginate(
14 |         self, mock_get: Mock, mock_hf_raise_for_status: Mock
15 |     ) -> None:
16 |         mock_params = Mock()
17 |         mock_headers = Mock()
18 | 
19 |         # Simulate page 1
20 |         mock_response_page_1 = Mock()
21 |         mock_response_page_1.json.return_value = [1, 2, 3]
22 |         mock_response_page_1.links = {"next": {"url": "url_p2"}}
23 | 
24 |         # Simulate page 2
25 |         mock_response_page_2 = Mock()
26 |         mock_response_page_2.json.return_value = [4, 5, 6]
27 |         mock_response_page_2.links = {"next": {"url": "url_p3"}}
28 | 
29 |         # Simulate page 3
30 |         mock_response_page_3 = Mock()
31 |         mock_response_page_3.json.return_value = [7, 8]
32 |         mock_response_page_3.links = {}
33 | 
34 |         # Mock response
35 |         mock_get.side_effect = [
36 |             mock_response_page_1,
37 |             mock_response_page_2,
38 |             mock_response_page_3,
39 |         ]
40 | 
41 |         results = paginate("url", params=mock_params, headers=mock_headers)
42 | 
43 |         # Requests are made only when generator is yielded
44 |         self.assertEqual(mock_get.call_count, 0)
45 | 
46 |         # Results after concatenating pages
47 |         self.assertListEqual(list(results), [1, 2, 3, 4, 5, 6, 7, 8])
48 | 
49 |         # All pages requested: 3 requests, 3 raise for status
50 |         self.assertEqual(mock_get.call_count, 3)
51 |         self.assertEqual(mock_hf_raise_for_status.call_count, 3)
52 | 
53 |         # Params not passed to next pages
54 |         self.assertListEqual(
55 |             mock_get.call_args_list,
56 |             [
57 |                 call("url", params=mock_params, headers=mock_headers),
58 |                 call("url_p2", headers=mock_headers),
59 |                 call("url_p3", headers=mock_headers),
60 |             ],
61 |         )
62 | 
63 |     def test_paginate_github_api(self) -> None:
64 |         # Real test: paginate over huggingface repos on Github
65 |         # Use enumerate and stop after first page to avoid loading all repos
66 |         for num, _ in enumerate(
67 |             paginate(
68 |                 "https://api.github.com/orgs/huggingface/repos?limit=4",
69 |                 params={},
70 |                 headers={},
71 |             )
72 |         ):
73 |             if num == 6:
74 |                 break
75 |         else:
76 |             self.fail("Did not get more than 6 repos")
77 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_datetime.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022-present, the HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Contains utilities to handle datetimes in Huggingface Hub."""
16 | from datetime import datetime, timedelta, timezone
17 | 
18 | 
# Local machine offset compared to UTC, computed once at import time.
# Taken from https://stackoverflow.com/a/3168394.
# `utcoffset()` returns `None` if no offset -> empty timedelta.
# NOTE: `parse_datetime` does not rely on this value anymore: an offset computed at
# import time is only valid for dates sharing that same local offset (e.g. it is wrong
# across a DST change). Kept as a module-level constant for backward compatibility.
UTC_OFFSET = datetime.now(timezone.utc).astimezone().utcoffset() or timedelta()


def parse_datetime(date_string: str) -> datetime:
    """
    Parses a date_string returned from the server to a datetime object.

    This parser is a weak-parser in the sense that it handles only a single format of
    date_string. It is expected that the server format will never change. The
    implementation depends only on the standard lib to avoid an external dependency
    (python-dateutil). See full discussion about this decision on PR:
    https://github.com/huggingface/huggingface_hub/pull/999.

    Example:
        ```py
        >>> parse_datetime('2022-08-19T07:19:38.123Z')
        datetime.datetime(2022, 8, 19, 7, 19, 38, 123000, tzinfo=datetime.timezone.utc)
        ```

    Args:
        date_string (`str`):
            A string representing a datetime returned by the Hub server.
            String is expected to follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern.

    Returns:
        A timezone-aware python datetime object, expressed in UTC.

    Raises:
        :class:`ValueError`:
            If `date_string` cannot be parsed.
    """
    try:
        # `strptime` returns a naive datetime: the trailing "Z" is matched literally.
        dt = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError as e:
        raise ValueError(
            f"Cannot parse '{date_string}' as a datetime. Date string is expected to"
            " follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern."
        ) from e

    # Datetime ending with a Z means "UTC": the parsed fields are already UTC values,
    # so the timezone is attached as-is. Going through the local timezone
    # (`astimezone`) would make the result depend on the machine offset, which
    # differs between the current date and the parsed date across DST changes.
    # See https://en.wikipedia.org/wiki/ISO_8601#Coordinated_Universal_Time_(UTC)
    return dt.replace(tzinfo=timezone.utc)
67 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # `huggingface_hub`
 2 | 
 3 | <a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
 4 | <a href="https://codecov.io/gh/huggingface/huggingface_hub"><img alt="Code coverage" src="https://codecov.io/gh/huggingface/huggingface_hub/branch/main/graph/badge.svg?token=RXP95LE2XL"></a>
 5 | <a href="https://github.com/huggingface/huggingface_hub/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/huggingface_hub.svg"></a>
 6 | <a href="https://github.com/huggingface/huggingface_hub"><img alt="Supported Python versions" src="https://img.shields.io/pypi/pyversions/huggingface_hub.svg"></a>
 7 | <a href="https://huggingface.co/docs/huggingface_hub/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/huggingface_hub/index.svg?down_color=red&down_message=offline&up_message=online&label=doc"></a>
 8 | 
 9 | ## Welcome to the huggingface_hub library
10 | 
11 | 
12 | The `huggingface_hub` is a client library to interact with the Hugging Face Hub. The Hugging Face Hub is a platform with over 90K models, 14K datasets, and 12K demos in which people can easily collaborate in their ML workflows. The Hub works as a central place where anyone can share, explore, discover, and experiment with open-source Machine Learning.
13 | 
14 | With `huggingface_hub`, you can easily download and upload models, datasets, and Spaces. You can extract useful information from the Hub, and do much more. Some example use cases:
15 | * Downloading and caching files from a Hub repository.
16 | * Creating repositories and uploading an updated model every few epochs.
17 | * Extracting metadata from all models that match certain criteria (e.g. models for `text-classification`).
18 | * Listing all files from a specific repository.
19 | 
20 | Read all about it in [the library documentation](https://huggingface.co/docs/huggingface_hub).
21 | 
22 | <br>
23 | 
24 | ## Integrating to the Hub.
25 | 
26 | We're partnering with cool open source ML libraries to provide free model hosting and versioning. You can find the existing integrations [here](https://huggingface.co/docs/hub/libraries).
27 | 
28 | The advantages are:
29 | 
30 | - Free model or dataset hosting for libraries and their users.
31 | - Built-in file versioning, even with very large files, thanks to a git-based approach.
32 | - Hosted inference API for all models publicly available.
33 | - In-browser widgets to play with the uploaded models.
34 | - Anyone can upload a new model for your library, they just need to add the corresponding tag for the model to be discoverable.
35 | - Fast downloads! We use Cloudfront (a CDN) to geo-replicate downloads so they're blazing fast from anywhere on the globe.
36 | - Usage stats and more features to come.
37 | 
38 | If you would like to integrate your library, feel free to open an issue to begin the discussion. We wrote a [step-by-step guide](https://huggingface.co/docs/hub/adding-a-library) with ❤️ showing how to do this integration.
39 | 
40 | <br>
41 | 
42 | ## Feedback (feature requests, bugs, etc.) is super welcome 💙💚💛💜♥️🧡
43 | 


--------------------------------------------------------------------------------
/tests/test_utils_assets.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from pathlib import Path
 3 | from unittest.mock import patch
 4 | 
 5 | import pytest
 6 | 
 7 | from huggingface_hub import cached_assets_path
 8 | 
 9 | 
@pytest.mark.usefixtures("fx_cache_dir")
class CacheAssetsTest(unittest.TestCase):
    """Checks the folder returned (and created) by `cached_assets_path`."""

    cache_dir: Path

    def test_cached_assets_path_with_namespace_and_subfolder(self) -> None:
        target = self.cache_dir / "datasets" / "SQuAD" / "download"
        self.assertFalse(target.is_dir())  # Nothing exists beforehand

        generated = cached_assets_path(
            library_name="datasets",
            namespace="SQuAD",
            subfolder="download",
            assets_dir=self.cache_dir,
        )

        # The expected path is returned and the folder now exists
        self.assertEqual(generated, target)
        self.assertTrue(generated.is_dir())

    def test_cached_assets_path_without_subfolder(self) -> None:
        generated = cached_assets_path(
            library_name="datasets",
            namespace="SQuAD",
            assets_dir=self.cache_dir,
        )
        target = self.cache_dir / "datasets" / "SQuAD" / "default"
        self.assertEqual(generated, target)
        self.assertTrue(generated.is_dir())

    def test_cached_assets_path_without_namespace(self) -> None:
        generated = cached_assets_path(
            library_name="datasets",
            subfolder="download",
            assets_dir=self.cache_dir,
        )
        target = self.cache_dir / "datasets" / "default" / "download"
        self.assertEqual(generated, target)
        self.assertTrue(generated.is_dir())

    def test_cached_assets_path_without_namespace_and_subfolder(self) -> None:
        generated = cached_assets_path(
            library_name="datasets", assets_dir=self.cache_dir
        )
        target = self.cache_dir / "datasets" / "default" / "default"
        self.assertEqual(generated, target)
        self.assertTrue(generated.is_dir())

    def test_cached_assets_path_forbidden_symbols(self) -> None:
        generated = cached_assets_path(
            library_name="ReAlLy dumb",
            namespace="user/repo_name",
            subfolder="this is/not\\clever",
            assets_dir=self.cache_dir,
        )
        # Spaces, slashes and backslashes are all expected to come back as "--"
        target = (
            self.cache_dir
            / "ReAlLy--dumb"
            / "user--repo_name"
            / "this--is--not--clever"
        )
        self.assertEqual(generated, target)
        self.assertTrue(generated.is_dir())

    def test_cached_assets_path_default_assets_dir(self) -> None:
        # No `assets_dir` passed: the (patched) HUGGINGFACE_ASSETS_CACHE value is used
        patched_default_dir = patch(
            "huggingface_hub.utils._cache_assets.HUGGINGFACE_ASSETS_CACHE",
            self.cache_dir,
        )
        with patched_default_dir:
            self.assertEqual(
                cached_assets_path(library_name="datasets"),
                self.cache_dir / "datasets" / "default" / "default",
            )

    def test_cached_assets_path_is_a_file(self) -> None:
        # Put a file exactly where the assets folder should be generated
        blocking_file = self.cache_dir / "datasets" / "default" / "default"
        blocking_file.parent.mkdir(parents=True)
        blocking_file.touch()

        with self.assertRaises(ValueError):
            cached_assets_path(library_name="datasets", assets_dir=self.cache_dir)

    def test_cached_assets_path_parent_is_a_file(self) -> None:
        # Put a file where the parent folder should be: the folder cannot be created
        namespace_dir = self.cache_dir / "datasets" / "default"
        namespace_dir.parent.mkdir(parents=True)
        namespace_dir.touch()

        with self.assertRaises(ValueError):
            cached_assets_path(library_name="datasets", assets_dir=self.cache_dir)
88 | 


--------------------------------------------------------------------------------
/.github/workflows/python-tests.yml:
--------------------------------------------------------------------------------
  1 | name: Python tests
  2 | 
  3 | on:
  4 |   push:
  5 |     branches:
  6 |       - main
  7 |       - ci_*
  8 |     paths-ignore:
  9 |       - "docs/**"
 10 |   pull_request:
 11 |     types: [assigned, opened, synchronize, reopened]
 12 |     paths-ignore:
 13 |       - "docs/**"
 14 | 
 15 | jobs:
 16 |   build:
 17 |     runs-on: ubuntu-latest
 18 |     strategy:
 19 |       fail-fast: false
 20 |       matrix:
 21 |         python-version: ["3.7", "3.11"]
 22 |         test_name:
 23 |           [
 24 |             "Repository only",
 25 |             "Everything else",
 26 |             "torch",
 27 |           ]
 28 |         include:
 29 |           - python-version: "3.11" # LFS tests are not run on 3.7
 30 |             test_name: "lfs"
 31 |           - python-version: "3.7"
 32 |             test_name: "fastai"  # fastai not supported on 3.11 -> test it on 3.10
 33 |           - python-version: "3.10"
 34 |             test_name: "fastai"
 35 |           - python-version: "3.7"
 36 |             test_name: "tensorflow" # Tensorflow not supported on 3.11 -> test it on 3.10
 37 |           - python-version: "3.10"
 38 |             test_name: "tensorflow"
 39 | 
 40 |     steps:
 41 |       - uses: actions/checkout@v2
 42 |       - name: Set up Python ${{ matrix.python-version }}
 43 |         uses: actions/setup-python@v2
 44 |         with:
 45 |           python-version: ${{ matrix.python-version }}
 46 | 
 47 |       # Install dependencies
 48 |       - name: Configure and install dependencies
 49 |         run: |
 50 |           pip install --upgrade pip
 51 |           pip install .[testing]
 52 | 
 53 |           case "${{ matrix.test_name }}" in
 54 | 
 55 |             "Repository only" | "Everything else")
 56 |               sudo apt install -y libsndfile1-dev
 57 |               ;;
 58 | 
 59 |             lfs)
 60 |               git config --global user.email "ci@dummy.com"
 61 |               git config --global user.name "ci"
 62 |               ;;
 63 | 
 64 |             fastai | torch)
 65 |               pip install .[${{ matrix.test_name }}]
 66 |               ;;
 67 | 
 68 |             tensorflow)
 69 |               sudo apt install -y graphviz
 70 |               pip install .[tensorflow]
 71 |               ;;
 72 | 
 73 |           esac
 74 | 
 75 |       # Run tests
 76 |       - name: Run tests
 77 |         working-directory: ./src # For code coverage to work
 78 |         run: |
 79 |           PYTEST="python -m pytest --cov=./huggingface_hub --cov-report=xml:../coverage.xml"
 80 | 
 81 |           case "${{ matrix.test_name }}" in
 82 | 
 83 |             "Repository only")
 84 |               eval "$PYTEST ../tests -k 'RepositoryTest or RepositoryDatasetTest'"
 85 |               ;;
 86 | 
 87 |             "Everything else")
 88 |               eval "$PYTEST ../tests -k 'not RepositoryTest and not RepositoryDatasetTest'"
 89 |               ;;
 90 | 
 91 |             lfs)
 92 |               eval "RUN_GIT_LFS_TESTS=1 $PYTEST ../tests -k 'HfLargefilesTest'"
 93 |             ;;
 94 | 
 95 | 
 96 |             fastai)
 97 |               eval "$PYTEST ../tests/test_fastai*"
 98 |             ;;
 99 | 
100 |             tensorflow)
101 |               # Cannot be on same line since '_tf*' checks if tensorflow is NOT imported by default
102 |               eval "$PYTEST ../tests/test_tf*"
103 |               eval "$PYTEST ../tests/test_keras*"
104 |             ;;
105 | 
106 |             torch)
107 |               eval "$PYTEST ../tests/test_hubmixin*"
108 |             ;;
109 | 
110 |           esac
111 | 
112 |       # Upload code coverage
113 |       - name: Upload coverage reports to Codecov with GitHub Action
114 |         uses: codecov/codecov-action@v3
115 |         with:
116 |           files: ./coverage.xml
117 |           verbose: true
118 | 


--------------------------------------------------------------------------------
/docs/source/index.mdx:
--------------------------------------------------------------------------------
 1 | # 🤗 Hub client library
 2 | 
 3 | The `huggingface_hub` library allows you to interact with the [Hugging Face
 4 | Hub](https://hf.co), a machine learning platform for creators and collaborators.
 5 | Discover pre-trained models and datasets for your projects or play with the hundreds of
 6 | machine learning apps hosted on the Hub. You can also create and share your own models
 7 | and datasets with the community. The `huggingface_hub` library provides a simple way to
 8 | do all these things with Python.
 9 | 
10 | Read the [quick start guide](quick-start) to get up and running with the
11 | `huggingface_hub` library. You will learn how to download files from the Hub, create a
12 | repository, and upload files to the Hub. Keep reading to learn more about how to manage
13 | your repositories on the 🤗 Hub, how to interact in discussions or even how to access
14 | the Inference API.
15 | 
16 | <div class="mt-10">
17 |   <div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5">
18 | 
19 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./guides/overview"
20 |       ><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">How-to guides</div>
21 |       <p class="text-gray-700">Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use huggingface_hub to solve real-world problems.</p>
22 |     </a>
23 | 
24 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./package_reference/overview"
25 |       ><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Reference</div>
26 |       <p class="text-gray-700">Exhaustive and technical description of huggingface_hub classes and methods.</p>
27 |     </a>
28 |   </div>
29 | </div>
30 | 
31 | <!-- 
32 | <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./tutorials/overview"
33 |   ><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Tutorials</div>
34 |   <p class="text-gray-700">Learn the basics and become familiar with using huggingface_hub to programmatically interact with the 🤗 Hub!</p>
35 | </a>
36 | <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./concepts/overview"
37 |   ><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Conceptual guides</div>
38 |   <p class="text-gray-700">High-level explanations for building a better understanding of important topics such as huggingface_hub philosophy, the git-based vs http-based paradigm or the cache system internals.</p>
39 | </a> -->
40 | 
41 | ## Contribute
42 | 
43 | All contributions to `huggingface_hub` are welcome and equally valued! 🤗 Besides
44 | adding features or fixing existing issues in the code, you can also help improve the
45 | documentation by making sure it is accurate and up-to-date, help answer questions on
46 | issues, and request new features you think will improve the library. Take a look at the
47 | [contribution
48 | guide](https://github.com/huggingface/huggingface_hub/blob/main/CONTRIBUTING.md) to
49 | learn more about how to submit a new issue or feature request, how to submit a pull
50 | request, and how to test your contributions to make sure everything works as expected.
51 | 
52 | Contributors should also be respectful of our [code of
53 | conduct](https://github.com/huggingface/huggingface_hub/blob/main/CODE_OF_CONDUCT.md) to
54 | create an inclusive and welcoming collaborative space for everyone.


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | from setuptools import find_packages, setup
  2 | 
  3 | 
  4 | def get_version() -> str:
  5 |     rel_path = "src/huggingface_hub/__init__.py"
  6 |     with open(rel_path, "r") as fp:
  7 |         for line in fp.read().splitlines():
  8 |             if line.startswith("__version__"):
  9 |                 delim = '"' if '"' in line else "'"
 10 |                 return line.split(delim)[1]
 11 |     raise RuntimeError("Unable to find version string.")
 12 | 
 13 | 
 14 | install_requires = [
 15 |     "filelock",
 16 |     "requests",
 17 |     "tqdm>=4.42.1",
 18 |     "pyyaml>=5.1",
 19 |     "typing-extensions>=3.7.4.3",  # to be able to import TypeAlias
 20 |     "importlib_metadata;python_version<'3.8'",
 21 |     "packaging>=20.9",
 22 | ]
 23 | 
 24 | extras = {}
 25 | 
 26 | extras["cli"] = [
 27 |     "InquirerPy==0.3.4",
 28 |     # Note: installs `prompt-toolkit` in the background
 29 | ]
 30 | 
 31 | extras["torch"] = [
 32 |     "torch",
 33 | ]
 34 | 
 35 | extras["fastai"] = [
 36 |     "toml",
 37 |     "fastai>=2.4",
 38 |     "fastcore>=1.3.27",
 39 | ]
 40 | 
 41 | extras["tensorflow"] = ["tensorflow", "pydot", "graphviz"]
 42 | 
 43 | extras["testing"] = extras["cli"] + [
 44 |     "isort>=5.5.4",
 45 |     "jedi",
 46 |     "Jinja2",
 47 |     "pytest",
 48 |     "pytest-cov",
 49 |     "pytest-env",
 50 |     "soundfile",
 51 | ]
 52 | 
 53 | # Typing extra dependencies list is duplicated in `.pre-commit-config.yaml`
 54 | # Please make sure to update the list there when adding a new typing dependency.
 55 | extras["typing"] = [
 56 |     "types-PyYAML",
 57 |     "types-requests",
 58 |     "types-simplejson",
 59 |     "types-toml",
 60 |     "types-tqdm",
 61 |     "types-urllib3",
 62 | ]
 63 | 
 64 | extras["quality"] = [
 65 |     "black==22.3",
 66 |     "flake8>=3.8.3",
 67 |     "flake8-bugbear",
 68 |     "isort>=5.5.4",
 69 |     "mypy==0.982",
 70 | ]
 71 | 
 72 | extras["all"] = extras["testing"] + extras["quality"] + extras["typing"]
 73 | 
 74 | extras["dev"] = extras["all"]
 75 | 
 76 | 
 77 | setup(
 78 |     name="huggingface_hub",
 79 |     version=get_version(),
 80 |     author="Hugging Face, Inc.",
 81 |     author_email="julien@huggingface.co",
 82 |     description=(
 83 |         "Client library to download and publish models, datasets and other repos on the"
 84 |         " huggingface.co hub"
 85 |     ),
 86 |     long_description=open("README.md", "r", encoding="utf-8").read(),
 87 |     long_description_content_type="text/markdown",
 88 |     keywords=(
 89 |         "model-hub machine-learning models natural-language-processing deep-learning"
 90 |         " pytorch pretrained-models"
 91 |     ),
 92 |     license="Apache",
 93 |     url="https://github.com/huggingface/huggingface_hub",
 94 |     package_dir={"": "src"},
 95 |     packages=find_packages("src"),
 96 |     extras_require=extras,
 97 |     entry_points={
 98 |         "console_scripts": [
 99 |             "huggingface-cli=huggingface_hub.commands.huggingface_cli:main"
100 |         ]
101 |     },
102 |     python_requires=">=3.7.0",
103 |     install_requires=install_requires,
104 |     classifiers=[
105 |         "Intended Audience :: Developers",
106 |         "Intended Audience :: Education",
107 |         "Intended Audience :: Science/Research",
108 |         "License :: OSI Approved :: Apache Software License",
109 |         "Operating System :: OS Independent",
110 |         "Programming Language :: Python :: 3",
111 |         "Programming Language :: Python :: 3 :: Only",
112 |         "Programming Language :: Python :: 3.7",
113 |         "Programming Language :: Python :: 3.8",
114 |         "Programming Language :: Python :: 3.9",
115 |         "Programming Language :: Python :: 3.10",
116 |         "Programming Language :: Python :: 3.11",
117 |         "Topic :: Scientific/Engineering :: Artificial Intelligence",
118 |     ],
119 |     include_package_data=True,
120 | )
121 | 


--------------------------------------------------------------------------------
/tests/test_utils_paths.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from dataclasses import dataclass
  3 | from pathlib import Path
  4 | from typing import Any, Callable, List, Optional, Union
  5 | 
  6 | from huggingface_hub.utils import filter_repo_objects
  7 | 
  8 | 
  9 | @dataclass
 10 | class DummyObject:
 11 |     path: str  # instances are built from DUMMY_FILES, i.e. plain string paths
 12 | 
 13 | 
 14 | DUMMY_FILES = ["not_hidden.pdf", "profile.jpg", ".hidden.pdf", ".hidden_picture.png"]
 15 | DUMMY_PATHS = [Path(path) for path in DUMMY_FILES]
 16 | DUMMY_OBJECTS = [DummyObject(path=path) for path in DUMMY_FILES]
 17 | 
 18 | 
 19 | class TestPathsUtils(unittest.TestCase):
 20 |     def test_get_all_pdfs(self) -> None:
 21 |         """Get all PDFs even hidden ones."""
 22 |         self._check(
 23 |             items=DUMMY_FILES,
 24 |             expected_items=["not_hidden.pdf", ".hidden.pdf"],
 25 |             allow_patterns=["*.pdf"],
 26 |         )
 27 | 
 28 |     def test_get_all_pdfs_except_hidden(self) -> None:
 29 |         """Get all PDFs except hidden ones."""
 30 |         self._check(
 31 |             items=DUMMY_FILES,
 32 |             expected_items=["not_hidden.pdf"],
 33 |             allow_patterns=["*.pdf"],
 34 |             ignore_patterns=[".*"],
 35 |         )
 36 | 
 37 |     def test_get_all_pdfs_except_hidden_using_single_pattern(self) -> None:
 38 |         """Get all PDFs except hidden ones, using single pattern."""
 39 |         self._check(
 40 |             items=DUMMY_FILES,
 41 |             expected_items=["not_hidden.pdf"],
 42 |             allow_patterns="*.pdf",  # not a list
 43 |             ignore_patterns=".*",  # not a list
 44 |         )
 45 | 
 46 |     def test_get_all_images(self) -> None:
 47 |         """Get all images."""
 48 |         self._check(
 49 |             items=DUMMY_FILES,
 50 |             expected_items=["profile.jpg", ".hidden_picture.png"],
 51 |             allow_patterns=["*.png", "*.jpg"],
 52 |         )
 53 | 
 54 |     def test_get_all_images_except_hidden_from_paths(self) -> None:
 55 |         """Get all images except hidden ones, from Path list."""
 56 |         self._check(
 57 |             items=DUMMY_PATHS,
 58 |             expected_items=[Path("profile.jpg")],
 59 |             allow_patterns=["*.png", "*.jpg"],
 60 |             ignore_patterns=".*",
 61 |         )
 62 | 
 63 |     def test_get_all_images_except_hidden_from_objects(self) -> None:
 64 |         """Get all images except hidden ones, from object list."""
 65 |         self._check(
 66 |             items=DUMMY_OBJECTS,
 67 |             expected_items=[DummyObject(path="profile.jpg")],
 68 |             allow_patterns=["*.png", "*.jpg"],
 69 |             ignore_patterns=".*",
 70 |             key=lambda x: x.path,
 71 |         )
 72 | 
 73 |     def test_filter_objects_key_not_provided(self) -> None:
 74 |         """Test ValueError is raised if filtering non-string objects."""
 75 |         with self.assertRaisesRegex(ValueError, "Please provide `key` argument"):
 76 |             list(
 77 |                 filter_repo_objects(
 78 |                     items=DUMMY_OBJECTS,
 79 |                     allow_patterns=["*.png", "*.jpg"],
 80 |                     ignore_patterns=".*",
 81 |                 )
 82 |             )
 83 | 
 84 |     def _check(
 85 |         self,
 86 |         items: List[Any],
 87 |         expected_items: List[Any],
 88 |         allow_patterns: Optional[Union[List[str], str]] = None,
 89 |         ignore_patterns: Optional[Union[List[str], str]] = None,
 90 |         key: Optional[Callable[[Any], str]] = None,
 91 |     ) -> None:
 92 |         """Run `filter_repo_objects` and check output against expected result."""
 93 |         self.assertListEqual(
 94 |             list(
 95 |                 filter_repo_objects(
 96 |                     items=items,
 97 |                     allow_patterns=allow_patterns,
 98 |                     ignore_patterns=ignore_patterns,
 99 |                     key=key,
100 |                 )
101 |             ),
102 |             expected_items,
103 |         )
104 | 


--------------------------------------------------------------------------------
/tests/test_commit_api.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from huggingface_hub._commit_api import (
 4 |     CommitOperationAdd,
 5 |     CommitOperationDelete,
 6 |     warn_on_overwriting_operations,
 7 | )
 8 | 
 9 | 
10 | class TestCommitOperationDelete(unittest.TestCase):
11 |     def test_implicit_file(self):
12 |         self.assertFalse(CommitOperationDelete(path_in_repo="path/to/file").is_folder)
13 |         self.assertFalse(
14 |             CommitOperationDelete(path_in_repo="path/to/file.md").is_folder
15 |         )
16 | 
17 |     def test_implicit_folder(self):
18 |         self.assertTrue(CommitOperationDelete(path_in_repo="path/to/folder/").is_folder)
19 |         self.assertTrue(
20 |             CommitOperationDelete(path_in_repo="path/to/folder.md/").is_folder
21 |         )
22 | 
23 |     def test_explicit_file(self):
24 |         # Weird case: if user explicitly set as file (`is_folder`=False) but path has a
25 |         # trailing "/" => user input has priority
26 |         self.assertFalse(
27 |             CommitOperationDelete(
28 |                 path_in_repo="path/to/folder/", is_folder=False
29 |             ).is_folder
30 |         )
31 |         self.assertFalse(
32 |             CommitOperationDelete(
33 |                 path_in_repo="path/to/folder.md/", is_folder=False
34 |             ).is_folder
35 |         )
36 | 
37 |     def test_explicit_folder(self):
38 |         # No need for the trailing "/" is `is_folder` explicitly passed
39 |         self.assertTrue(
40 |             CommitOperationDelete(
41 |                 path_in_repo="path/to/folder", is_folder=True
42 |             ).is_folder
43 |         )
44 |         self.assertTrue(
45 |             CommitOperationDelete(
46 |                 path_in_repo="path/to/folder.md", is_folder=True
47 |             ).is_folder
48 |         )
49 | 
50 |     def test_is_folder_wrong_value(self):
51 |         with self.assertRaises(ValueError):
52 |             CommitOperationDelete(path_in_repo="path/to/folder", is_folder="any value")
53 | 
54 | 
55 | class TestWarnOnOverwritingOperations(unittest.TestCase):
56 | 
57 |     add_file_ab = CommitOperationAdd(path_in_repo="a/b.txt", path_or_fileobj=b"data")
58 |     add_file_abc = CommitOperationAdd(path_in_repo="a/b/c.md", path_or_fileobj=b"data")
59 |     add_file_abd = CommitOperationAdd(path_in_repo="a/b/d.md", path_or_fileobj=b"data")
60 |     update_file_abc = CommitOperationAdd(
61 |         path_in_repo="a/b/c.md", path_or_fileobj=b"updated data"
62 |     )
63 |     delete_file_abc = CommitOperationDelete(path_in_repo="a/b/c.md")
64 |     delete_folder_a = CommitOperationDelete(path_in_repo="a/")
65 |     delete_folder_e = CommitOperationDelete(path_in_repo="e/")
66 | 
67 |     def test_no_overwrite(self) -> None:
68 |         warn_on_overwriting_operations(
69 |             [
70 |                 self.add_file_ab,
71 |                 self.add_file_abc,
72 |                 self.add_file_abd,
73 |                 self.delete_folder_e,
74 |             ]
75 |         )
76 | 
77 |     def test_add_then_update_file(self) -> None:
78 |         with self.assertWarns(UserWarning):
79 |             warn_on_overwriting_operations([self.add_file_abc, self.update_file_abc])
80 | 
81 |     def test_add_then_delete_file(self) -> None:
82 |         with self.assertWarns(UserWarning):
83 |             warn_on_overwriting_operations([self.add_file_abc, self.delete_file_abc])
84 | 
85 |     def test_add_then_delete_folder(self) -> None:
86 |         with self.assertWarns(UserWarning):
87 |             warn_on_overwriting_operations([self.add_file_abc, self.delete_folder_a])
88 | 
89 |         with self.assertWarns(UserWarning):
90 |             warn_on_overwriting_operations([self.add_file_ab, self.delete_folder_a])
91 | 
92 |     def test_delete_file_then_add(self) -> None:
93 |         warn_on_overwriting_operations([self.delete_file_abc, self.add_file_abc])
94 | 
95 |     def test_delete_folder_then_add(self) -> None:
96 |         warn_on_overwriting_operations(
97 |             [self.delete_folder_a, self.add_file_ab, self.add_file_abc]
98 |         )
99 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/constants.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | from typing import Optional
  4 | 
  5 | 
  6 | # Possible values for env variables
  7 | 
  8 | ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
  9 | ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
 10 | 
 11 | 
 12 | def _is_true(value: Optional[str]) -> bool:
 13 |     if value is None:
 14 |         return False
 15 |     return value.upper() in ENV_VARS_TRUE_VALUES
 16 | 
 17 | 
 18 | def _is_true_or_auto(value: Optional[str]) -> bool:
 19 |     if value is None:
 20 |         return False
 21 |     return value.upper() in ENV_VARS_TRUE_AND_AUTO_VALUES
 22 | 
 23 | 
 24 | # Constants for file downloads
 25 | 
 26 | PYTORCH_WEIGHTS_NAME = "pytorch_model.bin"
 27 | TF2_WEIGHTS_NAME = "tf_model.h5"
 28 | TF_WEIGHTS_NAME = "model.ckpt"
 29 | FLAX_WEIGHTS_NAME = "flax_model.msgpack"
 30 | CONFIG_NAME = "config.json"
 31 | REPOCARD_NAME = "README.md"
 32 | 
 33 | # Git-related constants
 34 | 
 35 | DEFAULT_REVISION = "main"
 36 | REGEX_COMMIT_OID = re.compile(r"[A-Fa-f0-9]{5,40}")
 37 | 
 38 | HUGGINGFACE_CO_URL_HOME = "https://huggingface.co/"
 39 | 
 40 | _staging_mode = _is_true(os.environ.get("HUGGINGFACE_CO_STAGING"))
 41 | 
 42 | ENDPOINT = os.getenv("HF_ENDPOINT") or (
 43 |     "https://hub-ci.huggingface.co" if _staging_mode else "https://huggingface.co"
 44 | )
 45 | 
 46 | HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
 47 | HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit"
 48 | HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag"
 49 | HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"
 50 | 
 51 | REPO_ID_SEPARATOR = "--"
 52 | # ^ this substring is not allowed in repo_ids on hf.co
 53 | # and is the canonical one we use for serialization of repo ids elsewhere.
 54 | 
 55 | 
 56 | REPO_TYPE_DATASET = "dataset"
 57 | REPO_TYPE_SPACE = "space"
 58 | REPO_TYPE_MODEL = "model"
 59 | REPO_TYPES = [None, REPO_TYPE_MODEL, REPO_TYPE_DATASET, REPO_TYPE_SPACE]
 60 | SPACES_SDK_TYPES = ["gradio", "streamlit", "static"]
 61 | 
 62 | REPO_TYPES_URL_PREFIXES = {
 63 |     REPO_TYPE_DATASET: "datasets/",
 64 |     REPO_TYPE_SPACE: "spaces/",
 65 | }
 66 | REPO_TYPES_MAPPING = {
 67 |     "datasets": REPO_TYPE_DATASET,
 68 |     "spaces": REPO_TYPE_SPACE,
 69 |     "models": REPO_TYPE_MODEL,
 70 | }
 71 | 
 72 | 
 73 | # default cache
 74 | default_home = os.path.join(os.path.expanduser("~"), ".cache")
 75 | hf_cache_home = os.path.expanduser(
 76 |     os.getenv(
 77 |         "HF_HOME",
 78 |         os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),
 79 |     )
 80 | )
 81 | 
 82 | default_cache_path = os.path.join(hf_cache_home, "hub")
 83 | default_assets_cache_path = os.path.join(hf_cache_home, "assets")
 84 | 
 85 | HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
 86 | HUGGINGFACE_ASSETS_CACHE = os.getenv(
 87 |     "HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path
 88 | )
 89 | 
 90 | HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE"))
 91 | 
 92 | 
 93 | # Here, `True` will disable progress bars globally without possibility of enabling it
 94 | # programmatically. `False` will enable them without possibility of disabling them.
 95 | # If environment variable is not set (None), then the user is free to enable/disable
 96 | # them programmatically.
 97 | # TL;DR: env variable has priority over code
 98 | __HF_HUB_DISABLE_PROGRESS_BARS = os.environ.get("HF_HUB_DISABLE_PROGRESS_BARS")
 99 | HF_HUB_DISABLE_PROGRESS_BARS: Optional[bool] = (
100 |     _is_true(__HF_HUB_DISABLE_PROGRESS_BARS)
101 |     if __HF_HUB_DISABLE_PROGRESS_BARS is not None
102 |     else None
103 | )
104 | 
105 | # Disable warning on machines that do not support symlinks (e.g. Windows non-developer)
106 | HF_HUB_DISABLE_SYMLINKS_WARNING: bool = _is_true(
107 |     os.environ.get("HF_HUB_DISABLE_SYMLINKS_WARNING")
108 | )
109 | 
110 | # Disable sending the cached token by default in all HTTP requests to the Hub
111 | HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(
112 |     os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN")
113 | )
114 | 


--------------------------------------------------------------------------------
/docs/source/how-to-inference.mdx:
--------------------------------------------------------------------------------
 1 | # Access the Inference API
 2 | 
 3 | The Inference API provides fast inference for your hosted models. The Inference API can be accessed via usual HTTP requests with your favorite programming language, but the `huggingface_hub` library has a client wrapper to access the Inference API programmatically. This guide will show you how to make calls to the Inference API with the `huggingface_hub` library.
 4 | 
 5 | <Tip>
 6 | 
 7 | If you want to make the HTTP calls directly, please refer to [Accelerated Inference API Documentation](https://api-inference.huggingface.co/docs/python/html/index.html) or to the sample snippets visible on every supported model page.
 8 | 
 9 | </Tip>
10 | 
11 | <div class="flex justify-center">
12 | <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/inference_api_snippet.png"/>
13 | <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/inference_api_snippet-dark.png"/>
14 | </div>
15 | 
16 | Begin by creating an instance of the [`InferenceApi`] with the model repository ID of the model you want to use. You can find your `API_TOKEN` under Settings from your Hugging Face account. The `API_TOKEN` will allow you to send requests to the Inference API.
17 | 
18 | ```python
19 | >>> from huggingface_hub.inference_api import InferenceApi
20 | >>> inference = InferenceApi(repo_id="bert-base-uncased", token=API_TOKEN)
21 | ```
22 | 
23 | The metadata in the model card and configuration files (see [here](https://huggingface.co/docs/hub/models-widgets#enabling-a-widget) for more details) determines the pipeline type. For example, when using the [bert-base-uncased](https://huggingface.co/bert-base-uncased) model, the Inference API can automatically infer that this model should be used for a `fill-mask` task.
24 | 
25 | ```python
26 | >>> from huggingface_hub.inference_api import InferenceApi
27 | >>> inference = InferenceApi(repo_id="bert-base-uncased", token=API_TOKEN)
28 | >>> inference(inputs="The goal of life is [MASK].")
29 | [{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}]
30 | ```
31 | 
32 | Each task requires a different type of input. A `question-answering` task expects a dictionary with the `question` and `context` keys as the input:
33 | 
34 | ```python
35 | >>> inference = InferenceApi(repo_id="deepset/roberta-base-squad2", token=API_TOKEN)
36 | >>> inputs = {"question":"Where is Hugging Face headquarters?", "context":"Hugging Face is based in Brooklyn, New York. There is also an office in Paris, France."}
37 | >>> inference(inputs)
38 | {'score': 0.94622403383255, 'start': 25, 'end': 43, 'answer': 'Brooklyn, New York'}
39 | ```
40 | 
41 | Some tasks may require additional parameters (see [here](https://api-inference.huggingface.co/docs/python/html/detailed_parameters.html) for a detailed list of all parameters for each task). As an example, for `zero-shot-classification` tasks, the model needs candidate labels that can be supplied to `params`:
42 | 
43 | ```python
44 | >>> inference = InferenceApi(repo_id="typeform/distilbert-base-uncased-mnli", token=API_TOKEN)
45 | >>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"
46 | >>> params = {"candidate_labels":["refund", "legal", "faq"]}
47 | >>> inference(inputs, params)
48 | {'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]}
49 | ```
50 | 
51 | Some models may support multiple tasks. The `sentence-transformers` models can complete both `sentence-similarity` and `feature-extraction` tasks. Specify which task you want to perform with the `task` parameter:
52 | 
53 | ```python
54 | >>> inference = InferenceApi(repo_id="paraphrase-xlm-r-multilingual-v1", 
55 | ...                          task="feature-extraction", 
56 | ...                          token=API_TOKEN,
57 | ... )
58 | ```


--------------------------------------------------------------------------------
/tests/test_fastai_integration.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import shutil
  3 | from unittest import TestCase, skip
  4 | 
  5 | from huggingface_hub import HfApi
  6 | from huggingface_hub.fastai_utils import (
  7 |     _save_pretrained_fastai,
  8 |     from_pretrained_fastai,
  9 |     push_to_hub_fastai,
 10 | )
 11 | from huggingface_hub.utils import (
 12 |     is_fastai_available,
 13 |     is_fastcore_available,
 14 |     is_torch_available,
 15 | )
 16 | 
 17 | from .testing_constants import ENDPOINT_STAGING, TOKEN, USER
 18 | from .testing_utils import expect_deprecation, repo_name, set_write_permission_and_retry
 19 | 
 20 | 
 21 | WORKING_REPO_SUBDIR = f"fixtures/working_repo_{__name__.split('.')[-1]}"
 22 | WORKING_REPO_DIR = os.path.join(
 23 |     os.path.dirname(os.path.abspath(__file__)), WORKING_REPO_SUBDIR
 24 | )
 25 | 
 26 | if is_fastai_available():
 27 |     from fastai.data.block import DataBlock
 28 |     from fastai.test_utils import synth_learner
 29 | 
 30 | if is_torch_available():
 31 |     import torch
 32 | 
 33 | 
 34 | def require_fastai_fastcore(test_case):
 35 |     """
 36 |     Decorator marking a test that requires fastai and fastcore.
 37 |     These tests are skipped when fastai and fastcore are not installed.
 38 |     """
 39 |     if not is_fastai_available():
 40 |         return skip("Test requires fastai")(test_case)
 41 |     elif not is_fastcore_available():
 42 |         return skip("Test requires fastcore")(test_case)
 43 |     else:
 44 |         return test_case
 45 | 
 46 | 
 47 | def fake_dataloaders(a=2, b=3, bs=16, n=10):
 48 |     def get_data(n):
 49 |         x = torch.randn(bs * n, 1)
 50 |         return torch.cat((x, a * x + b + 0.1 * torch.randn(bs * n, 1)), 1)
 51 | 
 52 |     ds = get_data(n)
 53 |     dblock = DataBlock()
 54 |     return dblock.dataloaders(ds)
 55 | 
 56 | 
 57 | if is_fastai_available():
 58 |     dummy_model = synth_learner(data=fake_dataloaders())
 59 |     dummy_config = dict(test="test_0")
 60 | else:
 61 |     dummy_model = None
 62 |     dummy_config = None
 63 | 
 64 | 
 65 | @require_fastai_fastcore
 66 | class TestFastaiUtils(TestCase):
 67 |     @classmethod
 68 |     @expect_deprecation("set_access_token")
 69 |     def setUpClass(cls):
 70 |         """
 71 |         Share this valid token in all tests below.
 72 |         """
 73 |         cls._api = HfApi(endpoint=ENDPOINT_STAGING, token=TOKEN)
 74 |         cls._token = TOKEN
 75 |         cls._api.set_access_token(TOKEN)
 76 | 
 77 |     def tearDown(self) -> None:
 78 |         try:
 79 |             shutil.rmtree(WORKING_REPO_DIR, onerror=set_write_permission_and_retry)
 80 |         except FileNotFoundError:
 81 |             pass
 82 | 
 83 |     def test_save_pretrained_without_config(self):
 84 |         REPO_NAME = repo_name("fastai-save")
 85 |         _save_pretrained_fastai(dummy_model, f"{WORKING_REPO_DIR}/{REPO_NAME}")
 86 |         files = os.listdir(f"{WORKING_REPO_DIR}/{REPO_NAME}")
 87 |         self.assertTrue("model.pkl" in files)
 88 |         self.assertTrue("pyproject.toml" in files)
 89 |         self.assertTrue("README.md" in files)
 90 |         self.assertEqual(len(files), 3)
 91 | 
 92 |     def test_save_pretrained_with_config(self):
 93 |         REPO_NAME = repo_name("fastai-save")
 94 |         _save_pretrained_fastai(
 95 |             dummy_model, f"{WORKING_REPO_DIR}/{REPO_NAME}", config=dummy_config
 96 |         )
 97 |         files = os.listdir(f"{WORKING_REPO_DIR}/{REPO_NAME}")
 98 |         self.assertTrue("config.json" in files)
 99 |         self.assertEqual(len(files), 4)
100 | 
101 |     def test_push_to_hub_and_from_pretrained_fastai(self):
102 |         REPO_NAME = repo_name("fastai-push_to_hub")
103 |         push_to_hub_fastai(
104 |             learner=dummy_model,
105 |             repo_id=f"{USER}/{REPO_NAME}",
106 |             token=self._token,
107 |             config=dummy_config,
108 |         )
109 |         model_info = self._api.model_info(f"{USER}/{REPO_NAME}")
110 |         self.assertEqual(model_info.modelId, f"{USER}/{REPO_NAME}")
111 | 
112 |         loaded_model = from_pretrained_fastai(f"{USER}/{REPO_NAME}")
113 |         self.assertEqual(
114 |             dummy_model.show_training_loop(), loaded_model.show_training_loop()
115 |         )
116 |         self._api.delete_repo(repo_id=f"{REPO_NAME}")
117 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/templates/datasetcard_template.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | {{ card_data }}
  3 | ---
  4 | 
  5 | # Dataset Card for {{ pretty_name | default("Dataset Name", true) }}
  6 | 
  7 | ## Table of Contents
  8 | - [Table of Contents](#table-of-contents)
  9 | - [Dataset Description](#dataset-description)
 10 |   - [Dataset Summary](#dataset-summary)
 11 |   - [Supported Tasks and Leaderboards](#supported-tasks-and-leaderboards)
 12 |   - [Languages](#languages)
 13 | - [Dataset Structure](#dataset-structure)
 14 |   - [Data Instances](#data-instances)
 15 |   - [Data Fields](#data-fields)
 16 |   - [Data Splits](#data-splits)
 17 | - [Dataset Creation](#dataset-creation)
 18 |   - [Curation Rationale](#curation-rationale)
 19 |   - [Source Data](#source-data)
 20 |   - [Annotations](#annotations)
 21 |   - [Personal and Sensitive Information](#personal-and-sensitive-information)
 22 | - [Considerations for Using the Data](#considerations-for-using-the-data)
 23 |   - [Social Impact of Dataset](#social-impact-of-dataset)
 24 |   - [Discussion of Biases](#discussion-of-biases)
 25 |   - [Other Known Limitations](#other-known-limitations)
 26 | - [Additional Information](#additional-information)
 27 |   - [Dataset Curators](#dataset-curators)
 28 |   - [Licensing Information](#licensing-information)
 29 |   - [Citation Information](#citation-information)
 30 |   - [Contributions](#contributions)
 31 | 
 32 | ## Dataset Description
 33 | 
 34 | - **Homepage:** {{ homepage_url | default("", true)}}
 35 | - **Repository:** {{ repo_url | default("", true)}}
 36 | - **Paper:** {{ paper_url | default("", true)}}
 37 | - **Leaderboard:** {{ leaderboard_url | default("", true)}}
 38 | - **Point of Contact:** {{ point_of_contact | default("", true)}}
 39 | 
 40 | ### Dataset Summary
 41 | 
 42 | {{ dataset_summary | default("[More Information Needed]", true)}}
 43 | 
 44 | ### Supported Tasks and Leaderboards
 45 | 
 46 | {{ supported_tasks_and_leaderboards_section | default("[More Information Needed]", true)}}
 47 | 
 48 | ### Languages
 49 | 
 50 | {{ languages_section | default("[More Information Needed]", true)}}
 51 | 
 52 | ## Dataset Structure
 53 | 
 54 | ### Data Instances
 55 | 
 56 | {{ data_instances_section | default("[More Information Needed]", true)}}
 57 | 
 58 | ### Data Fields
 59 | 
 60 | {{ data_fields_section | default("[More Information Needed]", true)}}
 61 | 
 62 | ### Data Splits
 63 | 
 64 | {{ data_splits_section | default("[More Information Needed]", true)}}
 65 | 
 66 | ## Dataset Creation
 67 | 
 68 | ### Curation Rationale
 69 | 
 70 | {{ curation_rationale_section | default("[More Information Needed]", true)}}
 71 | 
 72 | ### Source Data
 73 | 
 74 | #### Initial Data Collection and Normalization
 75 | 
 76 | {{ data_collection_section | default("[More Information Needed]", true)}}
 77 | 
 78 | #### Who are the source language producers?
 79 | 
 80 | {{ source_language_producers_section | default("[More Information Needed]", true)}}
 81 | 
 82 | ### Annotations
 83 | 
 84 | #### Annotation process
 85 | 
 86 | {{ annotation_process_section | default("[More Information Needed]", true)}}
 87 | 
 88 | #### Who are the annotators?
 89 | 
 90 | {{ who_are_annotators_section | default("[More Information Needed]", true)}}
 91 | 
 92 | ### Personal and Sensitive Information
 93 | 
 94 | {{ personal_and_sensitive_information_section | default("[More Information Needed]", true)}}
 95 | 
 96 | ## Considerations for Using the Data
 97 | 
 98 | ### Social Impact of Dataset
 99 | 
100 | {{ social_impact_section | default("[More Information Needed]", true)}}
101 | 
102 | ### Discussion of Biases
103 | 
104 | {{ discussion_of_biases_section | default("[More Information Needed]", true)}}
105 | 
106 | ### Other Known Limitations
107 | 
108 | {{ known_limitations_section | default("[More Information Needed]", true)}}
109 | 
110 | ## Additional Information
111 | 
112 | ### Dataset Curators
113 | 
114 | {{ dataset_curators_section | default("[More Information Needed]", true)}}
115 | 
116 | ### Licensing Information
117 | 
118 | {{ licensing_information_section | default("[More Information Needed]", true)}}
119 | 
120 | ### Citation Information
121 | 
122 | {{ citation_information_section | default("[More Information Needed]", true)}}
123 | 
124 | ### Contributions
125 | 
126 | Thanks to [@github-username](https://github.com/<github-username>) for adding this dataset.


--------------------------------------------------------------------------------
/utils/check_contrib_list.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # Copyright 2022-present, the HuggingFace Inc. team.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """Contains a tool to list contrib test suites automatically."""
 16 | import argparse
 17 | import re
 18 | from pathlib import Path
 19 | from typing import NoReturn
 20 | 
 21 | 
# Repository root: this script lives in `<root>/utils/`, hence the two `.parent` hops.
ROOT_DIR = Path(__file__).parent.parent
# Folder whose sub-folders are listed as contrib test suites.
CONTRIB_PATH = ROOT_DIR / "contrib"
# Files that are checked against (and optionally updated with) that list.
MAKEFILE_PATH = ROOT_DIR / "Makefile"
WORKFLOW_PATH = ROOT_DIR / ".github" / "workflows" / "contrib-tests.yml"

# Matches the whole `CONTRIB_LIBS := ...` line of the Makefile.
MAKEFILE_REGEX = re.compile(r"^CONTRIB_LIBS := .*$", flags=re.MULTILINE)
# Matches the `contrib: [ ... ]` list of the workflow file. The opening and closing
# lines are captured as the named groups `before` and `after` so that a substitution
# can keep them and only replace the entries in between.
WORKFLOW_REGEX = re.compile(
    r"""
    # First: match "contrib: ["
    (?P<before>^\s{8}contrib:\s\[\n)
    # Match list of libs
    (\s{10}\".*\",\n)*
    # Finally: match trailing "]"
    (?P<after>^\s{8}\])
    """,
    flags=re.MULTILINE | re.VERBOSE,
)
 39 | 
 40 | 
 41 | def check_contrib_list(update: bool) -> NoReturn:
 42 |     """List `contrib` test suites.
 43 | 
 44 |     Make sure `Makefile` and `.github/workflows/contrib-tests.yml` are consistent with
 45 |     the list."""
 46 |     # List contrib test suites
 47 |     contrib_list = sorted(
 48 |         path.name
 49 |         for path in CONTRIB_PATH.glob("*")
 50 |         if path.is_dir() and not path.name.startswith("_")
 51 |     )
 52 | 
 53 |     # Check Makefile is consistent with list
 54 |     makefile_content = MAKEFILE_PATH.read_text()
 55 |     makefile_expected_content = MAKEFILE_REGEX.sub(
 56 |         f"CONTRIB_LIBS := {' '.join(contrib_list)}", makefile_content
 57 |     )
 58 | 
 59 |     # Check workflow is consistent with list
 60 |     workflow_content = WORKFLOW_PATH.read_text()
 61 |     _substitute = "\n".join(f'{" "*10}"{lib}",' for lib in contrib_list)
 62 |     workflow_content_expected = WORKFLOW_REGEX.sub(
 63 |         rf"\g<before>{_substitute}\n\g<after>", workflow_content
 64 |     )
 65 | 
 66 |     #
 67 |     failed = False
 68 |     if makefile_content != makefile_expected_content:
 69 |         if update:
 70 |             print(
 71 |                 "✅ Contrib libs have been updated in `Makefile`."
 72 |                 "\n   Please make sure the changes are accurate and commit them."
 73 |             )
 74 |             MAKEFILE_PATH.write_text(makefile_expected_content)
 75 |         else:
 76 |             print(
 77 |                 "❌ Expected content mismatch in `Makefile`.\n   It is most likely that"
 78 |                 " you added a contrib test and did not update the Makefile.\n   Please"
 79 |                 " run `make style` or `python utils/check_contrib_list.py --update`."
 80 |             )
 81 |             failed = True
 82 | 
 83 |     if workflow_content != workflow_content_expected:
 84 |         if update:
 85 |             print(
 86 |                 f"✅ Contrib libs have been updated in `{WORKFLOW_PATH}`."
 87 |                 "\n   Please make sure the changes are accurate and commit them."
 88 |             )
 89 |             WORKFLOW_PATH.write_text(workflow_content_expected)
 90 |         else:
 91 |             print(
 92 |                 f"❌ Expected content mismatch in `{WORKFLOW_PATH}`.\n   It is most"
 93 |                 " likely that you added a contrib test and did not update the github"
 94 |                 " workflow file.\n   Please run `make style` or `python"
 95 |                 " utils/check_contrib_list.py --update`."
 96 |             )
 97 |             failed = True
 98 | 
 99 |     if failed:
100 |         exit(1)
101 |     print("✅ All good! (contrib list)")
102 |     exit(0)
103 | 
104 | 
if __name__ == "__main__":
    # Command line entry point: `--update` switches from "check only" to "fix in place".
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--update",
        help="Whether to fix Makefile and github workflow if a new lib is detected.",
        action="store_true",
    )
    check_contrib_list(update=cli.parse_args().update)
115 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/tqdm.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding=utf-8
  3 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License
 16 | """Utility helpers to handle progress bars in `huggingface_hub`.
 17 | 
 18 | Example:
 19 |     1. Use `huggingface_hub.utils.tqdm` as you would use `tqdm.tqdm` or `tqdm.auto.tqdm`.
 20 |     2. To disable progress bars, either use `disable_progress_bars()` helper or set the
 21 |        environment variable `HF_HUB_DISABLE_PROGRESS_BARS` to 1.
 22 |     3. To re-enable progress bars, use `enable_progress_bars()`.
 23 |     4. To check whether progress bars are disabled, use `are_progress_bars_disabled()`.
 24 | 
 25 | NOTE: Environment variable `HF_HUB_DISABLE_PROGRESS_BARS` has the priority.
 26 | 
 27 | Example:
 28 |     ```py
 29 |     from huggingface_hub.utils import (
 30 |         are_progress_bars_disabled,
 31 |         disable_progress_bars,
 32 |         enable_progress_bars,
 33 |         tqdm,
 34 |     )
 35 | 
 36 |     # Disable progress bars globally
 37 |     disable_progress_bars()
 38 | 
 39 |     # Use as normal `tqdm`
 40 |     for _ in tqdm(range(5)):
 41 |        do_something()
 42 | 
 43 |     # Still not showing progress bars, as `disable=False` is overwritten to `True`.
 44 |     for _ in tqdm(range(5), disable=False):
 45 |        do_something()
 46 | 
 47 |     are_progress_bars_disabled() # True
 48 | 
 49 |     # Re-enable progress bars globally
 50 |     enable_progress_bars()
 51 | 
 52 |     # Progress bar will be shown !
 53 |     for _ in tqdm(range(5)):
 54 |        do_something()
 55 |     ```
 56 | """
 57 | import warnings
 58 | 
 59 | from tqdm.auto import tqdm as old_tqdm
 60 | 
 61 | from ..constants import HF_HUB_DISABLE_PROGRESS_BARS
 62 | 
 63 | 
# `HF_HUB_DISABLE_PROGRESS_BARS` is `Optional[bool]` while `_hf_hub_progress_bars_disabled`
# is a `bool`. If `HF_HUB_DISABLE_PROGRESS_BARS` is set to True or False, it has priority.
# If `HF_HUB_DISABLE_PROGRESS_BARS` is None, it means the user has not set the
# environment variable and is free to enable/disable progress bars programmatically.
# TL;DR: env variable has priority over code.
#
# By default, progress bars are enabled (`None or False` evaluates to `False`).
_hf_hub_progress_bars_disabled: bool = HF_HUB_DISABLE_PROGRESS_BARS or False
 72 | 
 73 | 
 74 | def disable_progress_bars() -> None:
 75 |     """
 76 |     Disable globally progress bars used in `huggingface_hub` except if
 77 |     `HF_HUB_DISABLE_PROGRESS_BARS` environment variable has been set.
 78 |     """
 79 |     if HF_HUB_DISABLE_PROGRESS_BARS is False:
 80 |         warnings.warn(
 81 |             "Cannot disable progress bars: environment variable"
 82 |             " `HF_HUB_DISABLE_PROGRESS_BARS=0` is set and has priority."
 83 |         )
 84 |         return
 85 |     global _hf_hub_progress_bars_disabled
 86 |     _hf_hub_progress_bars_disabled = True
 87 | 
 88 | 
 89 | def enable_progress_bars() -> None:
 90 |     """
 91 |     Enable globally progress bars used in `huggingface_hub` except if
 92 |     `HF_HUB_DISABLE_PROGRESS_BARS` environment variable has been set.
 93 |     """
 94 |     if HF_HUB_DISABLE_PROGRESS_BARS is True:
 95 |         warnings.warn(
 96 |             "Cannot enable progress bars: environment variable"
 97 |             " `HF_HUB_DISABLE_PROGRESS_BARS=1` is set and has priority."
 98 |         )
 99 |         return
100 |     global _hf_hub_progress_bars_disabled
101 |     _hf_hub_progress_bars_disabled = False
102 | 
103 | 
def are_progress_bars_disabled() -> bool:
    """Tell whether progress bars are currently switched off globally."""
    # Read-only access to the module-level flag: no `global` statement is required.
    return _hf_hub_progress_bars_disabled
108 | 
109 | 
class tqdm(old_tqdm):
    """
    Progress bar class that forces `disable=True` when progress bars are globally
    disabled.

    Taken from https://github.com/tqdm/tqdm/issues/619#issuecomment-619639324.
    """

    def __init__(self, *args, **kwargs):
        # The global switch wins over any `disable` value passed by the caller.
        # When bars are globally enabled, the caller's `disable` is left untouched.
        globally_disabled = are_progress_bars_disabled()
        if globally_disabled:
            kwargs.update(disable=True)
        super().__init__(*args, **kwargs)
121 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_subprocess.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding=utf-8
  3 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License
 16 | """Contains utilities to easily handle subprocesses in `huggingface_hub`."""
 17 | import os
 18 | import subprocess
 19 | from contextlib import contextmanager
 20 | from pathlib import Path
 21 | from typing import IO, Generator, List, Optional, Tuple, Union
 22 | 
 23 | from .logging import get_logger
 24 | 
 25 | 
 26 | logger = get_logger(__name__)
 27 | 
 28 | 
 29 | def run_subprocess(
 30 |     command: Union[str, List[str]],
 31 |     folder: Optional[Union[str, Path]] = None,
 32 |     check=True,
 33 |     **kwargs,
 34 | ) -> subprocess.CompletedProcess:
 35 |     """
 36 |     Method to run subprocesses. Calling this will capture the `stderr` and `stdout`,
 37 |     please call `subprocess.run` manually in case you would like for them not to
 38 |     be captured.
 39 | 
 40 |     Args:
 41 |         command (`str` or `List[str]`):
 42 |             The command to execute as a string or list of strings.
 43 |         folder (`str`, *optional*):
 44 |             The folder in which to run the command. Defaults to current working
 45 |             directory (from `os.getcwd()`).
 46 |         check (`bool`, *optional*, defaults to `True`):
 47 |             Setting `check` to `True` will raise a `subprocess.CalledProcessError`
 48 |             when the subprocess has a non-zero exit code.
 49 |         kwargs (`Dict[str]`):
 50 |             Keyword arguments to be passed to the `subprocess.run` underlying command.
 51 | 
 52 |     Returns:
 53 |         `subprocess.CompletedProcess`: The completed process.
 54 |     """
 55 |     if isinstance(command, str):
 56 |         command = command.split()
 57 | 
 58 |     if isinstance(folder, Path):
 59 |         folder = str(folder)
 60 | 
 61 |     return subprocess.run(
 62 |         command,
 63 |         stderr=subprocess.PIPE,
 64 |         stdout=subprocess.PIPE,
 65 |         check=check,
 66 |         encoding="utf-8",
 67 |         errors="replace",  # if not utf-8, replace char by �
 68 |         cwd=folder or os.getcwd(),
 69 |         **kwargs,
 70 |     )
 71 | 
 72 | 
 73 | @contextmanager
 74 | def run_interactive_subprocess(
 75 |     command: Union[str, List[str]],
 76 |     folder: Optional[Union[str, Path]] = None,
 77 |     **kwargs,
 78 | ) -> Generator[Tuple[IO[str], IO[str]], None, None]:
 79 |     """Run a subprocess in an interactive mode in a context manager.
 80 | 
 81 |     Args:
 82 |         command (`str` or `List[str]`):
 83 |             The command to execute as a string or list of strings.
 84 |         folder (`str`, *optional*):
 85 |             The folder in which to run the command. Defaults to current working
 86 |             directory (from `os.getcwd()`).
 87 |         kwargs (`Dict[str]`):
 88 |             Keyword arguments to be passed to the `subprocess.run` underlying command.
 89 | 
 90 |     Returns:
 91 |         `Tuple[IO[str], IO[str]]`: A tuple with `stdin` and `stdout` to interact
 92 |         with the process (input and output are utf-8 encoded).
 93 | 
 94 |     Example:
 95 |     ```python
 96 |     with _interactive_subprocess("git credential-store get") as (stdin, stdout):
 97 |         # Write to stdin
 98 |         stdin.write("url=hf.co\nusername=obama\n".encode("utf-8"))
 99 |         stdin.flush()
100 | 
101 |         # Read from stdout
102 |         output = stdout.read().decode("utf-8")
103 |     ```
104 |     """
105 |     if isinstance(command, str):
106 |         command = command.split()
107 | 
108 |     with subprocess.Popen(
109 |         command,
110 |         stdin=subprocess.PIPE,
111 |         stdout=subprocess.PIPE,
112 |         stderr=subprocess.STDOUT,
113 |         encoding="utf-8",
114 |         errors="replace",  # if not utf-8, replace char by �
115 |         cwd=folder or os.getcwd(),
116 |         **kwargs,
117 |     ) as process:
118 |         assert process.stdin is not None, "subprocess is opened as subprocess.PIPE"
119 |         assert process.stdout is not None, "subprocess is opened as subprocess.PIPE"
120 |         yield process.stdin, process.stdout
121 | 


--------------------------------------------------------------------------------
/tests/test_utils_tqdm.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from unittest.mock import patch
  3 | 
  4 | import pytest
  5 | from pytest import CaptureFixture
  6 | 
  7 | from huggingface_hub.utils import (
  8 |     are_progress_bars_disabled,
  9 |     disable_progress_bars,
 10 |     enable_progress_bars,
 11 |     tqdm,
 12 | )
 13 | 
 14 | 
 15 | class TestTqdmUtils(unittest.TestCase):
 16 |     @pytest.fixture(autouse=True)
 17 |     def capsys(self, capsys: CaptureFixture) -> None:
 18 |         """Workaround to make capsys work in unittest framework.
 19 | 
 20 |         Capsys is a convenient pytest fixture to capture stdout.
 21 |         See https://waylonwalker.com/pytest-capsys/.
 22 | 
 23 |         Taken from https://github.com/pytest-dev/pytest/issues/2504#issuecomment-309475790.
 24 |         """
 25 |         self.capsys = capsys
 26 | 
 27 |     def setUp(self) -> None:
 28 |         """Get verbosity to set it back after the tests."""
 29 |         self._previous_are_progress_bars_disabled = are_progress_bars_disabled()
 30 |         return super().setUp()
 31 | 
 32 |     def tearDown(self) -> None:
 33 |         """Set back progress bars verbosity as before testing."""
 34 |         if self._previous_are_progress_bars_disabled:
 35 |             disable_progress_bars()
 36 |         else:
 37 |             enable_progress_bars()
 38 | 
 39 |     @patch("huggingface_hub.utils._tqdm.HF_HUB_DISABLE_PROGRESS_BARS", None)
 40 |     def test_tqdm_helpers(self) -> None:
 41 |         """Test helpers to enable/disable progress bars."""
 42 |         disable_progress_bars()
 43 |         self.assertTrue(are_progress_bars_disabled())
 44 | 
 45 |         enable_progress_bars()
 46 |         self.assertFalse(are_progress_bars_disabled())
 47 | 
 48 |     @patch("huggingface_hub.utils._tqdm.HF_HUB_DISABLE_PROGRESS_BARS", True)
 49 |     def test_cannot_enable_tqdm_when_env_variable_is_set(self) -> None:
 50 |         """
 51 |         Test helpers cannot enable/disable progress bars when
 52 |         `HF_HUB_DISABLE_PROGRESS_BARS` is set.
 53 |         """
 54 |         disable_progress_bars()
 55 |         self.assertTrue(are_progress_bars_disabled())
 56 | 
 57 |         with self.assertWarns(UserWarning):
 58 |             enable_progress_bars()
 59 |         self.assertTrue(are_progress_bars_disabled())  # Still disabled !
 60 | 
 61 |     @patch("huggingface_hub.utils._tqdm.HF_HUB_DISABLE_PROGRESS_BARS", False)
 62 |     def test_cannot_disable_tqdm_when_env_variable_is_set(self) -> None:
 63 |         """
 64 |         Test helpers cannot enable/disable progress bars when
 65 |         `HF_HUB_DISABLE_PROGRESS_BARS` is set.
 66 |         """
 67 |         enable_progress_bars()
 68 |         self.assertFalse(are_progress_bars_disabled())
 69 | 
 70 |         with self.assertWarns(UserWarning):
 71 |             disable_progress_bars()
 72 |         self.assertFalse(are_progress_bars_disabled())  # Still enabled !
 73 | 
 74 |     @patch("huggingface_hub.utils._tqdm.HF_HUB_DISABLE_PROGRESS_BARS", None)
 75 |     def test_tqdm_disabled(self) -> None:
 76 |         """Test TQDM not outputing anything when globally disabled."""
 77 |         disable_progress_bars()
 78 |         for _ in tqdm(range(10)):
 79 |             pass
 80 | 
 81 |         captured = self.capsys.readouterr()
 82 |         self.assertEqual(captured.out, "")
 83 |         self.assertEqual(captured.err, "")
 84 | 
 85 |     @patch("huggingface_hub.utils._tqdm.HF_HUB_DISABLE_PROGRESS_BARS", None)
 86 |     def test_tqdm_disabled_cannot_be_forced(self) -> None:
 87 |         """Test TQDM cannot be forced when globally disabled."""
 88 |         disable_progress_bars()
 89 |         for _ in tqdm(range(10), disable=False):
 90 |             pass
 91 | 
 92 |         captured = self.capsys.readouterr()
 93 |         self.assertEqual(captured.out, "")
 94 |         self.assertEqual(captured.err, "")
 95 | 
 96 |     @patch("huggingface_hub.utils._tqdm.HF_HUB_DISABLE_PROGRESS_BARS", None)
 97 |     def test_tqdm_can_be_disabled_when_globally_enabled(self) -> None:
 98 |         """Test TQDM can still be locally disabled even when globally enabled."""
 99 |         enable_progress_bars()
100 |         for _ in tqdm(range(10), disable=True):
101 |             pass
102 | 
103 |         captured = self.capsys.readouterr()
104 |         self.assertEqual(captured.out, "")
105 |         self.assertEqual(captured.err, "")
106 | 
107 |     @patch("huggingface_hub.utils._tqdm.HF_HUB_DISABLE_PROGRESS_BARS", None)
108 |     def test_tqdm_enabled(self) -> None:
109 |         """Test TQDM work normally when globally enabled."""
110 |         enable_progress_bars()
111 |         for _ in tqdm(range(10)):
112 |             pass
113 | 
114 |         captured = self.capsys.readouterr()
115 |         self.assertEqual(captured.out, "")
116 |         self.assertIn("10/10", captured.err)  # tqdm log
117 | 


--------------------------------------------------------------------------------
/tests/test_inference_api.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import unittest
 15 | 
 16 | from huggingface_hub import hf_hub_download
 17 | from huggingface_hub.inference_api import InferenceApi
 18 | 
 19 | from .testing_utils import with_production_testing
 20 | 
 21 | 
 22 | class InferenceApiTest(unittest.TestCase):
 23 |     def read(self, filename: str) -> bytes:
 24 |         with open(filename, "rb") as f:
 25 |             bpayload = f.read()
 26 |         return bpayload
 27 | 
 28 |     @with_production_testing
 29 |     def test_simple_inference(self):
 30 |         api = InferenceApi("bert-base-uncased")
 31 |         inputs = "Hi, I think [MASK] is cool"
 32 |         results = api(inputs)
 33 |         self.assertIsInstance(results, list)
 34 | 
 35 |         result = results[0]
 36 |         self.assertIsInstance(result, dict)
 37 |         self.assertTrue("sequence" in result)
 38 |         self.assertTrue("score" in result)
 39 | 
 40 |     @with_production_testing
 41 |     def test_inference_with_params(self):
 42 |         api = InferenceApi("typeform/distilbert-base-uncased-mnli")
 43 |         inputs = (
 44 |             "I bought a device but it is not working and I would like to get"
 45 |             " reimbursed!"
 46 |         )
 47 |         params = {"candidate_labels": ["refund", "legal", "faq"]}
 48 |         result = api(inputs, params)
 49 |         self.assertIsInstance(result, dict)
 50 |         self.assertTrue("sequence" in result)
 51 |         self.assertTrue("scores" in result)
 52 | 
 53 |     @with_production_testing
 54 |     def test_inference_with_dict_inputs(self):
 55 |         api = InferenceApi("distilbert-base-cased-distilled-squad")
 56 |         inputs = {
 57 |             "question": "What's my name?",
 58 |             "context": "My name is Clara and I live in Berkeley.",
 59 |         }
 60 |         result = api(inputs)
 61 |         self.assertIsInstance(result, dict)
 62 |         self.assertTrue("score" in result)
 63 |         self.assertTrue("answer" in result)
 64 | 
 65 |     @with_production_testing
 66 |     def test_inference_with_audio(self):
 67 |         api = InferenceApi("facebook/wav2vec2-base-960h")
 68 |         file = hf_hub_download(
 69 |             repo_id="hf-internal-testing/dummy-flac-single-example",
 70 |             repo_type="dataset",
 71 |             filename="example.flac",
 72 |         )
 73 |         data = self.read(file)
 74 |         result = api(data=data)
 75 |         self.assertIsInstance(result, dict)
 76 |         self.assertTrue("text" in result, f"We received {result} instead")
 77 | 
 78 |     @with_production_testing
 79 |     def test_inference_with_image(self):
 80 |         api = InferenceApi("google/vit-base-patch16-224")
 81 |         file = hf_hub_download(
 82 |             repo_id="Narsil/image_dummy", repo_type="dataset", filename="lena.png"
 83 |         )
 84 |         data = self.read(file)
 85 |         result = api(data=data)
 86 |         self.assertIsInstance(result, list)
 87 |         for classification in result:
 88 |             self.assertIsInstance(classification, dict)
 89 |             self.assertTrue("score" in classification)
 90 |             self.assertTrue("label" in classification)
 91 | 
 92 |     @with_production_testing
 93 |     def test_inference_overriding_task(self):
 94 |         api = InferenceApi(
 95 |             "sentence-transformers/paraphrase-albert-small-v2",
 96 |             task="feature-extraction",
 97 |         )
 98 |         inputs = "This is an example again"
 99 |         result = api(inputs)
100 |         self.assertIsInstance(result, list)
101 | 
102 |     @with_production_testing
103 |     def test_inference_overriding_invalid_task(self):
104 |         with self.assertRaises(
105 |             ValueError, msg="Invalid task invalid-task. Make sure it's valid."
106 |         ):
107 |             InferenceApi("bert-base-uncased", task="invalid-task")
108 | 
109 |     @with_production_testing
110 |     def test_inference_missing_input(self):
111 |         api = InferenceApi("deepset/roberta-base-squad2")
112 |         result = api({"question": "What's my name?"})
113 |         self.assertIsInstance(result, dict)
114 |         self.assertTrue("error" in result)
115 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/_paths.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # Copyright 2022-present, the HuggingFace Inc. team.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """Contains utilities to handle paths in Huggingface Hub."""
 16 | from fnmatch import fnmatch
 17 | from pathlib import Path
 18 | from typing import Callable, Generator, Iterable, List, Optional, TypeVar, Union
 19 | 
 20 | 
 21 | T = TypeVar("T")
 22 | 
 23 | 
 24 | def filter_repo_objects(
 25 |     items: Iterable[T],
 26 |     *,
 27 |     allow_patterns: Optional[Union[List[str], str]] = None,
 28 |     ignore_patterns: Optional[Union[List[str], str]] = None,
 29 |     key: Optional[Callable[[T], str]] = None,
 30 | ) -> Generator[T, None, None]:
 31 |     """Filter repo objects based on an allowlist and a denylist.
 32 | 
 33 |     Input must be a list of paths (`str` or `Path`) or a list of arbitrary objects.
 34 |     In the later case, `key` must be provided and specifies a function of one argument
 35 |     that is used to extract a path from each element in iterable.
 36 | 
 37 |     Patterns are Unix shell-style wildcards which are NOT regular expressions. See
 38 |     https://docs.python.org/3/library/fnmatch.html for more details.
 39 | 
 40 |     Args:
 41 |         items (`Iterable`):
 42 |             List of items to filter.
 43 |         allow_patterns (`str` or `List[str]`, *optional*):
 44 |             Patterns constituing the allowlist. If provided, item paths must match at
 45 |             least one pattern from the allowlist.
 46 |         ignore_patterns (`str` or `List[str]`, *optional*):
 47 |             Patterns constituing the denylist. If provided, item paths must not match
 48 |             any patterns from the denylist.
 49 |         key (`Callable[[T], str]`, *optional*):
 50 |             Single-argument function to extract a path from each item. If not provided,
 51 |             the `items` must already be `str` or `Path`.
 52 | 
 53 |     Returns:
 54 |         Filtered list of objects, as a generator.
 55 | 
 56 |     Raises:
 57 |         :class:`ValueError`:
 58 |             If `key` is not provided and items are not `str` or `Path`.
 59 | 
 60 |     Example usage with paths:
 61 |     ```python
 62 |     >>> # Filter only PDFs that are not hidden.
 63 |     >>> list(filter_repo_objects(
 64 |     ...     ["aaa.PDF", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
 65 |     ...     allow_patterns=["*.pdf"],
 66 |     ...     ignore_patterns=[".*"],
 67 |     ... ))
 68 |     ["aaa.pdf"]
 69 |     ```
 70 | 
 71 |     Example usage with objects:
 72 |     ```python
 73 |     >>> list(filter_repo_objects(
 74 |     ... [
 75 |     ...     CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")
 76 |     ...     CommitOperationAdd(path_or_fileobj="/tmp/bbb.jpg", path_in_repo="bbb.jpg")
 77 |     ...     CommitOperationAdd(path_or_fileobj="/tmp/.ccc.pdf", path_in_repo=".ccc.pdf")
 78 |     ...     CommitOperationAdd(path_or_fileobj="/tmp/.ddd.png", path_in_repo=".ddd.png")
 79 |     ... ],
 80 |     ... allow_patterns=["*.pdf"],
 81 |     ... ignore_patterns=[".*"],
 82 |     ... key=lambda x: x.repo_in_path
 83 |     ... ))
 84 |     [CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")]
 85 |     ```
 86 |     """
 87 |     if isinstance(allow_patterns, str):
 88 |         allow_patterns = [allow_patterns]
 89 | 
 90 |     if isinstance(ignore_patterns, str):
 91 |         ignore_patterns = [ignore_patterns]
 92 | 
 93 |     if key is None:
 94 | 
 95 |         def _identity(item: T) -> str:
 96 |             if isinstance(item, str):
 97 |                 return item
 98 |             if isinstance(item, Path):
 99 |                 return str(item)
100 |             raise ValueError(
101 |                 f"Please provide `key` argument in `filter_repo_objects`: `{item}` is"
102 |                 " not a string."
103 |             )
104 | 
105 |         key = _identity  # Items must be `str` or `Path`, otherwise raise ValueError
106 | 
107 |     for item in items:
108 |         path = key(item)
109 | 
110 |         # Skip if there's an allowlist and path doesn't match any
111 |         if allow_patterns is not None and not any(
112 |             fnmatch(path, r) for r in allow_patterns
113 |         ):
114 |             continue
115 | 
116 |         # Skip if there's a denylist and path matches any
117 |         if ignore_patterns is not None and any(
118 |             fnmatch(path, r) for r in ignore_patterns
119 |         ):
120 |             continue
121 | 
122 |         yield item
123 | 


--------------------------------------------------------------------------------
/tests/test_utils_validators.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from pathlib import Path
  3 | from unittest.mock import Mock, patch
  4 | 
  5 | from huggingface_hub.utils import (
  6 |     HFValidationError,
  7 |     smoothly_deprecate_use_auth_token,
  8 |     validate_hf_hub_args,
  9 |     validate_repo_id,
 10 | )
 11 | 
 12 | 
 13 | @patch("huggingface_hub.utils._validators.validate_repo_id")
 14 | class TestHfHubValidator(unittest.TestCase):
 15 |     """Test `validate_hf_hub_args` decorator calls all default validators."""
 16 | 
 17 |     def test_validate_repo_id_as_arg(self, validate_repo_id_mock: Mock) -> None:
 18 |         """Test `validate_repo_id` is called when `repo_id` is passed as arg."""
 19 |         self.dummy_function(123)
 20 |         validate_repo_id_mock.assert_called_once_with(123)
 21 | 
 22 |     def test_validate_repo_id_as_kwarg(self, validate_repo_id_mock: Mock) -> None:
 23 |         """Test `validate_repo_id` is called when `repo_id` is passed as kwarg."""
 24 |         self.dummy_function(repo_id=123)
 25 |         validate_repo_id_mock.assert_called_once_with(123)
 26 | 
 27 |     @staticmethod
 28 |     @validate_hf_hub_args
 29 |     def dummy_function(repo_id: str) -> None:
 30 |         pass
 31 | 
 32 | 
 33 | class TestRepoIdValidator(unittest.TestCase):
 34 |     VALID_VALUES = (
 35 |         "123",
 36 |         "foo",
 37 |         "foo/bar",
 38 |         "Foo-BAR_foo.bar123",
 39 |     )
 40 |     NOT_VALID_VALUES = (
 41 |         Path("foo/bar"),  # Must be a string
 42 |         "a" * 100,  # Too long
 43 |         "datasets/foo/bar",  # Repo_type forbidden in repo_id
 44 |         ".repo_id",  # Cannot start with .
 45 |         "repo_id.",  # Cannot end with .
 46 |         "foo--bar",  # Cannot contain "--"
 47 |         "foo..bar",  # Cannot contain ".."
 48 |         "foo.git",  # Cannot end with ".git"
 49 |     )
 50 | 
 51 |     def test_valid_repo_ids(self) -> None:
 52 |         """Test `repo_id` validation on valid values."""
 53 |         for repo_id in self.VALID_VALUES:
 54 |             validate_repo_id(repo_id)
 55 | 
 56 |     def test_not_valid_repo_ids(self) -> None:
 57 |         """Test `repo_id` validation on not valid values."""
 58 |         for repo_id in self.NOT_VALID_VALUES:
 59 |             with self.assertRaises(
 60 |                 HFValidationError, msg=f"'{repo_id}' must not be valid"
 61 |             ):
 62 |                 validate_repo_id(repo_id)
 63 | 
 64 | 
 65 | class TestSmoothlyDeprecateUseAuthToken(unittest.TestCase):
 66 |     def test_token_normal_usage_as_arg(self) -> None:
 67 |         self.assertEqual(
 68 |             self.dummy_token_function("this_is_a_token"),
 69 |             ("this_is_a_token", {}),
 70 |         )
 71 | 
 72 |     def test_token_normal_usage_as_kwarg(self) -> None:
 73 |         self.assertEqual(
 74 |             self.dummy_token_function(token="this_is_a_token"),
 75 |             ("this_is_a_token", {}),
 76 |         )
 77 | 
 78 |     def test_token_normal_usage_with_more_kwargs(self) -> None:
 79 |         self.assertEqual(
 80 |             self.dummy_token_function(token="this_is_a_token", foo="bar"),
 81 |             ("this_is_a_token", {"foo": "bar"}),
 82 |         )
 83 | 
 84 |     def test_token_with_smoothly_deprecated_use_auth_token(self) -> None:
 85 |         self.assertEqual(
 86 |             self.dummy_token_function(use_auth_token="this_is_a_use_auth_token"),
 87 |             ("this_is_a_use_auth_token", {}),
 88 |         )
 89 | 
 90 |     def test_input_kwargs_not_mutated_by_smooth_deprecation(self) -> None:
 91 |         initial_kwargs = {"a": "b", "use_auth_token": "token"}
 92 |         kwargs = smoothly_deprecate_use_auth_token(
 93 |             fn_name="name", has_token=False, kwargs=initial_kwargs
 94 |         )
 95 |         self.assertEqual(kwargs, {"a": "b", "token": "token"})
 96 |         self.assertEqual(  # not mutated!
 97 |             initial_kwargs, {"a": "b", "use_auth_token": "token"}
 98 |         )
 99 | 
100 |     def test_with_both_token_and_use_auth_token(self) -> None:
101 |         with self.assertWarns(UserWarning):
102 |             # `use_auth_token` is ignored !
103 |             self.assertEqual(
104 |                 self.dummy_token_function(
105 |                     token="this_is_a_token", use_auth_token="this_is_a_use_auth_token"
106 |                 ),
107 |                 ("this_is_a_token", {}),
108 |             )
109 | 
110 |     def test_not_deprecated_use_auth_token(self) -> None:
111 |         # `use_auth_token` is accepted by `dummy_use_auth_token_function`
112 |         # => `smoothly_deprecate_use_auth_token` is not called
113 |         self.assertEqual(
114 |             self.dummy_use_auth_token_function(
115 |                 use_auth_token="this_is_a_use_auth_token"
116 |             ),
117 |             ("this_is_a_use_auth_token", {}),
118 |         )
119 | 
120 |     @staticmethod
121 |     @validate_hf_hub_args
122 |     def dummy_token_function(token: str, **kwargs) -> None:
123 |         return token, kwargs
124 | 
125 |     @staticmethod
126 |     @validate_hf_hub_args
127 |     def dummy_use_auth_token_function(use_auth_token: str, **kwargs) -> None:
128 |         return use_auth_token, kwargs
129 | 


--------------------------------------------------------------------------------
/utils/check_static_imports.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # Copyright 2022-present, the HuggingFace Inc. team.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """Contains a tool to reformat static imports in `huggingface_hub.__init__.py`."""
 16 | import argparse
 17 | import re
 18 | from pathlib import Path
 19 | from typing import NoReturn
 20 | 
 21 | import isort
 22 | from huggingface_hub import _SUBMOD_ATTRS
 23 | 
 24 | 
 25 | INIT_FILE_PATH = Path(__file__).parents[1] / "src" / "huggingface_hub" / "__init__.py"
 26 | SETUP_CFG_PATH = Path(__file__).parents[1] / "setup.cfg"
 27 | 
 28 | IF_TYPE_CHECKING_LINE = "\nif TYPE_CHECKING:  # pragma: no cover\n"
 29 | SUBMOD_ATTRS_PATTERN = re.compile("_SUBMOD_ATTRS = {[^}]+}")  # match the whole dict
 30 | 
 31 | 
 32 | def check_static_imports(update: bool) -> NoReturn:
 33 |     """Check all imports are made twice (1 in lazy-loading and 1 in static checks).
 34 | 
 35 |     For more explanations, see `./src/huggingface_hub/__init__.py`.
 36 |     This script is used in the `make style` and `make quality` checks.
 37 |     """
 38 |     with INIT_FILE_PATH.open() as f:
 39 |         init_content = f.read()
 40 | 
 41 |     # Get first half of the `__init__.py` file.
 42 |     # WARNING: Content after this part will be entirely re-generated which means
 43 |     # human-edited changes will be lost !
 44 |     init_content_before_static_checks = init_content.split(IF_TYPE_CHECKING_LINE)[0]
 45 | 
 46 |     # Search and replace `_SUBMOD_ATTRS` dictionary definition. This ensures modules
 47 |     # and functions that can be lazy-loaded are alphabetically ordered for readability.
 48 |     if SUBMOD_ATTRS_PATTERN.search(init_content_before_static_checks) is None:
 49 |         print(
 50 |             "Error: _SUBMOD_ATTRS dictionary definition not found in"
 51 |             " `./src/huggingface_hub/__init__.py`."
 52 |         )
 53 |         exit(1)
 54 | 
 55 |     _submod_attrs_definition = (
 56 |         "_SUBMOD_ATTRS = {\n"
 57 |         + "\n".join(
 58 |             f'    "{module}": [\n'
 59 |             + "\n".join(f'        "{attr}",' for attr in sorted(_SUBMOD_ATTRS[module]))
 60 |             + "\n    ],"
 61 |             for module in sorted(_SUBMOD_ATTRS.keys())
 62 |         )
 63 |         + "\n}"
 64 |     )
 65 |     reordered_content_before_static_checks = SUBMOD_ATTRS_PATTERN.sub(
 66 |         _submod_attrs_definition, init_content_before_static_checks
 67 |     )
 68 | 
 69 |     # Generate the static imports given the `_SUBMOD_ATTRS` dictionary.
 70 |     static_imports = [
 71 |         f"    from .{module} import {attr} # noqa: F401"
 72 |         for module, attributes in _SUBMOD_ATTRS.items()
 73 |         for attr in attributes
 74 |     ]
 75 | 
 76 |     # Generate the expected `__init__.py` file content and apply formatter on it.
 77 |     expected_init_content = isort.code(
 78 |         reordered_content_before_static_checks
 79 |         + IF_TYPE_CHECKING_LINE
 80 |         + "\n".join(static_imports)
 81 |         + "\n",
 82 |         config=isort.Config(settings_path=SETUP_CFG_PATH),
 83 |     )
 84 | 
 85 |     # If expected `__init__.py` content is different, test fails. If '--update-init-file'
 86 |     # is used, `__init__.py` file is updated before the test fails.
 87 |     if init_content != expected_init_content:
 88 |         if update:
 89 |             with INIT_FILE_PATH.open("w") as f:
 90 |                 f.write(expected_init_content)
 91 | 
 92 |             print(
 93 |                 "✅ Imports have been updated in `./src/huggingface_hub/__init__.py`."
 94 |                 "\n   Please make sure the changes are accurate and commit them."
 95 |             )
 96 |             exit(0)
 97 |         else:
 98 |             print(
 99 |                 "❌ Expected content mismatch in"
100 |                 " `./src/huggingface_hub/__init__.py`.\n   It is most likely that you"
101 |                 " added a module/function to `_SUBMOD_ATTRS` and did not update the"
102 |                 " 'static import'-part.\n   Please run `make style` or `python"
103 |                 " utils/check_static_imports.py --update`."
104 |             )
105 |             exit(1)
106 | 
107 |     print("✅ All good! (static imports)")
108 |     exit(0)
109 | 
110 | 
111 | if __name__ == "__main__":
112 |     parser = argparse.ArgumentParser()
113 |     parser.add_argument(
114 |         "--update",
115 |         action="store_true",
116 |         help=(
117 |             "Whether to fix `./src/huggingface_hub/__init__.py` if a change is"
118 |             " detected."
119 |         ),
120 |     )
121 |     args = parser.parse_args()
122 | 
123 |     check_static_imports(update=args.update)
124 | 


--------------------------------------------------------------------------------
/docs/source/how-to-downstream.mdx:
--------------------------------------------------------------------------------
  1 | # Download files from the Hub
  2 | 
  3 | The `huggingface_hub` library provides functions to download files from the repositories
  4 | stored on the Hub. You can use these functions independently or integrate them into your
  5 | own library, making it more convenient for your users to interact with the Hub. This
  6 | guide will show you how to:
  7 | 
  8 | * Download and store a file from the Hub.
  9 | * Download all the files in a repository.
 10 | 
 11 | ## Download and store a file from the Hub
 12 | 
 13 | The [`hf_hub_download`] function is the main function for downloading files from the Hub.
 14 | 
 15 | It downloads the remote file, stores it on disk (in a version-aware way), and returns its local file path.
 16 | 
 17 | Use the `repo_id` and `filename` parameters to specify which file to download:
 18 | 
 19 | ```python
 20 | >>> from huggingface_hub import hf_hub_download
 21 | >>> hf_hub_download(repo_id="lysandre/arxiv-nlp", filename="config.json")
 22 | '/root/.cache/huggingface/hub/models--lysandre--arxiv-nlp/snapshots/894a9adde21d9a3e3843e6d5aeaaf01875c7fade/config.json'
 23 | ```
 24 | 
 25 | <div class="flex justify-center">
 26 | <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/repo.png"/>
 27 | <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/repo-dark.png"/>
 28 | </div>
 29 | 
 30 | Specify a particular file version by providing the file revision, which can be the
 31 | branch name, a tag, or a commit hash. When using the commit hash, it must be the
 32 | full-length hash instead of a 7-character commit hash:
 33 | 
 34 | ```python
 35 | >>> hf_hub_download(
 36 | ...    repo_id="lysandre/arxiv-nlp",
 37 | ...    filename="config.json",
 38 | ...    revision="877b84a8f93f2d619faa2a6e514a32beef88ab0a",
 39 | ... )
 40 | '/root/.cache/huggingface/hub/models--lysandre--arxiv-nlp/snapshots/877b84a8f93f2d619faa2a6e514a32beef88ab0a/config.json'
 41 | ```
 42 | 
 43 | To specify a file revision with the branch name:
 44 | 
 45 | ```python
 46 | >>> hf_hub_download(repo_id="lysandre/arxiv-nlp", filename="config.json", revision="main")
 47 | ```
 48 | 
 49 | To specify a file revision with a tag identifier, for example if you want `v1.0` of the
 50 | `config.json` file:
 51 | 
 52 | ```python
 53 | >>> hf_hub_download(repo_id="lysandre/arxiv-nlp", filename="config.json", revision="v1.0")
 54 | ```
 55 | 
 56 | To download from a `dataset` or a `space`, specify the `repo_type`. By default, the file
 57 | is considered to be part of a `model` repo.
 58 | 
 59 | ```python
 60 | >>> hf_hub_download(repo_id="google/fleurs", filename="fleurs.py", repo_type="dataset")
 61 | ```
 62 | 
 63 | ## Construct a download URL
 64 | 
 65 | In case you want to construct the URL used to download a file from a repo, you can use [`hf_hub_url`] which returns a URL.
 66 | Note that it is used internally by [`hf_hub_download`].
 67 | 
 68 | ## Download an entire repository
 69 | 
 70 | [`snapshot_download`] downloads an entire repository at a given revision. Like
 71 | [`hf_hub_download`], all downloaded files are cached on your local disk.
 72 | 
 73 | Download a whole repository as shown in the following:
 74 | 
 75 | ```python
 76 | >>> from huggingface_hub import snapshot_download
 77 | >>> snapshot_download(repo_id="lysandre/arxiv-nlp")
 78 | '/home/lysandre/.cache/huggingface/hub/models--lysandre--arxiv-nlp/snapshots/894a9adde21d9a3e3843e6d5aeaaf01875c7fade'
 79 | ```
 80 | 
 81 | [`snapshot_download`] downloads the latest revision by default. If you want a specific
 82 | repository revision, use the `revision` parameter:
 83 | 
 84 | ```python
 85 | >>> from huggingface_hub import snapshot_download
 86 | >>> snapshot_download(repo_id="lysandre/arxiv-nlp", revision="main")
 87 | ```
 88 | 
 89 | In general, it is usually better to download files with [`hf_hub_download`] - if you
 90 | already know the file names you need.
 91 | [`snapshot_download`] is helpful when you are unaware of which files to download.
 92 | 
 93 | However, you don't always want to download the contents of an entire repository with
 94 | [`snapshot_download`]. Even if you don't know the file name, you can download specific
 95 | files if you know the file type with `allow_patterns` and `ignore_patterns`. Use the
 96 | `allow_patterns` and `ignore_patterns` arguments to specify which files to download. These
 97 | parameters accept either a single pattern or a list of patterns.
 98 | 
 99 | Patterns are Standard Wildcards (globbing patterns) as documented
100 | [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). The pattern
101 | matching is based on [`fnmatch`](https://docs.python.org/3/library/fnmatch.html).
102 | 
103 | For example, you can use `allow_patterns` to only download JSON configuration files:
104 | 
105 | ```python
106 | >>> from huggingface_hub import snapshot_download
107 | >>> snapshot_download(repo_id="lysandre/arxiv-nlp", allow_patterns="*.json")
108 | ```
109 | 
110 | On the other hand, `ignore_patterns` can exclude certain files from being downloaded. The
111 | following example ignores the `.msgpack` and `.h5` file extensions:
112 | 
113 | ```python
114 | >>> from huggingface_hub import snapshot_download
115 | >>> snapshot_download(repo_id="lysandre/arxiv-nlp", ignore_patterns=["*.msgpack", "*.h5"])
116 | ```
117 | 
118 | Passing a pattern can be especially useful when repositories contain files that are never
119 | expected to be downloaded by [`snapshot_download`].
120 | 


--------------------------------------------------------------------------------
/docs/source/quick-start.mdx:
--------------------------------------------------------------------------------
  1 | # Quickstart
  2 | 
  3 | The [Hugging Face Hub](https://hf.co/) is the go-to place for sharing machine learning
  4 | models, demos, datasets, and metrics. The `huggingface_hub` library helps you interact with
  5 | the Hub without leaving your development environment. You can create and manage
  6 | repositories easily, download and upload files, and get useful model and dataset
  7 | metadata from the Hub.
  8 | 
  9 | ## Installation
 10 | 
 11 | To get started, install the `huggingface_hub` library:
 12 | 
 13 | ```bash
 14 | pip install --upgrade huggingface_hub
 15 | ```
 16 | 
 17 | For more details, check out the [installation](installation) guide.
 18 | 
 19 | ## Download files
 20 | 
 21 | Repositories on the Hub are git version controlled, and users can download a single file
 22 | or the whole repository. You can use the [`hf_hub_download`] function to download files.
 23 | This function will download and cache a file on your local disk. The next time you need
 24 | that file, it will load from your cache, so you don't need to re-download it.
 25 | 
 26 | You will need the repository id and the filename of the file you want to download. For
 27 | example, to download the [Pegasus](https://huggingface.co/google/pegasus-xsum) model
 28 | configuration file: 
 29 | 
 30 | ```py
 31 | >>> from huggingface_hub import hf_hub_download
 32 | >>> hf_hub_download(repo_id="google/pegasus-xsum", filename="config.json")
 33 | ```
 34 | 
 35 | To download a specific version of the file, use the `revision` parameter to specify the
 36 | branch name, tag, or commit hash. If you choose to use the commit hash, it must be the
 37 | full-length hash instead of the shorter 7-character commit hash: 
 38 | 
 39 | ```py
 40 | >>> from huggingface_hub import hf_hub_download
 41 | >>> hf_hub_download(
 42 | ...     repo_id="google/pegasus-xsum", 
 43 | ...     filename="config.json", 
 44 | ...     revision="4d33b01d79672f27f001f6abade33f22d993b151"
 45 | ... )
 46 | ```
 47 | 
 48 | For more details and options, see the API reference for [`hf_hub_download`].
 49 | 
 50 | ## Login
 51 | 
 52 | In a lot of cases, you must be logged in with a Hugging Face account to interact with
 53 | the Hub: download private repos, upload files, create PRs,...
 54 | [Create an account](https://hf.co/join) if you don't already have one, and then sign in
 55 | to get your [User Access Token](https://huggingface.co/docs/hub/security-tokens) from
 56 | your [Settings page](https://huggingface.co/settings/tokens). The User Access Token is
 57 | used to authenticate your identity to the Hub.
 58 | 
 59 | Once you have your User Access Token, run the following command in your terminal:
 60 | 
 61 | ```bash
 62 | huggingface-cli login
 63 | ```
 64 | 
 65 | Or if you prefer to work from a Jupyter or Colaboratory notebook, then use [`login`]:
 66 | 
 67 | ```py
 68 | >>> from huggingface_hub import login
 69 | >>> login()
 70 | ```
 71 | 
 72 | <Tip>
 73 | 
 74 | You can also provide your token to the functions and methods. This way you don't need to
 75 | store your token anywhere.
 76 | 
 77 | </Tip>
 78 | 
 79 | <Tip>
 80 | 
 81 | Once you are logged in, all requests to the Hub will use your access token by default.
 82 | If you want to disable implicit use of your token, you should set the
 83 | `HF_HUB_DISABLE_IMPLICIT_TOKEN` environment variable.
 84 | 
 85 | </Tip>
 86 | 
 87 | ## Create a repository
 88 | 
 89 | Once you've registered and logged in, create a repository with the [`create_repo`]
 90 | function:
 91 | 
 92 | ```py
 93 | >>> from huggingface_hub import HfApi
 94 | >>> api = HfApi()
 95 | >>> api.create_repo(repo_id="super-cool-model")
 96 | ```
 97 | 
 98 | If you want your repository to be private, then:
 99 | 
100 | ```py
101 | >>> from huggingface_hub import HfApi
102 | >>> api = HfApi()
103 | >>> api.create_repo(repo_id="super-cool-model", private=True)
104 | ```
105 | 
106 | Private repositories will not be visible to anyone except yourself.
107 | 
108 | <Tip>
109 | 
110 | To create a repository or to push content to the Hub, you must provide a User Access
111 | Token that has the `write` permission. You can choose the permission when creating the
112 | token in your [Settings page](https://huggingface.co/settings/tokens).
113 | 
114 | </Tip>
115 | 
116 | ## Upload files
117 | 
118 | Use the [`upload_file`] function to add a file to your newly created repository. You
119 | need to specify:
120 | 
121 | 1. The path of the file to upload.
122 | 2. The path of the file in the repository.
123 | 3. The repository id of where you want to add the file.
124 | 
125 | ```py
126 | >>> from huggingface_hub import HfApi
127 | >>> api = HfApi()
128 | >>> api.upload_file(path_or_fileobj="/home/lysandre/dummy-test/README.md", 
129 | ...                 path_in_repo="README.md", 
130 | ...                 repo_id="lysandre/test-model",
131 | ... )
132 | ```
133 | 
134 | To upload more than one file at a time, take a look at this [guide](how-to-upstream)
135 | which will introduce you to several methods for uploading files (with or without git).
136 | 
137 | ## Next steps
138 | 
139 | The `huggingface_hub` library provides an easy way for users to interact with the Hub
140 | with Python. To learn more about how you can manage your files and repositories on the
141 | Hub, we recommend reading our how-to guides for how to:
142 | 
143 | - [Create and manage repositories](how-to-manage).
144 | - [Download](how-to-downstream) files from the Hub.
145 | - [Upload](how-to-upstream) files to the Hub.
146 | - [Search the Hub](searching-the-hub) for your desired model or dataset.
147 | - [Access the Inference API](how-to-inference) for fast inference.


--------------------------------------------------------------------------------
/tests/test_utils_http.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import unittest
  3 | from typing import Generator
  4 | from unittest.mock import Mock, call, patch
  5 | 
  6 | from huggingface_hub.utils._http import http_backoff
  7 | from requests import ConnectTimeout, HTTPError
  8 | 
  9 | 
 10 | URL = "https://www.google.com"
 11 | 
 12 | 
 13 | @patch("huggingface_hub.utils._http.requests.request")
 14 | class TestHttpBackoff(unittest.TestCase):
 15 |     def test_backoff_no_errors(self, mock_request: Mock) -> None:
 16 |         """Test normal usage of `http_backoff`."""
 17 |         data_mock = Mock()
 18 |         response = http_backoff("GET", URL, data=data_mock)
 19 |         mock_request.assert_called_once_with(method="GET", url=URL, data=data_mock)
 20 |         self.assertIs(response, mock_request())
 21 | 
 22 |     def test_backoff_3_calls(self, mock_request: Mock) -> None:
 23 |         """Test `http_backoff` with 2 fails."""
 24 |         response_mock = Mock()
 25 |         mock_request.side_effect = (ValueError(), ValueError(), response_mock)
 26 |         response = http_backoff(  # retry on ValueError, instant retry
 27 |             "GET", URL, retry_on_exceptions=ValueError, base_wait_time=0.0
 28 |         )
 29 |         self.assertEqual(mock_request.call_count, 3)
 30 |         mock_request.assert_has_calls(
 31 |             calls=[
 32 |                 call(method="GET", url=URL),
 33 |                 call(method="GET", url=URL),
 34 |                 call(method="GET", url=URL),
 35 |             ]
 36 |         )
 37 |         self.assertIs(response, response_mock)
 38 | 
 39 |     def test_backoff_on_exception_until_max(self, mock_request: Mock) -> None:
 40 |         """Test `http_backoff` until max limit is reached with exceptions."""
 41 |         mock_request.side_effect = ConnectTimeout()
 42 | 
 43 |         with self.assertRaises(ConnectTimeout):
 44 |             http_backoff("GET", URL, base_wait_time=0.0, max_retries=3)
 45 | 
 46 |         self.assertEqual(mock_request.call_count, 4)
 47 | 
 48 |     def test_backoff_on_status_code_until_max(self, mock_request: Mock) -> None:
 49 |         """Test `http_backoff` until max limit is reached with status codes."""
 50 |         mock_503 = Mock()
 51 |         mock_503.status_code = 503
 52 |         mock_504 = Mock()
 53 |         mock_504.status_code = 504
 54 |         mock_504.raise_for_status.side_effect = HTTPError()
 55 |         mock_request.side_effect = (mock_503, mock_504, mock_503, mock_504)
 56 | 
 57 |         with self.assertRaises(HTTPError):
 58 |             http_backoff(
 59 |                 "GET",
 60 |                 URL,
 61 |                 base_wait_time=0.0,
 62 |                 max_retries=3,
 63 |                 retry_on_status_codes=(503, 504),
 64 |             )
 65 | 
 66 |         self.assertEqual(mock_request.call_count, 4)
 67 | 
 68 |     def test_backoff_on_exceptions_and_status_codes(self, mock_request: Mock) -> None:
 69 |         """Test `http_backoff` until max limit with status codes and exceptions."""
 70 |         mock_503 = Mock()
 71 |         mock_503.status_code = 503
 72 |         mock_request.side_effect = (mock_503, ConnectTimeout())
 73 | 
 74 |         with self.assertRaises(ConnectTimeout):
 75 |             http_backoff("GET", URL, base_wait_time=0.0, max_retries=1)
 76 | 
 77 |         self.assertEqual(mock_request.call_count, 2)
 78 | 
 79 |     def test_backoff_on_valid_status_code(self, mock_request: Mock) -> None:
 80 |         """Test `http_backoff` until max limit with a valid status code.
 81 | 
 82 |         Quite a corner case: the user wants to retry if status code is 200. Requests are
 83 |         retried but in the end, the HTTP 200 response is returned if the server returned
 84 |         only 200 responses.
 85 |         """
 86 |         mock_200 = Mock()
 87 |         mock_200.status_code = 200
 88 |         mock_request.side_effect = (mock_200, mock_200, mock_200, mock_200)
 89 | 
 90 |         response = http_backoff(
 91 |             "GET", URL, base_wait_time=0.0, max_retries=3, retry_on_status_codes=200
 92 |         )
 93 | 
 94 |         self.assertEqual(mock_request.call_count, 4)
 95 |         self.assertIs(response, mock_200)
 96 | 
 97 |     def test_backoff_sleep_time(self, mock_request: Mock) -> None:
 98 |         """Test `http_backoff` sleep time goes exponential until max limit.
 99 | 
100 |         Since timing between 2 requests is sleep duration + some other stuff, this test
101 |         can be unstable. However, sleep durations between 10ms and 50ms should be enough
102 |         to make the approximation that measured durations are the "sleep time" waited by
103 |         `http_backoff`. If this is not the case, just increase `base_wait_time`,
104 |         `max_wait_time` and `expected_sleep_times` with bigger values.
105 |         """
106 |         sleep_times = []
107 | 
108 |         def _side_effect_timer() -> Generator[ConnectTimeout, None, None]:
109 |             t0 = time.time()
110 |             while True:
111 |                 yield ConnectTimeout()
112 |                 t1 = time.time()
113 |                 sleep_times.append(round(t1 - t0, 2))
114 |                 t0 = t1
115 | 
116 |         mock_request.side_effect = _side_effect_timer()
117 | 
118 |         with self.assertRaises(ConnectTimeout):
119 |             http_backoff(
120 |                 "GET", URL, base_wait_time=0.01, max_wait_time=0.05, max_retries=5
121 |             )
122 | 
123 |         self.assertEqual(mock_request.call_count, 6)
124 | 
125 |         # Assert sleep times are exponential until plateau
126 |         expected_sleep_times = [0.01, 0.02, 0.04, 0.05, 0.05]
127 |         self.assertListEqual(sleep_times, expected_sleep_times)
128 | 


--------------------------------------------------------------------------------
/docs/source/package_reference/environment_variables.mdx:
--------------------------------------------------------------------------------
  1 | # Environment variables
  2 | 
  3 | `huggingface_hub` can be configured using environment variables.
  4 | 
  5 | If you are unfamiliar with environment variables, here are generic articles about them
  6 | [on macOS and Linux](https://linuxize.com/post/how-to-set-and-list-environment-variables-in-linux/)
  7 | and on [Windows](https://phoenixnap.com/kb/windows-set-environment-variable).
  8 | 
  9 | This page will guide you through all environment variables specific to `huggingface_hub`
 10 | and their meaning.
 11 | 
 12 | ## Generic
 13 | 
 14 | ### HF_ENDPOINT
 15 | 
 16 | To configure the Hub base url. You might want to set this variable if your organization
 17 | is using a [Private Hub](https://huggingface.co/platform).
 18 | 
 19 | Defaults to `"https://huggingface.co"`.
 20 | 
 21 | ### HF_HOME
 22 | 
 23 | To configure where `huggingface_hub` will locally store data. In particular, your token
 24 | and the cache will be stored in this folder.
 25 | 
 26 | Defaults to `"~/.cache/huggingface"` unless [XDG_CACHE_HOME](#xdgcachehome) is set.
 27 | 
 28 | ### HUGGINGFACE_HUB_CACHE
 29 | 
 30 | To configure where repositories from the Hub will be cached locally (models, datasets and
 31 | spaces).
 32 | 
 33 | Defaults to `"$HF_HOME/hub"` (e.g. `"~/.cache/huggingface/hub"` by default).
 34 | 
 35 | ### HUGGINGFACE_ASSETS_CACHE
 36 | 
 37 | To configure where [assets](how-to-cache#caching-assets) created by downstream libraries
 38 | will be cached locally. Those assets can be preprocessed data, files downloaded from GitHub,
 39 | logs,...
 40 | 
 41 | Defaults to `"$HF_HOME/assets"` (e.g. `"~/.cache/huggingface/assets"` by default).
 42 | 
 43 | ### HUGGING_FACE_HUB_TOKEN
 44 | 
 45 | To configure the User Access Token to authenticate to the Hub. If set, this value will
 46 | overwrite the token stored on the machine (under `"$HF_HOME/token"`).
 47 | 
 48 | See [login reference](package_reference/login) for more details.
 49 | 
 50 | ### HUGGINGFACE_HUB_VERBOSITY
 51 | 
 52 | Set the verbosity level of the `huggingface_hub`'s logger. Must be one of
 53 | `{"debug", "info", "warning", "error", "critical"}`.
 54 | 
 55 | Defaults to `"warning"`.
 56 | 
 57 | For more details, see [logging reference](package_reference/utilities#huggingface_hub.utils.logging.get_verbosity).
 58 | 
 59 | ## Boolean values
 60 | 
 61 | The following environment variables expect a boolean value. The variable will be considered
 62 | as `True` if its value is one of `{"1", "ON", "YES", "TRUE"}` (case-insensitive). Any other value
 63 | (or undefined) will be considered as `False`.
 64 | 
 65 | ### HF_HUB_OFFLINE
 66 | 
 67 | If set, no HTTP calls will be made when trying to fetch files. Only files that are already
 68 | cached will be accessed. This is useful in case your network is slow and you don't care
 69 | about having absolutely the latest version of a file.
 70 | 
 71 | **Note:** even if the latest version of a file is cached, calling `hf_hub_download` still triggers
 72 | an HTTP request to check that a new version is not available. Setting `HF_HUB_OFFLINE=1` will
 73 | skip this call which speeds up your loading time.
 74 | 
 75 | ### HF_HUB_DISABLE_IMPLICIT_TOKEN
 76 | 
 77 | Authentication is not mandatory for every request to the Hub. For instance, requesting
 78 | details about the `"gpt2"` model does not require authentication. However, if a user is
 79 | [logged in](package_reference/login), the default behavior will be to always send the token
 80 | in order to ease user experience (never get an HTTP 401 Unauthorized) when accessing private or gated repositories. For privacy, you can
 81 | disable this behavior by setting `HF_HUB_DISABLE_IMPLICIT_TOKEN=1`. In this case,
 82 | the token will be sent only for "write-access" calls (example: create a commit).
 83 | 
 84 | **Note:** disabling implicit sending of token can have weird side effects. For example,
 85 | if you want to list all models on the Hub, your private models will not be listed. You
 86 | would need to explicitly pass `token=True` argument in your script.
 87 | 
 88 | ### HF_HUB_DISABLE_PROGRESS_BARS
 89 | 
 90 | For time consuming tasks, `huggingface_hub` displays a progress bar by default (using tqdm).
 91 | You can disable all the progress bars at once by setting `HF_HUB_DISABLE_PROGRESS_BARS=1`.
 92 | 
 93 | ### HF_HUB_DISABLE_SYMLINKS_WARNING
 94 | 
 95 | If you are on a Windows machine, it is recommended to enable the developer mode or to run
 96 | `huggingface_hub` in admin mode. If not, `huggingface_hub` will not be able to create
 97 | symlinks in your cache system. You will be able to execute any script but your user experience
 98 | will be degraded as some huge files might end up duplicated on your hard drive. A warning
 99 | message is triggered to warn you about this behavior. Set `HF_HUB_DISABLE_SYMLINKS_WARNING=1`
100 | to disable this warning.
101 | 
102 | For more details, see [cache limitations](how-to-cache#limitations).
103 | 
104 | ## From external tools
105 | 
106 | Some environment variables are not specific to `huggingface_hub` but still taken into account
107 | when they are set.
108 | 
109 | ### NO_COLOR
110 | 
111 | Boolean value. When set, `huggingface-cli` tool will not print any ANSI color.
112 | See [no-color.org](https://no-color.org/).
113 | 
114 | ### XDG_CACHE_HOME
115 | 
116 | Used only when `HF_HOME` is not set!
117 | 
118 | This is the default way to configure where [user-specific non-essential (cached) data should be written](https://wiki.archlinux.org/title/XDG_Base_Directory)
119 | on Linux machines.
120 | 
121 | If `HF_HOME` is not set, the default home will be `"$XDG_CACHE_HOME/huggingface"` instead
122 | of `"~/.cache/huggingface"`.
123 | 


--------------------------------------------------------------------------------
/src/huggingface_hub/utils/logging.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # Copyright 2020 Optuna, Hugging Face
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """ Logging utilities."""
 16 | 
 17 | import logging
 18 | import os
 19 | from logging import CRITICAL  # NOQA
 20 | from logging import DEBUG  # NOQA
 21 | from logging import ERROR  # NOQA
 22 | from logging import FATAL  # NOQA
 23 | from logging import INFO  # NOQA
 24 | from logging import NOTSET  # NOQA
 25 | from logging import WARN  # NOQA
 26 | from logging import WARNING  # NOQA
 27 | from typing import Optional
 28 | 
 29 | 
 30 | log_levels = {
 31 |     "debug": logging.DEBUG,
 32 |     "info": logging.INFO,
 33 |     "warning": logging.WARNING,
 34 |     "error": logging.ERROR,
 35 |     "critical": logging.CRITICAL,
 36 | }
 37 | 
 38 | _default_log_level = logging.WARNING
 39 | 
 40 | 
 41 | def _get_library_name() -> str:
 42 |     return __name__.split(".")[0]
 43 | 
 44 | 
 45 | def _get_library_root_logger() -> logging.Logger:
 46 |     return logging.getLogger(_get_library_name())
 47 | 
 48 | 
 49 | def _get_default_logging_level():
 50 |     """
 51 |     If HUGGINGFACE_HUB_VERBOSITY env var is set to one of the valid choices
 52 |     return that as the new default level. If it is not - fall back to
 53 |     `_default_log_level`
 54 |     """
 55 |     env_level_str = os.getenv("HUGGINGFACE_HUB_VERBOSITY", None)
 56 |     if env_level_str:
 57 |         if env_level_str in log_levels:
 58 |             return log_levels[env_level_str]
 59 |         else:
 60 |             logging.getLogger().warning(
 61 |                 f"Unknown option HUGGINGFACE_HUB_VERBOSITY={env_level_str}, "
 62 |                 f"has to be one of: { ', '.join(log_levels.keys()) }"
 63 |             )
 64 |     return _default_log_level
 65 | 
 66 | 
 67 | def _configure_library_root_logger() -> None:
 68 |     library_root_logger = _get_library_root_logger()
 69 |     library_root_logger.addHandler(logging.StreamHandler())
 70 |     library_root_logger.setLevel(_get_default_logging_level())
 71 | 
 72 | 
 73 | def _reset_library_root_logger() -> None:
 74 |     library_root_logger = _get_library_root_logger()
 75 |     library_root_logger.setLevel(logging.NOTSET)
 76 | 
 77 | 
 78 | def get_logger(name: Optional[str] = None) -> logging.Logger:
 79 |     """
 80 |         Returns a logger with the specified name. This function is not supposed
 81 |         to be directly accessed by library users.
 82 | 
 83 |         Args:
 84 |             name (`str`, *optional*):
 85 |                 The name of the logger to get, usually the filename
 86 | 
 87 |         Example:
 88 | 
 89 |     ```python
 90 |     >>> from huggingface_hub import get_logger
 91 | 
 92 |     >>> logger = get_logger(__file__)
 93 |     >>> logger.set_verbosity_info()
 94 |     ```
 95 |     """
 96 | 
 97 |     if name is None:
 98 |         name = _get_library_name()
 99 | 
100 |     return logging.getLogger(name)
101 | 
102 | 
def get_verbosity() -> int:
    """Return the effective level of the HuggingFace Hub's root logger.

    Returns:
        `int`: the logging level, e.g., `huggingface_hub.logging.DEBUG` or
        `huggingface_hub.logging.INFO`.

    <Tip>

    HuggingFace Hub has the following logging levels:

    - `huggingface_hub.logging.CRITICAL`, `huggingface_hub.logging.FATAL`
    - `huggingface_hub.logging.ERROR`
    - `huggingface_hub.logging.WARNING`, `huggingface_hub.logging.WARN`
    - `huggingface_hub.logging.INFO`
    - `huggingface_hub.logging.DEBUG`

    </Tip>
    """
    root_logger = _get_library_root_logger()
    return root_logger.getEffectiveLevel()
123 | 
124 | 
def set_verbosity(verbosity: int) -> None:
    """
    Apply a new level to the HuggingFace Hub's root logger.

    Args:
        verbosity (`int`):
            Logging level, e.g., `huggingface_hub.logging.DEBUG` or
            `huggingface_hub.logging.INFO`.
    """
    root_logger = _get_library_root_logger()
    root_logger.setLevel(verbosity)
135 | 
136 | 
def set_verbosity_info():
    """
    Shortcut for `set_verbosity(logging.INFO)`.
    """
    set_verbosity(logging.INFO)
142 | 
143 | 
def set_verbosity_warning():
    """
    Shortcut for `set_verbosity(logging.WARNING)`.
    """
    set_verbosity(logging.WARNING)
149 | 
150 | 
def set_verbosity_debug():
    """
    Shortcut for `set_verbosity(logging.DEBUG)`.
    """
    set_verbosity(logging.DEBUG)
156 | 
157 | 
def set_verbosity_error():
    """
    Shortcut for `set_verbosity(logging.ERROR)`.
    """
    set_verbosity(logging.ERROR)
163 | 
164 | 
def disable_propagation() -> None:
    """
    Disable propagation of the library log outputs, so that records reaching
    the library root logger are no longer passed on to the handlers of its
    ancestor loggers.

    Note that Python loggers propagate by default and this module does not
    change that when it configures the library root logger: call this function
    explicitly to turn propagation off.
    """
    root_logger = _get_library_root_logger()
    root_logger.propagate = False
171 | 
172 | 
def enable_propagation() -> None:
    """
    Enable propagation of the library log outputs to the handlers of ancestor
    loggers. If the root logger has been configured, please disable the
    HuggingFace Hub's default handler to prevent double logging.
    """
    root_logger = _get_library_root_logger()
    root_logger.propagate = True
180 | 
181 | 
182 | _configure_library_root_logger()
183 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Contributor Covenant Code of Conduct
  3 | 
  4 | ## Our Pledge
  5 | 
  6 | We as members, contributors, and leaders pledge to make participation in our
  7 | community a harassment-free experience for everyone, regardless of age, body
  8 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  9 | identity and expression, level of experience, education, socio-economic status,
 10 | nationality, personal appearance, race, religion, or sexual identity
 11 | and orientation.
 12 | 
 13 | We pledge to act and interact in ways that contribute to an open, welcoming,
 14 | diverse, inclusive, and healthy community.
 15 | 
 16 | ## Our Standards
 17 | 
 18 | Examples of behavior that contributes to a positive environment for our
 19 | community include:
 20 | 
 21 | * Demonstrating empathy and kindness toward other people
 22 | * Being respectful of differing opinions, viewpoints, and experiences
 23 | * Giving and gracefully accepting constructive feedback
 24 | * Accepting responsibility and apologizing to those affected by our mistakes,
 25 |   and learning from the experience
 26 | * Focusing on what is best not just for us as individuals, but for the
 27 |   overall community
 28 | 
 29 | Examples of unacceptable behavior include:
 30 | 
 31 | * The use of sexualized language or imagery, and sexual attention or
 32 |   advances of any kind
 33 | * Trolling, insulting or derogatory comments, and personal or political attacks
 34 | * Public or private harassment
 35 | * Publishing others' private information, such as a physical or email
 36 |   address, without their explicit permission
 37 | * Other conduct which could reasonably be considered inappropriate in a
 38 |   professional setting
 39 | 
 40 | ## Enforcement Responsibilities
 41 | 
 42 | Community leaders are responsible for clarifying and enforcing our standards of
 43 | acceptable behavior and will take appropriate and fair corrective action in
 44 | response to any behavior that they deem inappropriate, threatening, offensive,
 45 | or harmful.
 46 | 
 47 | Community leaders have the right and responsibility to remove, edit, or reject
 48 | comments, commits, code, wiki edits, issues, and other contributions that are
 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 50 | decisions when appropriate.
 51 | 
 52 | ## Scope
 53 | 
 54 | This Code of Conduct applies within all community spaces, and also applies when
 55 | an individual is officially representing the community in public spaces.
 56 | Examples of representing our community include using an official e-mail address,
 57 | posting via an official social media account, or acting as an appointed
 58 | representative at an online or offline event.
 59 | 
 60 | ## Enforcement
 61 | 
 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 63 | reported to the community leaders responsible for enforcement at
 64 | feedback@huggingface.co.
 65 | All complaints will be reviewed and investigated promptly and fairly.
 66 | 
 67 | All community leaders are obligated to respect the privacy and security of the
 68 | reporter of any incident.
 69 | 
 70 | ## Enforcement Guidelines
 71 | 
 72 | Community leaders will follow these Community Impact Guidelines in determining
 73 | the consequences for any action they deem in violation of this Code of Conduct:
 74 | 
 75 | ### 1. Correction
 76 | 
 77 | **Community Impact**: Use of inappropriate language or other behavior deemed
 78 | unprofessional or unwelcome in the community.
 79 | 
 80 | **Consequence**: A private, written warning from community leaders, providing
 81 | clarity around the nature of the violation and an explanation of why the
 82 | behavior was inappropriate. A public apology may be requested.
 83 | 
 84 | ### 2. Warning
 85 | 
 86 | **Community Impact**: A violation through a single incident or series
 87 | of actions.
 88 | 
 89 | **Consequence**: A warning with consequences for continued behavior. No
 90 | interaction with the people involved, including unsolicited interaction with
 91 | those enforcing the Code of Conduct, for a specified period of time. This
 92 | includes avoiding interactions in community spaces as well as external channels
 93 | like social media. Violating these terms may lead to a temporary or
 94 | permanent ban.
 95 | 
 96 | ### 3. Temporary Ban
 97 | 
 98 | **Community Impact**: A serious violation of community standards, including
 99 | sustained inappropriate behavior.
100 | 
101 | **Consequence**: A temporary ban from any sort of interaction or public
102 | communication with the community for a specified period of time. No public or
103 | private interaction with the people involved, including unsolicited interaction
104 | with those enforcing the Code of Conduct, is allowed during this period.
105 | Violating these terms may lead to a permanent ban.
106 | 
107 | ### 4. Permanent Ban
108 | 
109 | **Community Impact**: Demonstrating a pattern of violation of community
110 | standards, including sustained inappropriate behavior,  harassment of an
111 | individual, or aggression toward or disparagement of classes of individuals.
112 | 
113 | **Consequence**: A permanent ban from any sort of public interaction within
114 | the community.
115 | 
116 | ## Attribution
117 | 
118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119 | version 2.0, available at
120 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
121 | 
122 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
123 | enforcement ladder](https://github.com/mozilla/diversity).
124 | 
125 | [homepage]: https://www.contributor-covenant.org
126 | 
127 | For answers to common questions about this code of conduct, see the FAQ at
128 | https://www.contributor-covenant.org/faq. Translations are available at
129 | https://www.contributor-covenant.org/translations.


--------------------------------------------------------------------------------
/docs/source/how-to-discussions-and-pull-requests.mdx:
--------------------------------------------------------------------------------
  1 | # Interact with Discussions and Pull Requests 
  2 | 
  3 | The `huggingface_hub` library provides a Python interface to interact with Pull Requests and Discussions on the Hub.
  4 | Visit [the dedicated documentation page](https://huggingface.co/docs/hub/repositories-pull-requests-discussions)
  5 | for a deeper view of what Discussions and Pull Requests on the Hub are, and how they work under the hood.
  6 | 
  7 | ## Retrieve Discussions and Pull Requests from the Hub
  8 | 
  9 | The `HfApi` class allows you to retrieve Discussions and Pull Requests on a given repo:
 10 | 
 11 | ```python
 12 | >>> from huggingface_hub import get_repo_discussions
 13 | >>> for discussion in get_repo_discussions(repo_id="bigscience/bloom-1b3"):
 14 | ...     print(f"{discussion.num} - {discussion.title}, pr: {discussion.is_pull_request}")
 15 | 
 16 | # 11 - Add Flax weights, pr: True
 17 | # 10 - Update README.md, pr: True
 18 | # 9 - Training languages in the model card, pr: True
 19 | # 8 - Update tokenizer_config.json, pr: True
 20 | # 7 - Slurm training script, pr: False
 21 | [...]
 22 | ```
 23 | 
 24 | `HfApi.get_repo_discussions` returns a [generator](https://docs.python.org/3.7/howto/functional.html#generators) that yields
 25 | [`Discussion`] objects. To get all the Discussions in a single list, run:
 26 | 
 27 | ```python
 28 | >>> from huggingface_hub import get_repo_discussions
 29 | >>> discussions_list = list(get_repo_discussions(repo_id="bert-base-uncased"))
 30 | ```
 31 | 
 32 | The [`Discussion`] object returned by [`HfApi.get_repo_discussions`] contains a high-level overview of the
 33 | Discussion or Pull Request. You can also get more detailed information using [`HfApi.get_discussion_details`]:
 34 | 
 35 | ```python
 36 | >>> from huggingface_hub import get_discussion_details
 37 | 
 38 | >>> get_discussion_details(
 39 | ...     repo_id="bigscience/bloom-1b3",
 40 | ...     discussion_num=2
 41 | ... )
 42 | DiscussionWithDetails(
 43 |     num=2,
 44 |     author='cakiki',
 45 |     title='Update VRAM memory for the V100s',
 46 |     status='open',
 47 |     is_pull_request=True,
 48 |     events=[
 49 |         DiscussionComment(type='comment', author='cakiki', ...),
 50 |         DiscussionCommit(type='commit', author='cakiki', summary='Update VRAM memory for the V100s', oid='1256f9d9a33fa8887e1c1bf0e09b4713da96773a', ...),
 51 |     ],
 52 |     conflicting_files=[],
 53 |     target_branch='refs/heads/main',
 54 |     merge_commit_oid=None,
 55 |     diff='diff --git a/README.md b/README.md\nindex a6ae3b9294edf8d0eda0d67c7780a10241242a7e..3a1814f212bc3f0d3cc8f74bdbd316de4ae7b9e3 100644\n--- a/README.md\n+++ b/README.md\n@@ -132,7 +132,7 [...]',
 56 | )
 57 | ```
 58 | 
 59 | [`HfApi.get_discussion_details`] returns a [`DiscussionWithDetails`] object, which is a subclass of [`Discussion`]
 60 | with more detailed information about the Discussion or Pull Request. Information includes all the comments, status changes,
 61 | and renames of the Discussion via [`DiscussionWithDetails.events`].
 62 | 
 63 | In case of a Pull Request, you can retrieve the raw git diff with [`DiscussionWithDetails.diff`]. All the commits of the
 64 | Pull Request are listed in [`DiscussionWithDetails.events`].
 65 | 
 66 | 
 67 | ## Create and edit a Discussion or Pull Request programmatically
 68 | 
 69 | The [`HfApi`] class also offers ways to create and edit Discussions and Pull Requests.
 70 | You will need an [access token](https://huggingface.co/docs/hub/security-tokens) to create and edit Discussions
 71 | or Pull Requests.
 72 | 
 73 | The simplest way to propose changes on a repo on the Hub is via the [`create_commit`] API: just 
 74 | set the `create_pr` parameter to `True`. This parameter is also available on other methods that wrap [`create_commit`]:
 75 | 
 76 | * [`upload_file`]
 77 | * [`upload_folder`]
 78 | * [`delete_file`]
 79 | * [`delete_folder`]
 80 | * [`metadata_update`]
 81 | 
 82 | ```python
 83 | >>> from huggingface_hub import metadata_update
 84 | 
 85 | >>> metadata_update(
 86 | ...     repo_id="username/repo_name",
 87 | ...     metadata={"tags": ["computer-vision", "awesome-model"]},
 88 | ...     create_pr=True,
 89 | ... )
 90 | ```
 91 | 
 92 | You can also use [`HfApi.create_discussion`] (respectively [`HfApi.create_pull_request`]) to create a Discussion (respectively a Pull Request) on a repo.
 93 | Opening a Pull Request this way can be useful if you need to work on changes locally. Pull Requests opened this way will be in `"draft"` mode.
 94 | 
 95 | ```python
 96 | >>> from huggingface_hub import create_discussion, create_pull_request
 97 | 
 98 | >>> create_discussion(
 99 | ...     repo_id="username/repo-name",
100 | ...     title="Hi from the huggingface_hub library!",
101 | ...     token="<insert your access token here>",
102 | ... )
103 | DiscussionWithDetails(...)
104 | 
105 | >>> create_pull_request(
106 | ...     repo_id="username/repo-name",
107 | ...     title="Hi from the huggingface_hub library!",
108 | ...     token="<insert your access token here>",
109 | ... )
110 | DiscussionWithDetails(..., is_pull_request=True)
111 | ```
112 | 
113 | Managing Pull Requests and Discussions can be done entirely with the [`HfApi`] class. For example:
114 | 
115 | * [`comment_discussion`] to add comments
116 | * [`edit_discussion_comment`] to edit comments
117 | * [`rename_discussion`] to rename a Discussion or Pull Request
118 | * [`change_discussion_status`] to open or close a Discussion / Pull Request
119 | * [`merge_pull_request`] to merge a Pull Request
120 | 
121 | 
122 | Visit the [`HfApi`] documentation page for an exhaustive reference of all available methods.
123 | 
124 | ## Push changes to a Pull Request
125 | 
126 | *Coming soon!*
127 | 
128 | ## See also
129 | 
130 | For a more detailed reference, visit the [community](/source/package_reference/community) and the [hf_api](/source/package_reference/hf_api) documentation pages.
131 | 


--------------------------------------------------------------------------------
/docs/source/installation.mdx:
--------------------------------------------------------------------------------
  1 | # Installation
  2 | 
  3 | Before you start, you will need to set up your environment by installing the appropriate packages.
  4 | 
  5 | `huggingface_hub` is tested on **Python 3.7+**.
  6 | 
  7 | ## Install with pip
  8 | 
  9 | It is highly recommended to install `huggingface_hub` in a [virtual environment](https://docs.python.org/3/library/venv.html).
 10 | If you are unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/).
 11 | A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies.
 12 | 
 13 | Start by creating a virtual environment in your project directory:
 14 | 
 15 | ```bash
 16 | python -m venv .env
 17 | ```
 18 | 
 19 | Activate the virtual environment. On Linux and macOS:
 20 | 
 21 | ```bash
 22 | source .env/bin/activate
 23 | ```
 24 | 
 25 | Activate virtual environment on Windows:
 26 | 
 27 | ```bash
 28 | .env/Scripts/activate
 29 | ```
 30 | 
 31 | Now you're ready to install `huggingface_hub` [from the PyPI registry](https://pypi.org/project/huggingface-hub/):
 32 | 
 33 | ```bash
 34 | pip install --upgrade huggingface_hub
 35 | ```
 36 | 
 37 | Once done, [check that the installation](#check-installation) is working correctly.
 38 | 
 39 | ### Install optional dependencies
 40 | 
 41 | Some dependencies of `huggingface_hub` are [optional](https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#optional-dependencies) because they are not required to run the core features of `huggingface_hub`. However, some features of the `huggingface_hub` may not be available if the optional dependencies aren't installed.
 42 | 
 43 | You can install optional dependencies via `pip`:
 44 | ```bash
 45 | # Install dependencies for tensorflow-specific features
 46 | # /!\ Warning: this is not equivalent to `pip install tensorflow`
 47 | pip install 'huggingface_hub[tensorflow]'
 48 | 
 49 | # Install dependencies for both torch-specific and CLI-specific features.
 50 | pip install 'huggingface_hub[cli,torch]'
 51 | ```
 52 | 
 53 | Here is the list of optional dependencies in `huggingface_hub`:
 54 | - `cli`: provide a more convenient CLI interface for `huggingface_hub`.
 55 | - `fastai`, `torch`, `tensorflow`: dependencies to run framework-specific features.
 56 | - `dev`: dependencies to contribute to the lib. Includes `testing` (to run tests), `typing` (to run type checker) and `quality` (to run linters).
 57 | 
 58 | 
 59 | 
 60 | ### Install from source
 61 | 
 62 | In some cases, it is interesting to install `huggingface_hub` directly from source.
 63 | This allows you to use the bleeding edge `main` version rather than the latest stable version.
 64 | The `main` version is useful for staying up-to-date with the latest developments, for instance
 65 | if a bug has been fixed since the last official release but a new release hasn't been rolled out yet.
 66 | 
 67 | However, this means the `main` version may not always be stable. We strive to keep the
 68 | `main` version operational, and most issues are usually resolved
 69 | within a few hours or a day. If you run into a problem, please open an Issue so we can
 70 | fix it even sooner!
 71 | 
 72 | ```bash
 73 | pip install git+https://github.com/huggingface/huggingface_hub
 74 | ```
 75 | 
 76 | When installing from source, you can also specify a specific branch. This is useful if you
 77 | want to test a new feature or a new bug-fix that has not been merged yet:
 78 | 
 79 | ```bash
 80 | pip install git+https://github.com/huggingface/huggingface_hub@my-feature-branch
 81 | ```
 82 | 
 83 | Once done, [check that the installation](#check-installation) is working correctly.
 84 | 
 85 | ### Editable install
 86 | 
 87 | Installing from source allows you to set up an [editable install](https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs).
 88 | This is a more advanced installation if you plan to contribute to `huggingface_hub`
 89 | and need to test changes in the code. You need to clone a local copy of `huggingface_hub`
 90 | on your machine.
 91 | 
 92 | ```bash
 93 | # First, clone repo locally
 94 | git clone https://github.com/huggingface/huggingface_hub.git
 95 | 
 96 | # Then, install with -e flag
 97 | cd huggingface_hub
 98 | pip install -e .
 99 | ```
100 | 
101 | These commands will link the folder you cloned the repository to and your Python library paths.
102 | Python will now look inside the folder you cloned to in addition to the normal library paths.
103 | For example, if your Python packages are typically installed in `./.venv/lib/python3.11/site-packages/`,
104 | Python will also search the folder you cloned `./huggingface_hub/`.
105 | 
106 | ## Install with conda
107 | 
108 | If you are more familiar with it, you can install `huggingface_hub` using the [conda-forge channel](https://anaconda.org/conda-forge/huggingface_hub):
109 | 
110 | 
111 | ```bash
112 | conda install -c conda-forge huggingface_hub
113 | ```
114 | 
115 | Once done, [check that the installation](#check-installation) is working correctly.
116 | 
117 | ## Check installation
118 | 
119 | Once installed, check that `huggingface_hub` works properly by running the following command:
120 | 
121 | ```bash
122 | python -c "from huggingface_hub import model_info; print(model_info('gpt2'))"
123 | ```
124 | 
125 | This command will fetch information from the Hub about the [gpt2](https://huggingface.co/gpt2) model.
126 | Output should look like this:
127 | 
128 | ```text
129 | Model Name: gpt2
130 | Tags: ['pytorch', 'tf', 'jax', 'tflite', 'rust', 'safetensors', 'gpt2', 'text-generation', 'en', 'doi:10.57967/hf/0039', 'transformers', 'exbert', 'license:mit', 'has_space']
131 | Task: text-generation
132 | ```
133 | 
134 | ## Next steps
135 | 
136 | Once `huggingface_hub` is properly installed on your machine, you might want to
137 | [configure environment variables](package_reference/environment_variables) or [check one of our guides](guides/overview) to get started.


--------------------------------------------------------------------------------