├── .editorconfig ├── .github ├── ISSUE_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── community-contribution-labeling.yml │ ├── notify_team_new_comment.yml │ ├── pre-commit.yml │ ├── python-publish.yml │ ├── pythontest.yml │ ├── unassign-inactive.yaml │ └── update-pr-spreadsheet.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── AUTHORS.rst ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── 404.rst ├── Makefile ├── README.rst ├── _templates │ ├── footer.html │ └── layout.html ├── chefops.md ├── community │ ├── authors.rst │ ├── contributing.md │ └── index.rst ├── concepts │ ├── content_workflows.md │ ├── developer_workflows.md │ ├── index.rst │ ├── introduction.md │ ├── reviewing_channels.md │ └── terminology.md ├── conf.py ├── csv_metadata │ ├── README.rst │ ├── csv_exercises.md │ ├── csv_workflow.md │ └── index.rst ├── developer │ ├── corrections.md │ ├── design_cli.md │ ├── ids.md │ ├── index.rst │ ├── kolibripreview.md │ ├── sushops.md │ └── uploadprocess.md ├── downloader.md ├── examples │ ├── detokenify.pl │ ├── document_conversion.ipynb │ ├── exercises.ipynb │ ├── index.rst │ ├── languages.ipynb │ └── samplefiles │ │ └── documents │ │ ├── doc_EN.pdf │ │ ├── doc_ES.pdf │ │ └── doc_FR.pdf ├── exercises.md ├── figures │ ├── HandBrake │ │ ├── handbrake_steps.png │ │ ├── handbreake_audio_settings.png │ │ ├── handbreake_resizing_settings.png │ │ └── handbreake_screenshot_video_settings.png │ ├── content_pipeline_diagram.png │ ├── content_pipeline_diagram_with_highlight.png │ ├── kolibri_logo.png │ ├── logo.ico │ └── ricecooker_domain.png ├── files.md ├── history.rst ├── htmlapps.md ├── index.rst ├── index_api_reference.rst ├── index_utils.rst ├── installation.md ├── languages.md ├── make.bat ├── nodes.md ├── parsing_html.md ├── pdfutils.md ├── requirements.txt ├── tutorial │ ├── explanations.md │ ├── gettingstarted.rst │ ├── index.rst │ ├── quickstart.rst │ └── tutorial.rst ├── usage.md └── 
video_compression.md ├── examples ├── README.md ├── gettingstarted │ └── sushichef.py ├── oldexamples │ ├── README.md │ ├── content │ │ ├── 0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5-data.json │ │ ├── 0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5.svg │ │ ├── captions.vtt │ │ ├── htmltest.zip │ │ ├── sushirolls.pdf │ │ └── video.mp4 │ ├── data │ │ ├── perseus_graph_question.json │ │ ├── sample_perseus01.json │ │ ├── sample_perseus02.json │ │ └── sample_tree.json │ ├── large_wikipedia_chef.py │ ├── sample_program.py │ └── wikipedia_video_chef.py ├── studiocontent │ └── sushichef.py ├── tutorial │ └── sushichef.py └── wikipedia │ ├── README.md │ └── sushichef.py ├── pytest.ini ├── resources ├── scripts │ ├── convertvideo.bat │ └── convertvideo.sh └── templates │ └── csv_channel │ ├── Channel.csv │ ├── Content.csv │ ├── ExerciseQuestions.csv │ ├── Exercises.csv │ └── csvchef.py ├── ricecooker ├── __init__.py ├── chefs.py ├── classes │ ├── __init__.py │ ├── files.py │ ├── licenses.py │ ├── nodes.py │ └── questions.py ├── commands.py ├── config.py ├── exceptions.py ├── managers │ ├── __init__.py │ ├── progress.py │ └── tree.py └── utils │ ├── __init__.py │ ├── audio.py │ ├── browser.py │ ├── caching.py │ ├── corrections.py │ ├── downloader.py │ ├── encodings.py │ ├── html.py │ ├── html_writer.py │ ├── images.py │ ├── jsontrees.py │ ├── kolibripreview.py │ ├── libstudio.py │ ├── linecook.py │ ├── metadata_provider.py │ ├── paths.py │ ├── pdf.py │ ├── pipeline │ ├── __init__.py │ ├── context.py │ ├── convert.py │ ├── exceptions.py │ ├── extract_metadata.py │ ├── file_handler.py │ ├── mime.types │ └── transfer.py │ ├── proxy.py │ ├── subtitles.py │ ├── thumbscropping.py │ ├── tokens.py │ ├── utils.py │ ├── videos.py │ ├── web.py │ ├── youtube.py │ └── zip.py ├── setup.cfg ├── setup.py ├── tests ├── cassettes │ ├── test_gdrive_audio.yaml │ ├── test_gdrive_channel_spreadsheet.yaml │ ├── test_gdrive_doc.yaml │ ├── test_gdrive_pdf.yaml │ ├── test_gdrive_slideshow.yaml │ ├── 
test_gdrive_video.yaml │ ├── test_gdrive_vtt.yaml │ └── test_youtubevideo_process_file.yaml ├── chefs │ └── fake_chef.py ├── conftest.py ├── media_utils │ ├── README.md │ ├── __init__.py │ ├── files │ │ ├── Wilhelm_Scream.mp3 │ │ ├── assets │ │ │ ├── css │ │ │ │ ├── empty.css │ │ │ │ └── empty2.css │ │ │ ├── images │ │ │ │ ├── 4933759886_098e9acf93_m.jpg │ │ │ │ └── copyright.txt │ │ │ └── js │ │ │ │ └── empty.js │ │ ├── audio │ │ │ └── file_example_MP3_700KB.mp3 │ │ ├── file_metadata.txt │ │ ├── generate_thumbnail │ │ │ ├── sample.epub │ │ │ ├── sample.pdf │ │ │ └── sample.zip │ │ ├── kepub.epub │ │ ├── page_with_links.html │ │ ├── subtitles │ │ │ ├── basic.srt │ │ │ ├── basic.vtt │ │ │ ├── empty.ttml │ │ │ ├── encapsulated.sami │ │ │ ├── encapsulated.vtt │ │ │ └── not.txt │ │ └── thumbnails │ │ │ ├── BRAlogo1.png │ │ │ ├── toosquare.png │ │ │ ├── tootall.png │ │ │ └── toowide.png │ ├── test_audio.py │ ├── test_proxy.py │ ├── test_subtitles.py │ ├── test_thumbnails.py │ ├── test_videos.py │ ├── test_web.py │ └── test_youtube.py ├── pipeline │ ├── __init__.py │ └── test_transfer.py ├── test_argparse.py ├── test_chef_integration.py ├── test_csv_metadata.py ├── test_data.py ├── test_downloader.py ├── test_exercises.py ├── test_files.py ├── test_licenses.py ├── test_links.py ├── test_pdfutils.py ├── test_requests.py ├── test_settings.py ├── test_thumbnails.py ├── test_tree.py ├── test_videos.py ├── test_youtube.py ├── test_zip.py ├── testchannels │ └── csv_channel_with_exercises │ │ ├── Channel.csv │ │ ├── Content.csv │ │ ├── ExerciseQuestions.csv │ │ ├── Exercises.csv │ │ └── channeldir │ │ ├── algebra_exercise_thumb.png │ │ ├── channel_thumbnail.jpg │ │ ├── contentnodes │ │ └── audio │ │ │ └── WZ_exercise_thumbnail.png │ │ └── exercises │ │ └── .gitkeep ├── testcontent │ ├── downloaded │ │ └── .gitkeep │ ├── exercises │ │ ├── eb3f3bf7c317408ee90995b5bcf4f3a59606aedd-data.json │ │ ├── eb3f3bf7c317408ee90995b5bcf4f3a59606aedd.svg │ │ ├── no-wifi.png │ │ ├── 
perseus_question_new_bar_graphs.json │ │ ├── perseus_question_x43bbec76d5f14f88_bg.json │ │ ├── perseus_question_x43bbec76d5f14f88_en.json │ │ └── test_image_base64.data │ ├── generated │ │ └── .gitkeep │ ├── samples │ │ ├── sample_doc_with_toc.pdf │ │ ├── testdocument.epub │ │ ├── testsubtitles_ar.srt │ │ ├── testsubtitles_ar.ttml │ │ ├── thumbnail.jpg │ │ └── thumbnail.png │ └── youtubecache │ │ └── .gitkeep ├── utils │ └── test_extensions.py └── vcr_config.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * ricecooker version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 
15 | ``` 16 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Automatically update versions for pip 2 | 3 | version: 2 4 | updates: 5 | # Maintain dependencies for Python 6 | - package-ecosystem: "pip" 7 | directory: "/" 8 | schedule: 9 | interval: "weekly" 10 | day: "friday" 11 | time: "00:00" 12 | # Maintain dependencies for Github Actions 13 | - package-ecosystem: "github-actions" 14 | directory: "/" 15 | schedule: 16 | interval: "weekly" 17 | day: "friday" 18 | time: "00:00" 19 | groups: 20 | github: 21 | patterns: 22 | - "actions/*" 23 | -------------------------------------------------------------------------------- /.github/workflows/community-contribution-labeling.yml: -------------------------------------------------------------------------------- 1 | name: Community Contribution Label 2 | on: 3 | issues: 4 | types: [assigned, unassigned] 5 | jobs: 6 | call-label-action: 7 | uses: learningequality/.github/.github/workflows/community-contribution-label.yml@main 8 | secrets: 9 | LE_BOT_APP_ID: ${{ secrets.LE_BOT_APP_ID }} 10 | LE_BOT_PRIVATE_KEY: ${{ secrets.LE_BOT_PRIVATE_KEY }} 11 | -------------------------------------------------------------------------------- /.github/workflows/notify_team_new_comment.yml: -------------------------------------------------------------------------------- 1 | name: Send a slack notification when a contributor comments on issue 2 | on: 3 | issue_comment: 4 | types: [created] 5 | jobs: 6 | contributor_issue_comment: 7 | uses: learningequality/.github/.github/workflows/notify_team_new_comment.yml@main 8 | secrets: 9 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 10 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: 
Linting 2 | on: 3 | push: 4 | branches: 5 | - develop 6 | - main 7 | pull_request: 8 | branches: 9 | - develop 10 | - main 11 | jobs: 12 | pre_job: 13 | name: Path match check 14 | runs-on: ubuntu-latest 15 | # Map a step output to a job output 16 | outputs: 17 | should_skip: ${{ steps.skip_check.outputs.should_skip }} 18 | steps: 19 | - id: skip_check 20 | uses: fkirc/skip-duplicate-actions@master 21 | with: 22 | github_token: ${{ github.token }} 23 | paths_ignore: '["**.po", "**.json"]' 24 | linting: 25 | name: All file linting 26 | needs: pre_job 27 | if: ${{ needs.pre_job.outputs.should_skip != 'true' }} 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v4 31 | - uses: actions/setup-python@v5 32 | - uses: pre-commit/action@v3.0.1 33 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using pypa/gh-action-pypi-publish when a release is created 2 | 3 | name: Upload Python Package 4 | on: 5 | release: 6 | types: [published] 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | # IMPORTANT: this permission is mandatory for trusted publishing 12 | id-token: write 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: 3.9 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install setuptools wheel pre-commit 23 | - name: Build distribution 24 | run: make dist 25 | - name: Publish package distributions to PyPI 26 | uses: pypa/gh-action-pypi-publish@release/v1 27 | -------------------------------------------------------------------------------- /.github/workflows/unassign-inactive.yaml: -------------------------------------------------------------------------------- 1 | name: "Unassign Inactive Contributors" 2 
| run-name: Unassign Inactive Contributors 3 | on: 4 | schedule: 5 | - cron: "1 0 * * 1" # Every Monday at 00:01 UTC 6 | workflow_dispatch: 7 | jobs: 8 | unassign-inactive: 9 | uses: learningequality/.github/.github/workflows/unassign-inactive-issues.yaml@main 10 | secrets: 11 | LE_BOT_APP_ID: ${{ secrets.LE_BOT_APP_ID }} 12 | LE_BOT_PRIVATE_KEY: ${{ secrets.LE_BOT_PRIVATE_KEY }} 13 | SLACK_COMMUNITY_NOTIFICATIONS_WEBHOOK_URL: ${{ secrets.SLACK_COMMUNITY_NOTIFICATIONS_WEBHOOK_URL }} 14 | -------------------------------------------------------------------------------- /.github/workflows/update-pr-spreadsheet.yml: -------------------------------------------------------------------------------- 1 | name: Update community pull requests spreadsheet 2 | on: 3 | pull_request_target: 4 | types: [assigned, unassigned, opened, closed, reopened] 5 | jobs: 6 | call-update-spreadsheet: 7 | uses: learningequality/.github/.github/workflows/update-pr-spreadsheet.yml@main 8 | secrets: 9 | CONTRIBUTIONS_SPREADSHEET_ID: ${{ secrets.CONTRIBUTIONS_SPREADSHEET_ID }} 10 | CONTRIBUTIONS_SHEET_NAME: ${{ secrets.CONTRIBUTIONS_SHEET_NAME }} 11 | GH_UPLOADER_GCP_SA_CREDENTIALS: ${{ secrets.GH_UPLOADER_GCP_SA_CREDENTIALS }} 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Standard credentials (make sure never commited) 3 | credentials/studio.json 4 | credentials/studio_token.txt 5 | credentials/.studiotoken 6 | 7 | # Code examples not ready for prime time yet 8 | 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Development notebooks 19 | WIP 20 | 21 | 22 | # Distribution / packaging 23 | .Python 24 | env/ 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | *.egg-info/ 37 | 
.installed.cfg 38 | *.egg 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *,cover 59 | .hypothesis/ 60 | .pytest_cache 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/drafts 79 | docs/examples/drafts 80 | docs/build/ 81 | docs/_build/ 82 | docs/ricecooker.classes.rst 83 | docs/ricecooker.managers.rst 84 | docs/ricecooker.utils.rst 85 | 86 | # Links checker cache 87 | .brokdb 88 | 89 | # PyBuilder 90 | target/ 91 | 92 | # IPython Notebook 93 | .ipynb_checkpoints 94 | 95 | # pyenv 96 | .python-version 97 | 98 | # celery beat schedule file 99 | celerybeat-schedule 100 | 101 | # dotenv 102 | .env 103 | 104 | # virtualenv 105 | venv/ 106 | venv3/ 107 | ENV/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | docs/out/ 115 | docs/_build 116 | 117 | storage/ 118 | restore/ 119 | 120 | # nodejs modules 121 | node_modules/ 122 | 123 | sushi_chefs/ 124 | tests/testcontent/downloaded/ 125 | tests/testcontent/generated/ 126 | tests/testcontent/youtubecache/ 127 | video_cache_py3.sqlite 128 | .webcache 129 | .ricecookerfilecache 130 | 131 | # IDE project dirs 132 | .idea/ 133 | .vscode/ 134 | .vim/ 135 | 136 | cache.sqlite 137 | 138 | chefdata/ 139 | audio_cache.sqlite 140 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/python/black 3 | rev: 21.12b0 4 | hooks: 5 | - id: black 6 | types_or: [python, pyi] 7 | additional_dependencies: ['click==8.0.4'] 8 | - repo: https://github.com/pycqa/flake8 9 | rev: 7.1.1 10 | hooks: 11 | - id: flake8 12 | exclude: (?x)(.*examples.*) 13 | - repo: https://github.com/pre-commit/pre-commit-hooks 14 | rev: v4.1.0 15 | hooks: 16 | - id: trailing-whitespace 17 | - id: check-yaml 18 | - id: check-added-large-files 19 | exclude: '^tests/cassettes' 20 | - id: debug-statements 21 | - id: end-of-file-fixer 22 | exclude: '^.+?\.json$' 23 | - repo: https://github.com/asottile/reorder_python_imports 24 | rev: v2.6.0 25 | hooks: 26 | - id: reorder-python-imports 27 | - repo: https://github.com/google/yamlfmt 28 | rev: v0.14.0 29 | hooks: 30 | - id: yamlfmt 31 | exclude: '^tests/cassettes' 32 | - repo: https://github.com/rhysd/actionlint 33 | rev: v1.7.7 34 | hooks: 35 | - id: actionlint 36 | additional_dependencies: 37 | # actionlint has a shellcheck integration which extracts shell scripts in `run:` steps from GitHub Actions 38 | # and checks these with shellcheck. This is arguably its most useful feature, 39 | # but the integration only works if shellcheck is installed 40 | - "github.com/wasilibs/go-shellcheck/cmd/shellcheck@v0.10.0" 41 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: all 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | jobs: 8 | pre_install: 9 | - pip install -e . 
10 | sphinx: 11 | configuration: docs/conf.py 12 | python: 13 | install: 14 | - requirements: docs/requirements.txt 15 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | * Jordan Yoshihara 6 | * Aron Asor 7 | * Jamie Alexandre 8 | * Benjamin Bach 9 | * Ivan Savov 10 | * David Hu 11 | * Kevin Ollivier 12 | * Alejandro Martinez Romero 13 | * Blaine Jester 14 | 15 | 16 | .. TODOC: use gource of ricecooker repo & embed video here 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016, 2017, 2018 Learning Equality. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | 2 | include AUTHORS.rst 3 | include CONTRIBUTING.rst 4 | include HISTORY.rst 5 | include LICENSE 6 | include README.md 7 | 8 | recursive-include tests * 9 | recursive-exclude tests/testcontent * 10 | recursive-exclude * __pycache__ 11 | recursive-exclude * *.py[co] 12 | 13 | recursive-include docs *.md *.rst conf.py Makefile make.bat *.jpg *.png *.gif 14 | 15 | recursive-exclude docs/tutorial/storage * 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . 
-name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . -name '__pycache__' -exec rm -fr {} + 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | rm -fr .tox/ 47 | rm -f .coverage 48 | rm -fr htmlcov/ 49 | rm -rf tests/testcontent/downloaded/* 50 | rm -rf tests/testcontent/generated/* 51 | 52 | lint: ## check style with flake8 53 | flake8 ricecooker tests 54 | 55 | test: clean-test ## run tests quickly with the default Python 56 | pytest 57 | 58 | 59 | test-all: clean-test ## run tests on every Python version with tox 60 | tox 61 | 62 | integration-test: 63 | echo "Testing against hotfixes" 64 | CONTENTWORKSHOP_URL=https://hotfixes.studio.learningequality.org python tests/test_chef_integration.py 65 | echo "Testing against unstable" 66 | CONTENTWORKSHOP_URL=https://unstable.studio.learningequality.org python tests/test_chef_integration.py 67 | echo "Testing against production" 68 | CONTENTWORKSHOP_URL=https://studio.learningequality.org python tests/test_chef_integration.py 69 | 70 | coverage: ## check code coverage quickly with the default Python 71 | pip install coverage pytest 72 | coverage run --source ricecooker -m pytest 73 | coverage report -m 74 | coverage html 75 | $(BROWSER) htmlcov/index.html 76 | 77 | docsclean: 78 | $(MAKE) -C docs clean 79 | rm -f docs/_build/* 80 | 81 | docs: ## generate Sphinx HTML documentation 82 | pip install -r docs/requirements.txt 83 | $(MAKE) -C docs clean 84 | $(MAKE) -C docs html 85 | # $(BROWSER) docs/build/html/index.html 86 | 87 | latexdocs: 88 | pip install -r docs/requirements.txt 89 | $(MAKE) -C docs clean 90 | $(MAKE) -C docs latex 91 | 92 | servedocs: docs ## compile the docs watching for changes 93 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
94 | 95 | dist: clean 96 | pip install setuptools wheel 97 | python setup.py sdist bdist_wheel 98 | 99 | release: dist ## package and upload a release 100 | pip install twine 101 | twine upload dist/* 102 | 103 | install: clean ## install the package to the active Python's site-packages 104 | python setup.py install 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ricecooker 2 | ========== 3 | [![PyPI pyversions](https://img.shields.io/pypi/pyversions/ricecooker.svg)](https://pypi.python.org/pypi/ricecooker/) 4 | [![build](https://github.com/learningequality/ricecooker/actions/workflows/pythontest.yml/badge.svg?branch=master)](https://github.com/learningequality/ricecooker/actions) 5 | [![docs](https://readthedocs.org/projects/ricecooker/badge/?version=latest&style=flat)](https://ricecooker.readthedocs.io/) 6 | 7 | 8 | The `ricecooker` library is a framework for automating the conversion of educational content into 9 | Kolibri content channels and uploading them to [Kolibri Studio](https://studio.learningequality.org/), 10 | which is the central content server for [Kolibri](http://learningequality.org/kolibri/). 11 | 12 | 13 | ## [📚 Ricecooker docs](https://ricecooker.readthedocs.io/) 14 | 15 | Visit the documentation site at [📚 ricecooker.readthedocs.io](https://ricecooker.readthedocs.io/) 16 | for the full details about [installation](https://ricecooker.readthedocs.io/en/latest/installation.html), 17 | [getting started](https://ricecooker.readthedocs.io/en/latest/tutorial/gettingstarted.html), 18 | [API reference](https://ricecooker.readthedocs.io/en/latest/index_api_reference.html), 19 | and [code examples](https://ricecooker.readthedocs.io/en/latest/examples/index.html). 
20 | 21 | 22 | 23 | ## Overview of Kolibri content 24 | 25 | `ricecooker` is used to take openly licensed educational content available on the 26 | web and convert it into an offline-friendly package that can be imported into Kolibri. 27 | 28 | The basic process of getting new content into Kolibri is as follows: 29 | 30 | - **UPLOAD** your content to Kolibri Studio either manually through the Kolibri Studio 31 | web interface or programmatically using a `ricecooker`-based content integration script. 32 | - **PUBLISH** the channel on Kolibri Studio to make it accessible for use in Kolibri. 33 | - **IMPORT** the channel into Kolibri using the channel token displayed in 34 | Kolibri Studio after the PUBLISH step is done. 35 | 36 | The diagram below illustrates how content flows within the Kolibri ecosystem 37 | and highlights the part which is covered by the `ricecooker` framework (bottom left). 38 | 39 | ![Overview of steps for integrating external content sources for use in the Kolibri Learning Platform](docs/figures/content_pipeline_diagram_with_highlight.png) 40 | *External content sources (left) are first uploaded to [Kolibri Studio](https://studio.learningequality.org/) (middle), so they can be used in the [Kolibri Learning Platform](http://learningequality.org/kolibri/) (right).* 41 | 42 | 43 | 44 | 45 | ##### Further reading 46 | The [Ricecooker docs](https://ricecooker.readthedocs.io/) website is the best 47 | place to learn about writing automated content integration scripts. 48 | 49 | Here are some links to other documents and guides you can read to learn about 50 | the other parts of the Kolibri content platform: 51 | 52 | - The [Kolibri Content Integration Guide](https://learningequality.org/r/integration-guide) 53 | is a comprehensive guide to the decisions, processes, and tools for integrating 54 | external content sources for use in the Kolibri Learning Platform. 
55 | - Read the [Kolibri Studio docs](http://kolibri-studio.readthedocs.io/en/latest/) 56 | to learn more about the Kolibri Studio features 57 | - Read the [Kolibri docs](http://kolibri.readthedocs.io/en/latest/) to learn 58 | how to install Kolibri on your machine (useful for testing channels) 59 | -------------------------------------------------------------------------------- /docs/404.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | Page not found 4 | ============== 5 | 6 | It seems you are searching for a topic that has been moved elsewhere in our documentation. Please try the following to find what you were looking for: 7 | 8 | * Browse the table of contents in the sidebar 9 | * Use the search box 10 | 11 | 12 | We apologize for the inconvenience! 13 | -------------------------------------------------------------------------------- /docs/README.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. Note this page exists for backward compatibility (since we sent this link) 4 | to partners, we don't want them to hit a 404. 5 | 6 | 7 | Ricecooker 8 | ========== 9 | 10 | The following links will get you started with the content integration process in no time! 11 | 12 | - `Install `_ Python, the ``ricecooker`` package, and system prerequisites (5–20 mins) 13 | - `Getting started `_: upload your first channel to Kolibri Studio and import it in Kolibri (10 mins) 14 | - For more info see the `ricecooker docs main page <../index.html>`_ 📚. 15 | 16 | Welcome to the team! 17 | 18 | 19 | License 20 | ------- 21 | 22 | .. 
image:: https://i.creativecommons.org/l/by-sa/4.0/88x31.png 23 | :alt: Creative Commons License 24 | 25 | This work is licensed under a `Creative Commons Attribution-ShareAlike 4.0 International License `__ 26 | -------------------------------------------------------------------------------- /docs/_templates/footer.html: -------------------------------------------------------------------------------- 1 | {% extends "!footer.html" %} 2 | {%- block extrafooter %} 3 | 4 | © {% trans %}Copyright{% endtrans %} {{ copyright }} 5 |
6 | 7 | Licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. 8 | 9 | {{ super() }} 10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block footer %} {{ super() }} 3 | 4 | 63 | {% endblock %} 64 | -------------------------------------------------------------------------------- /docs/community/authors.rst: -------------------------------------------------------------------------------- 1 | ../../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/community/contributing.md: -------------------------------------------------------------------------------- 1 | ../../CONTRIBUTING.md 2 | -------------------------------------------------------------------------------- /docs/community/index.rst: -------------------------------------------------------------------------------- 1 | Community 2 | ========= 3 | Learn how to contribute to the project and how you can become part of the community 4 | of content developers working to integrate content into the Kolibri platform, 5 | and more broadly into all offline learning tools. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | contributing 11 | authors 12 | ../history 13 | 14 | .. TODOC: volunteers program, step-by-step guide to small contribution, etc. 15 | -------------------------------------------------------------------------------- /docs/concepts/content_workflows.md: -------------------------------------------------------------------------------- 1 | Content integration methods 2 | =========================== 3 | 4 | There are two methods that you can use to create Kolibri channels: 5 | 6 | * **Manual content upload**: 7 | This method is suitable for content that is saved on your local computer such as files or folders. 
8 | You can directly upload your content through the Kolibri Studio web interface. 9 | This method is appropriate for small and medium content sets. 10 | See the [Kolibri Studio User Guide](https://kolibri-studio.readthedocs.io/en/latest/) for more information. 11 | 12 | * **Uploading content using a content integration script**: 13 | You can use a content integration script (a.k.a. sushichef script) to 14 | integrate content from websites, content repositories, APIs, or other external sources. 15 | A content integration script is a Python program. 16 | 17 | 18 | More information about each of these methods is provided below. 19 | 20 | 21 | ## Manual content upload 22 | You can use the [Kolibri Studio](https://studio.learningequality.org/) web interface 23 | to upload various content types and organize them into channels. Kolibri Studio 24 | allows you to explore pre-organized libraries of open educational resources, 25 | and reuse them in your channels. You can also add tags, re-order, re-mix content, 26 | and create exercises to support students' learning process. 27 | 28 | To learn more about Studio, we recommend reading the following pages in the 29 | [Kolibri Studio User Guide](https://kolibri-studio.readthedocs.io/en/latest/): 30 | - [Accessing Studio](https://kolibri-studio.readthedocs.io/en/latest/access_studio.html) 31 | - [Working with channels](https://kolibri-studio.readthedocs.io/en/latest/working_channels.html) 32 | - [Adding content to channels](https://kolibri-studio.readthedocs.io/en/latest/add_content.html) 33 | 34 | When creating large channels (100+ content items) or channels that need to be 35 | updated regularly, you should consider using a content integration script, 36 | as described below. 37 | 38 | 39 | 40 | 41 | ## Content integration scripts 42 | 43 | The [`ricecooker`](https://github.com/learningequality/ricecooker) framework is a 44 | tool that programmers can use to upload content to Kolibri Studio in an automated 45 | fashion. 
We refer to these import scripts as **sushi chefs**, because their job 46 | is to chop up the source material (e.g. an educational website) and package the 47 | content items into tasty morsels (content items) with all the associated metadata. 48 | 49 | Using the bulk import option requires a content developer (sushi chef author) 50 | to prepare the content, content metadata, and run the chef script to perform the 51 | upload to Kolibri Studio. 52 | 53 | Educators and content specialists can assist the developers by preparing a **spec sheet** 54 | for the content source that provides detailed guidance for how content should be 55 | structured and organized within the channel. The content specialist also plays a role 56 | during the channel [review process](reviewing_channels.md). 57 | 58 | 59 | 60 | The following alternative options are available for specifying the metadata for 61 | content nodes that can be used in special circumstances. 62 | 63 | ### CSV metadata workflow 64 | In addition to the web interface and the Python interface (`ricecooker`), there 65 | exists an option for creating Kolibri channels by: 66 | - Organizing content items (documents, videos, mp3 files) into a folder hierarchy on the local file system 67 | - Specifying metadata in the form of CSV files created using Excel 68 | 69 | The CSV-based workflow is a good fit for non-technical users since it doesn't 70 | require writing any code, but instead can use Excel to provide all the metadata.
71 | 72 | - [CSV-based workflow README](https://github.com/learningequality/sample-channels/tree/master/channels/csv_channel) 73 | - [Example content folder](https://github.com/learningequality/sample-channels/tree/master/channels/csv_exercises/content) 74 | - [Example Channel.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Channel.csv) 75 | - [Example Content.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Content.csv) 76 | - [CSV-based exercises info](https://github.com/learningequality/sample-channels/tree/master/channels/csv_exercises) 77 | 78 | Organizing the content into folders and creating the CSV metadata files is most 79 | of the work, and can be done by non-programmers. 80 | The generic sushi chef script (`LineCook`) is then used to upload the channel. 81 | -------------------------------------------------------------------------------- /docs/concepts/index.rst: -------------------------------------------------------------------------------- 1 | Concepts 2 | ======== 3 | 4 | The purpose of this page is to help you understand how content integration work 5 | fits more broadly within the Kolibri ecosystem. 6 | 7 | The links below establish the shared vocabulary to facilitate communication between 8 | content partners, the Learning Equality team, Kolibri users, and other stakeholders. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | introduction 14 | terminology 15 | content_workflows 16 | developer_workflows 17 | reviewing_channels 18 | -------------------------------------------------------------------------------- /docs/concepts/introduction.md: -------------------------------------------------------------------------------- 1 | Kolibri content ecosystem overview 2 | ================================== 3 | 4 | Educational content in the Kolibri platform is organized into **content channels**. 
5 | The `ricecooker` framework is used for creating content channels and uploading them 6 | to [Kolibri Studio](https://studio.learningequality.org/), which is the central 7 | content server that [Kolibri](https://learningequality.org/kolibri/) applications 8 | talk to when importing their content. 9 | 10 | Content flow within the Kolibri ecosystem is pictured below. 11 | 12 | ![The Kolibri Content Pipeline](../figures/content_pipeline_diagram.png) 13 | 14 | This `ricecooker` framework is the main tool used to facilitate **Integration Method 2**. 15 | 16 | 17 | 18 | Kolibri channels 19 | ---------------- 20 | A Kolibri channel is the combination of a topic tree (a nested folder structure) 21 | and a number of self-contained "content items" packaged for offline use and distribution. 22 | Each content item within the channel is represented as a content node with one 23 | or more files associated with it. In summary, a channel is a nested structure of 24 | `TopicNodes` (folders) that contain `ContentNode` objects similar to how files 25 | are organized into folders on computers. 26 | 27 | The Kolibri channel is the fundamental structure common to all parts of the Kolibri ecosystem: 28 | the Kolibri Learning Platform is where Kolibri channels are used by learners and teachers, 29 | Kolibri Studio is the editor for Kolibri Channels (think five Rs), 30 | and Ricecooker scripts are used for content integrations that pull in OER from 31 | external sources, package them for offline use, and upload them to Kolibri Studio. 32 | 33 | 34 | Supported content kinds 35 | ----------------------- 36 | Kolibri channels are tree-like structures that consist of the following types of nodes: 37 | 38 | * **Topic nodes** (folders): the nested folder structure is the main way of 39 | representing structured content in Kolibri.
Depending on the particular channel, 40 | a topic node could be a language, a subject, a course, a unit, a module, a section, 41 | a lesson, or any other structural element. Rather than impose a particular fixed structure, 42 | we let educators decide the folder structure that is best suited for the learners' needs. 43 | 44 | * **Content nodes**: 45 | 46 | - Document (either an `epub` or a `pdf` file) 47 | - Audio (`mp3` files of audio lessons, audiobooks, podcasts, radio shows, etc.) 48 | - Video (`mp4` files with `h264` video codec and `aac` audio codec) 49 | - HTML5App (`zip` files containing web content like HTML, JavaScript, css and images) 50 | - H5PApp (self-contained `h5p` files) 51 | - Slideshow (a sequence of `jpg` and `png` slide images) 52 | - Exercises containing questions like multiple choice, multiple selection, and numeric inputs 53 | 54 | 55 | Further reading 56 | --------------- 57 | - [Kolibri channel](https://kolibri.readthedocs.io/en/latest/manage/resources.html#channels-and-resources) 58 | as explained in the Kolibri documentation. 59 | - [Kolibri Studio User Guide](https://kolibri-studio.readthedocs.io/en/latest/index.html) 60 | -------------------------------------------------------------------------------- /docs/concepts/reviewing_channels.md: -------------------------------------------------------------------------------- 1 | Reviewing Kolibri content channels 2 | ================================== 3 | Every content channel on the Kolibri platform benefits from a review process 4 | that ensures the content structure, metadata, and functionality are up to standard. 5 | This is broadly referred to as "channel review," "providing feedback," or "QA." 6 | Everyone on the LE team is a potential channel reviewer, and external partners 7 | can also be asked to review channels when they have capacity.
8 | 9 | 10 | 11 | Issue tracker 12 | ------------- 13 | Channel reviewers can use the "Issue tracker" table to report problems so that 14 | developers responsible for creating the channel can address them. 15 | 16 | ### Issue tracker columns 17 | - `Issue ID`: internal numeric identifier 18 | (or `github:nn` for two-way-synced issues with the chef's github repo) 19 | - `Type` (multi select): what type of issue is this (see full list of options below) 20 | - `Severity` (Blocker || Nice to have): how bad is the issue 21 | - `URL`: A link to studio, a demo server, or the source website where the issue is visible 22 | - `Screenshots` (files): screenshot that shows the issue in action 23 | - `Issue description` (text): provide detailed description of what the issue is, how to reproduce, and any additional info (e.g. copy-paste of errors from the JavaScript console) 24 | - `Possible fixes` (text): provide suggestions (technical or not) for how issue could be fixed and ideas for workarounds 25 | - `Assigned to` (notion user): track the person that is supposed to fix this issue 26 | - `Status` (Not started||In progress||Fixed): track progress on issue fix 27 | - `Created`: record the date when the issue was added 28 | - `Created by`: record who filed the issue 29 | 30 | #### Issue types 31 | - `Missing content`: some content from the source was not imported 32 | - `Structure`: problem with the channel structure 33 | - `Title`: problem with titles, e.g. 
titles that are too long or not informative 34 | - `Description`: use to flag description problems (non-informative or repeating junk text) 35 | - `Metadata`: problem with metadata associated with nodes (language, licensing info, author, role visibility, tags) 36 | - `Thumbnails`: flag broken or missing thumbnails on the channel, topics, or content nodes 37 | - `Display issue`: the content doesn't look right (HTML/CSS issues) or doesn't work as expected (JavaScript issues) 38 | - `Learning UX`: any problem that might interfere with learning user experience 39 | - `Video compression`: if videos are not compressed enough (files too large) 40 | or alternatively too compressed (cannot read text) 41 | - `Bulk corrections`: flag issues that might require bulk metadata edits on numerous content nodes 42 | - `Translation`: content files or metadata are partially or completely in the wrong language 43 | - `Enhancement`: use to keep track of possible enhancements or additions that could be made to improve coach or learner experience 44 | 45 | 46 | #### Issue severity 47 | - `Blocker`: this issue must be fixed before the channel can go into QA 48 | - `Nice to have`: non-blocking issues like corrections, enhancements, 49 | and minor learning UX problems 50 | 51 | 52 | 53 | Who can be a channel reviewer? 54 | ------------------------------ 55 | You can. Whenever you need a distraction, take 20 minutes and place yourself in 56 | the learner's shoes and go explore the channel on the demo server link provided 57 | on the notion card. If you notice any issues while browsing, add them to the 58 | Issue tracker table. That's it. Learn something today. 
59 | -------------------------------------------------------------------------------- /docs/concepts/terminology.md: -------------------------------------------------------------------------------- 1 | Terminology 2 | =========== 3 | 4 | This page lists key concepts and technical terminology used as part of the 5 | content integration work within Learning Equality. 6 | 7 | 8 | Content Pipeline 9 | ---------------- 10 | The combination of software tools and procedures used to convert content 11 | from an external content source to becoming a Kolibri Channel available 12 | for use in the Kolibri Learning Platform. The Kolibri Content Pipeline is 13 | a collaborative effort between educational experts and software developers. 14 | 15 | 16 | 17 | Channel Spec 18 | ------------ 19 | A content specification document, or Channel Spec, is a blueprint document 20 | that specifies the structure of the Kolibri channel that is to be created. 21 | 22 | Channel Specs are an important aspect of the content integration process for two reasons: 23 | 24 | 1. It specifies what needs to be done. 25 | The channel spec establishes an agreement between the curriculum specialist 26 | and the developer who will be writing the content integration script. 27 | 28 | 2. It serves to define when the work is done. 29 | Used as part of the [review process](reviewing_channels.md) to know when the 30 | channel is "Spec Compliant," i.e. the channel structure in Kolibri matches the blueprint. 31 | 32 | A Channel Spec document includes the following information: 33 | 34 | - Channel Title: usually of the form `{Source Name} ({lang})` where `{Source Name}` 35 | is chosen to be short and descriptive, and `{lang}` is included in the title 36 | to make it easy to search for content in this language. 37 | - Channel Description: a description (up to 400 characters) of the channel and its contents. 
38 | - Languages: notes about content language, and special handling for multilingual content, subtitles, or missing translations 39 | - Files Types: info about what content kinds and file types to look for 40 | - Channel Structure: a specification of the desired topic structure for the channel. 41 | This is the key element in the Channel Spec and often requires domain expertise 42 | to take into account the needs of the teachers and learners who will be accessing this content. 43 | - Links and sample content 44 | - Credentials: info about how to access the content (e.g. info about API access) 45 | - Technical notes: The Channel Spec can include guidance about technical aspects 46 | like content transformations (for example, the need to compress the videos so that they take up less space). 47 | 48 | For more info about each of these aspects, see the section "Creating a Content Channel Spec" 49 | in the [Kolibri Content Integration Guide](https://learningequality.org/r/integration-guide). 50 | 51 | 52 | Content Integration Script (aka SushiChef) 53 | ------------------------------------------ 54 | The content integration scripts that use the `ricecooker` library to 55 | generate Kolibri Channels are commonly referred to as **SushiChef** 56 | scripts. The responsibility of a `SushiChef` script is to download the source 57 | content, perform any necessary format or structure conversions to create 58 | a content tree viewable in Kolibri, then to upload the output of this 59 | process to Kolibri Studio for review and publishing. 60 | 61 | Conceptually, `SushiChef` scripts are very similar to web scrapers, 62 | but with specialized functions for optimizing the content for Kolibri's 63 | data structures and capabilities. 64 | -------------------------------------------------------------------------------- /docs/csv_metadata/README.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. This is a added to avoid 404s. 
4 | 5 | See the `index page `__ to learn about the CSV metadata workflow. 6 | -------------------------------------------------------------------------------- /docs/csv_metadata/csv_workflow.md: -------------------------------------------------------------------------------- 1 | CSV Metadata Workflow 2 | ===================== 3 | 4 | It is possible to create Kolibri channels by: 5 | - Organizing content items (documents, videos, mp3 files) into a folder hierarchy 6 | on the local file system 7 | - Specifying metadata in the form of CSV files 8 | 9 | 10 | The CSV-based workflow is a good fit for non-technical users since it doesn't 11 | require writing any code, but instead can use Excel to provide all the metadata. 12 | 13 | - [CSV-based workflow README](https://github.com/learningequality/sample-channels/tree/master/channels/csv_channel) 14 | - [Example content folder](https://github.com/learningequality/sample-channels/tree/master/channels/csv_exercises/content) 15 | - [Example Channel.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Channel.csv) 16 | - [Example Content.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Content.csv) 17 | 18 | Organizing the content into folders and creating the CSV metadata files is most 19 | of the work, and can be done by non-programmers. 20 | The generic sushi chef script (`LineCook`) is then used to upload the channel. 21 | 22 | 23 | CSV Exercises 24 | ------------- 25 | You can also use the CSV metadata workflow to upload simple exercises to Kolibri Studio. 26 | See [this doc](./csv_exercises.md) for the technical details about creating exercises.
27 | -------------------------------------------------------------------------------- /docs/csv_metadata/index.rst: -------------------------------------------------------------------------------- 1 | Spreadsheet Metadata Workflow 2 | ============================= 3 | 4 | It is possible to create Kolibri channels by specifying all the channel metadata 5 | in the form of spreadsheet or CSV files instead of through Python code. 6 | 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | csv_workflow 12 | csv_exercises 13 | -------------------------------------------------------------------------------- /docs/developer/corrections.md: -------------------------------------------------------------------------------- 1 | Studio bulk corrections 2 | ======================= 3 | The command line script `corrections` allows to perform bulk corrections of 4 | titles, descriptions, and other attributes for the content nodes of a channel. 5 | 6 | 7 | Use cases: 8 | - Bulk modify titles and descriptions (e.g. 
to fix typos) 9 | - Translate titles and/or descriptions (for sources with missing structure translations) 10 | - Enhance content by adding description (case by case detail work done during QA) 11 | - Add missing metadata like author, copyright holder, and tags to content nodes 12 | - Perform basic structural edits to channel (remove unwanted topics and content nodes) 13 | 14 | Not use cases: 15 | - Modify a few node attributes (better do manually through the Studio web interface) 16 | - Structural changes (the corrections workflow does not support node moves) 17 | - Global changes (if the same modification must be performed on all nodes in the 18 | channel, it would be better to implement these changes during cheffing) 19 | 20 | 21 | Credentials 22 | ----------- 23 | In order to use the corrections workflow as part of a chef script, you need to 24 | create the file `credentials/studio.json` in the chef repo that contains the 25 | following information: 26 | 27 | { 28 | "token": "YOURTOKENHERE9139139f3a23232fefefefefefe", 29 | "username": "your.name@yourdomain.org", 30 | "password": "yourstudiopassword", 31 | "studio_url": "https://studio.learningequality.org" 32 | } 33 | 34 | These credentials will be used to make the necessary Studio API calls. Make sure 35 | you have edit rights for this channel. 36 | 37 | 38 | Corrections workflow 39 | -------------------- 40 | The starting point is an existing channel available on Studio, which we will 41 | identify through its Channel ID, denoted `` in code examples below. 42 | 43 | ### Step 1: Export the channel metadata to CSV 44 | Export the complete metadata of the source channel as a local `.csv` file using: 45 | 46 | corrections export 47 | 48 | This will create the file `corrections-export.csv` which can be opened with a 49 | spreadsheet program (e.g. LibreOffice). 
In order to allow for collaboration, 50 | the content of the spreadsheet must be copied to a shared google sheet with 51 | permissions set to allow external edits. 52 | 53 | 54 | ### Step 2: Edit metadata 55 | In this step the content expert (internal or external) edits the metadata for 56 | each content node in the shared google sheet. 57 | The possible actions (first column) to apply to each row are as follows: 58 | - `modify`: to apply metadata modifications to the topic or content node 59 | - `delete`: to remove the topic or content node from the channel 60 | - Leaving the Action column blank will leave the content node unchanged 61 | 62 | All rows with the `modify` keyword in the Action column will undergo metadata 63 | modifications according to the text specified in the `New *` columns of the sheet. 64 | 65 | For example, to correct typos in the title and description of a content node you must: 66 | - Mark the row with Action=`modify` (first column) 67 | - Add the desired title text in the column `New Title` 68 | - Add the desired description text in the column `New Description` 69 | 70 | Note that not all metadata columns need to be specified. The choice of fields 71 | that will be edited during the `modify` operation will be selected in the next step. 72 | 73 | 74 | ### Step 3: Apply the corrections from a google sheet 75 | Once the google sheet has been edited to contain all desired changes in the 76 | `New *` columns, the next step is to apply the corrections: 77 | 78 | corrections apply --gsheet_id='' --gid= 79 | 80 | where `` is the google sheets document identifier (taken from the URL) 81 | and `` is the identifier of the particular sheet within the spreadsheet 82 | document that contains the corrections (usually `=0`). 83 | 84 | The attributes that will be edited during the `modify` operation are specified 85 | using the `--modifyattrs` command line argument.
For example, to apply modifications 86 | only to the `title` and `description` attributes use the following command: 87 | 88 | corrections apply --gsheet_id='' --gid= --modifyattrs='title,description' 89 | 90 | Using the above command will apply only the modifications from the 91 | `New Title` and `New Description` columns and ignore modifications to copyright holder, 92 | author, and tags attributes. 93 | The default setting is `--modifyattrs=title,description,author,copyright_holder`. 94 | 95 | 96 | Status 97 | ------ 98 | Note that the corrections workflow is considered "experimental" and to be used only 99 | when no other options are viable (too many edits to do manually through the Studio 100 | web interface). 101 | -------------------------------------------------------------------------------- /docs/developer/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Developer docs 3 | ============== 4 | To learn about the inner workings of the ``ricecooker`` library, consult the following pages: 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | ../chefops 10 | sushops 11 | kolibripreview 12 | ids 13 | uploadprocess 14 | design_cli 15 | corrections 16 | -------------------------------------------------------------------------------- /docs/developer/sushops.md: -------------------------------------------------------------------------------- 1 | SushOps 2 | ======= 3 | SushOps engineers (also called ETL engineers) are responsible for making sure 4 | the overall content pipeline runs smoothly. Assuming the [chefops](../chefops) 5 | is done right, running the chef script should be as simple as running a single command. 6 | SushOps engineers need to make sure not only that chef is running correctly, 7 | but also monitor content in Kolibri Studio, in downstream remixed channels, 8 | and in Kolibri installations.
9 | 10 | SushOps is an internal role to Learning Equality but we'll document the responsibilities 11 | here for convenience, since this role is closely related to the `ricecooker` library. 12 | 13 | 14 | 15 | Project management and support 16 | ------------------------------ 17 | SushOps manage and support developers working on new chef scripts, by reviewing 18 | spec sheets, writing technical specs, creating necessary git repos, reviewing 19 | pull requests, chefops, and participating in QA. 20 | 21 | 22 | Cheffing servers 23 | ---------------- 24 | Chef scripts run on various cheffing servers, equipped with appropriate storage 25 | space and processing power (if needed for video transcoding). Currently we have: 26 | - CPU-intensive chefs running on `vader` 27 | - various other chefs running on partner orgs' infrastructure 28 | 29 | ### Cheffing servers conventions 30 | - Put all the chef repos in `/data` (usually a multi-terabyte volume), e.g., 31 | use the directory `/data/sushi-chef-{{nickname}}/` for the `nickname` chef. 32 | - Use the name `sushichef.py` for the chef script 33 | - Document all the instructions and options needed to run the chef script in 34 | the chef's `README.md` 35 | - Use the directory `/data/sushi-chef-{{nickname}}/chefdata/tmp/` to store tmp 36 | files to avoid cluttering the global `/tmp` directory. 37 | - For long running chefs, use the command `nohup &` to run the chef 38 | so you can close the ssh session (hangup) without the process being terminated.
39 | 40 | 41 | 42 | SushOps tooling and automation 43 | ------------------------------ 44 | Some of the more repetitive system administration tasks have been automated using `fab` commands: 45 | 46 | fab -R vader setup_chef:nickname # clones the nickname repo and installs requirements 47 | fab -R vader update:nickname # git fetch and git reset --hard to get latest chef code 48 | fab -R vader run_chef:nickname # runs the chef 49 | 50 | See the [content-automation-scripts](https://github.com/learningequality/content-automation-scripts) 51 | project for more details. 52 | -------------------------------------------------------------------------------- /docs/examples/detokenify.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl -pi 2 | # multi-line in place substitute 3 | use strict; 4 | use warnings; 5 | 6 | BEGIN {undef $/;} 7 | 8 | # remove access tokens in case left by mistake 9 | # #################################################################### 10 | s/a5c5fb[\da-f]{34}/YOURTOKENHERE9139139f3a23232/g; 11 | s/70aec3[\da-f]{34}/YOURTOKENHERE9139139f3a23232/g; 12 | s/563554[\da-f]{34}/YOURTOKENHERE9139139f3a23232/g; 13 | -------------------------------------------------------------------------------- /docs/examples/index.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | Below are some examples that demonstrate certain aspects of the content integration 5 | process that require careful consideration and are best explained in code: 6 | 7 | 8 | .. toctree:: 9 | :titlesonly: 10 | 11 | Learn how to work with language codes 12 | How to create exercises and questions 13 | Document conversions 14 | Step-by-step tutorial 15 | Wikipedia scraping example 16 | Kitchen sink example that includes all content kinds 17 | 18 | 19 | Jupyter notebooks 20 | ----------------- 21 | Jypyter notebooks are a very powerful tool for interactive programming. 
22 | You type in commands into an online shell, and you immediately see the results. 23 | 24 | To install jupyter notebook on your machine, you run: 25 | 26 | .. code:: 27 | 28 | pip install jupyter 29 | 30 | then to start the jupyter notebook server, run 31 | 32 | .. code:: 33 | 34 | jupyter notebook 35 | 36 | If you then navigate to the directory `docs/examples/` in the ricecooker source 37 | code repo, you'll find the same examples described above in the form of runnable 38 | notebooks that will allow you to experiment and learn hands-on. 39 | 40 | 41 | You'll need to press CTRL+C in the terminal to stop the jupyter notebook server, 42 | or use the Shutdown button in the web interface. 43 | 44 | Watch the beginning of this `Video tutorial `__ 45 | to learn how to use the Jupyter notebook environment for interactively coding parts of the chef logic. 46 | 47 | .. raw:: html 48 | 49 | 50 | 51 | 52 |
 
53 | 54 | 55 | Advanced examples 56 | ----------------- 57 | The links below will take you to the GitHub repositories of content integration 58 | scripts we use to create some of the most popular Kolibri channels in the library: 59 | 60 | * `Khan Academy chef `__ 61 | * `Open Stax chef `__ 62 | * `SHLS Toolkit chef `__ 63 | 64 | You can get a list of ALL the content integration scripts by searching for 65 | `sushi-chef `__ 66 | on GitHub. 67 | -------------------------------------------------------------------------------- /docs/examples/samplefiles/documents/doc_EN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/examples/samplefiles/documents/doc_EN.pdf -------------------------------------------------------------------------------- /docs/examples/samplefiles/documents/doc_ES.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/examples/samplefiles/documents/doc_ES.pdf -------------------------------------------------------------------------------- /docs/examples/samplefiles/documents/doc_FR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/examples/samplefiles/documents/doc_FR.pdf -------------------------------------------------------------------------------- /docs/exercises.md: -------------------------------------------------------------------------------- 1 | Exercises 2 | ========= 3 | 4 | Exercises (assessment activities) are an important part of every learning experience. 5 | Kolibri exercises are graded automatically and provide immediate feedback learners. 
6 | Student answers are logged, which enables progress reports for teachers and coaches. 7 | Exercises can also be used as part of lessons and quizzes. 8 | 9 | An `ExerciseNode` is a special kind of content node that contains one or more questions. 10 | In order to set the criteria for completing exercises, you must set __exercise_data__ 11 | to a dict containing a `mastery_model` field based on the mastery models provided 12 | in `le_utils.constants.exercises`. 13 | If no data is provided, `ricecooker` will default to mastery at 3 of 5 correct. 14 | For example: 15 | ``` 16 | node = ExerciseNode( 17 | exercise_data={ 18 | 'mastery_model': exercises.M_OF_N, 19 | 'randomize': True, 20 | 'm': 3, 21 | 'n': 5, 22 | }, 23 | ... 24 | ) 25 | ``` 26 | 27 | 28 | To add a question to an exercise node, you must first create a question model from 29 | `ricecooker.classes.questions`. Your sushi chef is responsible for determining 30 | which question type to create. Here are the available question types: 31 | - __SingleSelectQuestion__: questions that only have one right answer (e.g. radio button questions) 32 | - __MultipleSelectQuestion__: questions that have multiple correct answers (e.g. check all that apply) 33 | - __InputQuestion__: questions that have text-based answers (e.g. fill in the blank) 34 | - __PerseusQuestion__: special question type for pre-formatted perseus questions 35 | 36 | 37 | Each question class has the following attributes that can be set at initialization: 38 | - __id__ (str): question's unique id 39 | - __question__ (str): question body, in plaintext or Markdown format; 40 | math expressions must be in LaTeX format, surrounded by `$`, e.g. `$f(x) = 2^3$`.
41 | - __correct_answer__ (str) or __answers__ ([str]): the answer(s) to the question as plaintext or Markdown 42 | - __all_answers__ ([str]): list of choices for single select and multiple select questions as plaintext or Markdown 43 | - __hints__ (str or [str]): optional hints on how to answer the question, also in plaintext or Markdown 44 | 45 | To set the correct answer(s) for MultipleSelectQuestions, you must provide a list 46 | of all of the possible choices as well as an array of the correct answers 47 | (`all_answers [str]` and `correct_answers [str]` respectively). 48 | ``` 49 | question = MultipleSelectQuestion( 50 | question = "Select all prime numbers.", 51 | correct_answers = ["2", "3", "5"], 52 | all_answers = ["1", "2", "3", "4", "5"], 53 | ... 54 | ) 55 | ``` 56 | 57 | To set the correct answer(s) for SingleSelectQuestions, you must provide a list 58 | of all possible choices as well as the correct answer (`all_answers [str]` and 59 | `correct_answer str` respectively). 60 | 61 | ``` 62 | question = SingleSelectQuestion( 63 | question = "What is 2 x 3?", 64 | correct_answer = "6", 65 | all_answers = ["2", "3", "5", "6"], 66 | ... 67 | ) 68 | ``` 69 | 70 | To set the correct answer(s) for InputQuestions, you must provide an array of 71 | all of the accepted answers (`answers [str]`). 72 | ``` 73 | question = InputQuestion( 74 | question = "Name a factor of 10.", 75 | answers = ["1", "2", "5", "10"], 76 | ) 77 | ``` 78 | 79 | To add images to a question's question, answers, or hints, format the image path 80 | with `'![](path/to/some/file.png)'` and `ricecooker` will parse them automatically. 81 | 82 | 83 | Once you have created the appropriate question object, add it to an exercise object 84 | with `exercise_node.add_question(question)`. 85 | 86 | 87 | Further reading 88 | --------------- 89 | 90 | - See also the section `Exercise Nodes `__ on the nodes page.
91 | -------------------------------------------------------------------------------- /docs/figures/HandBrake/handbrake_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbrake_steps.png -------------------------------------------------------------------------------- /docs/figures/HandBrake/handbreake_audio_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbreake_audio_settings.png -------------------------------------------------------------------------------- /docs/figures/HandBrake/handbreake_resizing_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbreake_resizing_settings.png -------------------------------------------------------------------------------- /docs/figures/HandBrake/handbreake_screenshot_video_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbreake_screenshot_video_settings.png -------------------------------------------------------------------------------- /docs/figures/content_pipeline_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/content_pipeline_diagram.png -------------------------------------------------------------------------------- /docs/figures/content_pipeline_diagram_with_highlight.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/content_pipeline_diagram_with_highlight.png -------------------------------------------------------------------------------- /docs/figures/kolibri_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/kolibri_logo.png -------------------------------------------------------------------------------- /docs/figures/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/logo.ico -------------------------------------------------------------------------------- /docs/figures/ricecooker_domain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/ricecooker_domain.png -------------------------------------------------------------------------------- /docs/index_api_reference.rst: -------------------------------------------------------------------------------- 1 | Ricecooker API reference 2 | ======================== 3 | The detailed information for content developers (chef authors) is presented here: 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | nodes 9 | files 10 | htmlapps 11 | exercises 12 | languages 13 | chefops 14 | 15 | 16 | Code examples 17 | ------------- 18 | 19 | - See the `examples directory on GitHub `__ 20 | full code examples. 21 | - See the `examples page `__ for literate code examples 22 | that explain how to do specific tasks (find language codes, download subtitles, 23 | and exercises questions, etc). 
These examples are available as runnable 24 | Jupyter notebooks so you can try things out interactively and learn. 25 | - See the `Cheffing techniques doc `__ 26 | which provides links to tips and code examples for handling various special cases and content sources. 27 | -------------------------------------------------------------------------------- /docs/index_utils.rst: -------------------------------------------------------------------------------- 1 | Working with content 2 | ==================== 3 | Ricecooker includes a number of utility functions to help chef authors 4 | with common content extraction and transformation tasks. 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | 10 | downloader 11 | parsing_html 12 | developer/kolibripreview 13 | pdfutils 14 | video_compression 15 | csv_metadata/index 16 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | You can install `ricecooker` by running the command `pip install ricecooker`, 5 | which will install the Python package and all its Python dependencies. 6 | You'll need version 3.9 or higher of Python to use the `ricecooker` framework, 7 | as well as some software for media file conversions (`ffmpeg` and `poppler`). 8 | 9 | In the next fifteen minutes or so, we'll set up your computer with all these things 10 | so you can get started writing your first content integration scripts. 11 | 12 | 13 | System prerequisites 14 | -------------------- 15 | The first step will be to make sure you have `python3` installed on your 16 | computer and two additional file conversion tools: `ffmpeg` for video compression, 17 | and the `poppler` library for manipulating PDFs. 18 | 19 | Jump to the specific instructions for your operating system, and be sure to try 20 | the *Checklist* commands to know the installation was successful. 
21 | 22 | 23 | ### Linux 24 | On a Debian or Ubuntu GNU/Linux, you can install the necessary packages using: 25 | 26 | sudo apt-get install git python3 ffmpeg poppler-utils 27 | 28 | You may need to adjust the package names for other Linux distributions (CentOS/Fedora/OpenSuSE). 29 | 30 | *Checklist*: verify your python version is 3.9 or higher by running `python3 --version`. 31 | If no `python3` command exists, then try `python --version`. 32 | Run the commands `ffmpeg -h` and `pdftoppm -h` to make sure they are available. 33 | 34 | 35 | ### Mac 36 | Mac OS X users can install the necessary software using [Homebrew](https://brew.sh/): 37 | 38 | brew install git python3 ffmpeg poppler 39 | 40 | *Checklist*: verify your python version is 3.9 or higher by running `python3 --version`. 41 | Also run the commands `ffmpeg -h` and `pdftoppm -h` to make sure they are available. 42 | 43 | 44 | 45 | ### Windows 46 | On Windows the process is a little longer since we'll have to download and install 47 | several programs and make sure their `bin`-directories are added to the `Path` variable: 48 | 49 | 1. Download Python from [https://www.python.org/downloads/windows/](https://www.python.org/downloads/windows/). 50 | Look under the **Python 3.9.x** heading and choose the "Windows x86-64 executable installer" 51 | option to download the latest installer and follow usual installation steps. 52 | During the installation, make sure to check the box **"Add Python 3.9 to path"**. 53 | - *Checklist*: after installation, open a new command prompt (`cmd.exe`) and 54 | type in `python --version` and `pip --version` to make sure the commands are available. 55 | 2. Download `ffmpeg` from [https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip](https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip). 
56 | Extract the zip file to a permanent location where you store your code, 57 | like `C:\Users\User\Tools` for example. Next, you must add the `bin` folder 58 | that contains `ffmpeg` (e.g. `C:\Users\User\Tools\ffmpeg-4.1.4-win64-static\bin`) 59 | to your user Path variable following [these instructions](https://www.computerhope.com/issues/ch000549.htm). 60 | - *Checklist*: Open a new command prompt and type in `ffmpeg -h` and `ffprobe -h` 61 | to verify the commands `ffmpeg` and `ffprobe` are available on your Path. 62 | 3. Download the file linked under "Latest binary" from [poppler-windows](http://blog.alivate.com.au/poppler-windows/). 63 | You will need to download and install [7-zip](https://www.7-zip.org/) to "unzip" 64 | the `.7z` archive. Extract the file to some permanent location in your files. 65 | Add the `bin` folder `poppler-0.xx.y\bin` to your Path variable. 66 | - *Checklist*: after installation, open a command terminal and type in 67 | `pdftoppm -h` to make sure the command `pdftoppm` is available. 68 | 69 | We recommend you also download and install Git from [https://git-scm.com/downloads](https://git-scm.com/downloads). 70 | Using git is not a requirement for getting started, but it's a great tool to 71 | have for borrowing code from others and sharing back your own code on the web. 72 | 73 | If you find the text descriptions to be confusing, you can watch this 74 | [video walkthrough](http://youtube.com/watch?v=LxK8_BOSy-8) that shows the 75 | installation steps and also explains the adding-to-Path process. 76 | 77 | 78 | 79 |
 
80 | 81 | 82 | 83 | Installing Ricecooker 84 | --------------------- 85 | To install the `ricecooker` package, simply run this command in a command prompt: 86 | 87 | pip install ricecooker 88 | 89 | You will see lots of lines scroll on the screen as `pip`, the package installer for Python, 90 | installs all the Python packages required to create content integration scripts. 91 | 92 | **Reporting issues**: If you run into problems or encounter an error in any of the above steps, 93 | please let us know by [opening an issue on github](https://github.com/learningequality/ricecooker/issues). 94 | 95 | ------ 96 | 97 | Okay so now we have all the system software and Python libraries installed. 98 | [Let's get started!](tutorial/gettingstarted.html) 99 | -------------------------------------------------------------------------------- /docs/languages.md: -------------------------------------------------------------------------------- 1 | Kolibri Language Codes 2 | ---------------------- 3 | 4 | The file [le_utils/constants/languages.py](https://github.com/learningequality/le-utils/blob/master/le_utils/constants/languages.py) 5 | and the lookup table in [le_utils/resources/languagelookup.json](https://github.com/learningequality/le-utils/blob/master/le_utils/resources/languagelookup.json) 6 | define the internal representation for languages codes used by Ricecooker, Kolibri, 7 | and Kolibri Studio to identify content items in different languages. 8 | 9 | The internal representation uses a mixture of two-letter codes (e.g. `en`), 10 | two-letter-and-country code (e.g. `pt-BR` for Brazilian Portuguese), 11 | and three-letter codes (e.g., `zul` for Zulu). 12 | 13 | In order to make sure you have the correct language code when interfacing with 14 | the Kolibri ecosystem (e.g. 
when uploading new content to Kolibri Studio), you 15 | must lookup the language object using the helper method `getlang`: 16 | 17 | ``` 18 | >>> from le_utils.constants.languages import getlang 19 | >>> language_obj = getlang('en') # lookup language using language code 20 | >>> language_obj 21 | Language(native_name='English', primary_code='en', subcode=None, name='English', ka_name=None) 22 | ``` 23 | The function `getlang` will return `None` if the lookup fails. In such cases, you 24 | can try lookup by name or lookup by alpha2 code (ISO_639-1) methods defined below. 25 | 26 | Once you've successfully looked up the language object, you can obtain the internal 27 | representation language code from the language object's `code` attribute: 28 | ``` 29 | >>> language_obj.code 30 | 'en' 31 | ``` 32 | The `ricecooker` API expects these internal representation language codes will be 33 | supplied for all `language` attributes (channel language, node language, and files language). 34 | 35 | 36 | 37 | ### More lookup helper methods 38 | 39 | The helper method `getlang_by_name` allows you to lookup a language by name: 40 | ``` 41 | >>> from le_utils.constants.languages import getlang_by_name 42 | >>> language_obj = getlang_by_name('English') # lookup language by name 43 | >>> language_obj 44 | Language(native_name='English', primary_code='en', subcode=None, name='English', ka_name=None) 45 | ``` 46 | 47 | The module `le_utils.constants.languages` defines two other language lookup methods: 48 | - Use `getlang_by_native_name` for looking up languages by native language name, 49 | e.g., you look for 'Français' to find French. 50 | - Use `getlang_by_alpha2` to perform lookups using the standard two-letter codes 51 | defined in [ISO_639-1](https://en.wikipedia.org/wiki/ISO_639-1) that are 52 | supported by the `pycountry` library. 
53 | -------------------------------------------------------------------------------- /docs/parsing_html.md: -------------------------------------------------------------------------------- 1 | Parsing HTML using BeautifulSoup 2 | ================================ 3 | BeautifulSoup is an HTML parsing library that allows you to "select" various DOM 4 | elements, and extract their attributes and text contents. 5 | 6 | 7 | 8 | Video tutorial 9 | -------------- 10 | To get started, you can watch this [cheffing video tutorial](http://35.196.115.213/en/learn/#/topics/c/73470ad1a3015769ace455fbfdf17d48) 11 | that will show the basic steps of using `requests` and `BeautifulSoup` for crawling a website. 12 | See the [sushi-chef-shls code repo](https://github.com/learningequality/sushi-chef-shls/blob/master/sushichef.py#L226-L340) 13 | for the final version of the web crawling code that was used for this content source. 14 | 15 | 16 | 17 | 18 |
 
19 | 20 | 21 | Scraping 101 22 | ------------ 23 | The basic code to GET the HTML source of a webpage and parse it: 24 | 25 | ```python 26 | import requests 27 | from bs4 import BeautifulSoup 28 | 29 | url = 'https://somesite.edu' 30 | html = requests.get(url).content 31 | doc = BeautifulSoup(html, "html5lib") 32 | ``` 33 | 34 | You can now call `doc.find` and `doc.find_all` methods to select various DOM elements: 35 | 36 | ```python 37 | special_ul = doc.find('ul', class_='some-special-class') 38 | section_lis = special_ul.find_all('li', recursive=False) # search only immediate children 39 | for section_li in section_lis: 40 | print('processing a section
  • right now...') 41 | print(section_li.prettify()) # useful seeing HTML in when developing... 42 | ``` 43 | 44 | The most commonly used parts of the BeautifulSoup API are: 45 | - `.find(tag_name, )`: find the next occurrence of the tag `tag_name` that 46 | has attributes specified in `` (given as a dictionary), or can use the 47 | shortcut options `id` and `class_` (note extra underscore). 48 | - `.find_all(tag_name, )`: same as above but returns a list of all matching 49 | elements. Use the optional keyword argument `recursive=False` to select only 50 | immediate child nodes (instead of including children of children, etc.). 51 | - `.next_sibling`: find the next element (for badly formatted pages with no useful selectors) 52 | - `.get_text()` extracts the text contents of the node. See also helper method 53 | called `get_text` that performs additional cleanup of newlines and spaces. 54 | - `.extract()`: to extract an element from the DOM tree 55 | - `.decompose()`: useful to remove any unwanted DOM elements 56 | (same as `.extract()` but throws away the extracted element) 57 | 58 | 59 | ### Example 1 60 | Here is some sample code for getting the text of the LE mission statement: 61 | 62 | ```python 63 | from bs4 import BeautifulSoup 64 | from ricecooker.utils.downloader import read 65 | 66 | url = 'https://learningequality.org/' 67 | html = read(url) 68 | doc = BeautifulSoup(html, 'html5lib') 69 | 70 | main_div = doc.find('div', {'id': 'body-content'}) 71 | mission_el = main_div.find('h3', class_='mission-state') 72 | mission = mission_el.get_text().strip() 73 | print(mission) 74 | ``` 75 | 76 | ### Example 2 77 | To print a list of all the links on the page, use the following code: 78 | ```python 79 | links = doc.find_all('a') 80 | for link in links: 81 | print(link.get_text().strip(), '-->', link['href']) 82 | ``` 83 | 84 | 85 | 86 | Further reading 87 | --------------- 88 | For more info about BeautifulSoup, see [the 
docs](https://www.crummy.com/software/BeautifulSoup/bs4/doc/). 89 | 90 | There are also some excellent tutorials online you can read: 91 | - [http://akul.me/blog/2016/beautifulsoup-cheatsheet/](http://akul.me/blog/2016/beautifulsoup-cheatsheet/) 92 | - [http://youkilljohnny.blogspot.com/2014/03/beautifulsoup-cheat-sheet-parse-html-by.html](http://youkilljohnny.blogspot.com/2014/03/beautifulsoup-cheat-sheet-parse-html-by.html) 93 | - [http://www.compjour.org/warmups/govt-text-releases/intro-to-bs4-lxml-parsing-wh-press-briefings/](http://www.compjour.org/warmups/govt-text-releases/intro-to-bs4-lxml-parsing-wh-press-briefings/) 94 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==8.2.3 2 | sphinx_rtd_theme 3 | recommonmark 4 | nbsphinx 5 | ipython 6 | sphinx-autobuild 7 | sphinx-notfound-page 8 | -------------------------------------------------------------------------------- /docs/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | Getting started 2 | =============== 3 | 4 | The purpose of these pages is to help you learn how to use the ``ricecooker`` framework. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | gettingstarted 10 | tutorial 11 | explanations 12 | -------------------------------------------------------------------------------- /docs/tutorial/quickstart.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. Note this page exists for backward compatibility (since we sent this link) 4 | to partners, we don't want them to hit a 404. The materials from docs/tutorial 5 | now lives in the better named docs/examples. 6 | 7 | 8 | Ricecooker quickstart 9 | ===================== 10 | 11 | The following links will get you started with content integration process in no time! 
12 | 13 | - `Install <../installation.html>`_ Python, the ``ricecooker`` package, and system prerequisites (5–20 mins) 14 | - `Getting started `_: upload your first channel to Kolibri Studio and import it in Kolibri (10 mins) 15 | - For more info see the `ricecooker docs main page <../index.html>`_ 📚. 16 | 17 | Welcome to the team! 18 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial.rst: -------------------------------------------------------------------------------- 1 | Hands-on tutorial 2 | ================= 3 | 4 | In this tutorial, you'll start with a basic content integration script (sushi chef) 5 | and extend the code to construct a bigger channel based on your own content. 6 | In the process you'll learn about all the features of the ``ricecooker`` framework. 7 | 8 | 9 | Prerequisite steps 10 | ------------------ 11 | The steps in this tutorial assume you have: 12 | 13 | 1. Completed the `Installation <../installation.html>`__ steps 14 | 2. Created an account on `Kolibri Studio `__ 15 | and obtained your access token, which you'll need to to use instead of the text 16 | ```` in the examples below 17 | 3. Successfully managed to run the basic chef example in the `Getting started `__ tutorial 18 | 19 | 20 | Step 1: Setup your environment 21 | ------------------------------ 22 | Create a directory called ``tutorial`` where you will run this code. 23 | In general it is recommended to have separate directories for each content 24 | integration script you will be working on. 25 | In order to prepare for the upcoming **Step 6**, find a ``.pdf`` document, 26 | a small ``.mp4`` video file, and an ``.mp3`` audio file. 27 | Save these files somewhere inside the ``tutorial`` directory. 28 | 29 | 30 | Step 2: Copy the sample code 31 | ---------------------------- 32 | To begin, download the sample code from `here `__ 33 | and save it as the file `sushichef.py` in the tutorial directory. 
34 | 35 | Note all the ``TODO`` items in the code. These are the places left for you to edit. 36 | 37 | 38 | Step 3: Edit the channel metadata 39 | --------------------------------- 40 | 1. Open your terminal and ``cd`` into the folder where ``sushichef.py`` is located. 41 | 2. Open ``sushichef.py`` in a text editor. 42 | 3. Change ```` to any domain. The source domain specifies who is supplying the content. 43 | 4. Change ```` to any id. The source_id will distinguish your channel from other channels. 44 | 5. Change ``The Tutorial Channel`` to any channel name. 45 | 46 | Try running the sushi chef by entering the following command in your terminal:: 47 | 48 | python sushichef.py --token= 49 | 50 | Click the link to `Kolibri Studio `__ that 51 | shows up in the final step and make sure your channel looks OK. 52 | 53 | 54 | 55 | Step 4: Create a Topic 56 | ---------------------- 57 | 1. Locate the first **TODO** in the ``sushichef.py`` file. 58 | Here, you will create your first topic. 59 | 2. Copy/paste the example code and change ``exampletopic`` to ``mytopic``. 60 | 3. Set the ``source_id`` to be something other than ``topic-1`` 61 | (the ``source_id`` will distinguish your node from other nodes in the tree) 62 | 4. Set the title. 63 | 5. Go to the next **TODO** and add ``mytopic`` to channel (use example code as guide) 64 | 65 | :: 66 | 67 | Check Run sushi chef from your terminal. Your channel should look like this: 68 | Channel 69 | | Example Topic 70 | | Your Topic 71 | 72 | 73 | 74 | 75 | Step 5: Create a Subtopic 76 | ------------------------- 77 | 1. Go to the next **TODO** in the ``sushichef.py`` file. Here, you will create a subtopic 78 | 2. Copy/paste the example code and change ``examplesubtopic`` to ``mysubtopic`` 79 | 3. Set the ``source_id`` and ``title`` 80 | 4. Go to the next **TODO** and add ``mysubtopic`` to ``mytopic`` (use example code as guide) 81 | 82 | :: 83 | 84 | Check Run the sushi chef from your terminal. 
Your channel should look like this: 85 | Channel 86 | | Example Topic 87 | | | Example Subtopic 88 | | Your Topic 89 | | | Your Subtopic 90 | 91 | 92 | Step 6: Create Files 93 | -------------------- 94 | 1. Go to the next **TODO** in the sushichef.py file. Here, you will create a pdf file 95 | 2. Copy/paste the example code and change ``examplepdf`` to ``mypdf``. 96 | ``DocumentFile(...)`` will automatically download a pdf file from the given path. 97 | 3. Set the ``source_id``, the ``title``, and the ``path`` (any url to a pdf file) 98 | 4. Repeat steps 1-3 for video files and audio files. 99 | 5. Finally, add your files to your channel (see last \*\* statements) 100 | 101 | :: 102 | 103 | Check: Run the sushi chef from your terminal. Your channel should look like this: 104 | Channel 105 | | Example Topic 106 | | | Example Subtopic 107 | | | | Example Audio 108 | | | Example Video 109 | | Your Topic 110 | | | Your Subtopic 111 | | | | Your Audio 112 | | | Your Video 113 | | Example PDF 114 | | Your PDF 115 | 116 | 117 | 118 | 119 | 120 | 121 | Next steps 122 | ---------- 123 | You're now ready to start writing your own content integration scripts. 124 | The following links will guide you to the next steps: 125 | 126 | - `Ricecooker API reference <../index_api_reference.html>`_ 127 | - `Code examples <../examples/index.html>`_ 128 | - `Learn about the ricecooker utilities and helpers <../index_utils.html>`_ 129 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | Using the ricecooker framework 2 | ============================== 3 | 4 | 6 | 7 | The following links will get you started with content integration process in no time! 
8 | 9 | - [Installation](installation.html): info about installing the `ricecooker` package 10 | and system prerequisites (5–20 mins) 11 | - [Getting started](tutorial/gettingstarted.html): upload your first channel to 12 | Kolibri Studio and import it in Kolibri (10 mins) 13 | - For more info see the [ricecooker docs main page](index.html) 📚. 14 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | Complete script examples 2 | ======================== 3 | 4 | This directory contains examples of `ricecooker` content integration scripts (sushi chefs). 5 | 6 | - [`gettingstarted`](./gettingstarted)/[`sushichef.py`](./gettingstarted/sushichef.py) 7 | is a basic "Hello, World!" example used in the [Getting started](https://ricecooker.readthedocs.io/en/latest/gettingstarted.html) guide. 8 | - `tutorial/sushichef.py` the code that goes with the sushi chef tutorial doc 9 | https://docs.google.com/document/d/1iiwce8B_AyJ2d6K8dYBl66n9zjz0zQ3G4gTrubdk9ws/edit 10 | - `kitchensink/sushichef.py` is a comprehensive example that creates audio, video, and exercise nodes. 11 | - `wikipedia/sushichef.py` an example that creates a channel from two Wikipedia categories 12 | 13 | To run each of these, you'll need to edit the `SOURCE_DOMAIN` and `SOURCE_ID` 14 | in each chef script and then call them on the command line: 15 | 16 | git clone https://github.com/learningequality/ricecooker.git 17 | cd ricecooker/examples/examplename 18 | # Follow the instructions in the README.md file... 19 | # ...then run the sushichef script by calling: 20 | python sushichef.py --token=YOURSTUDIOTOKENHERE9139139f3a23232 21 | 22 | 23 | Further reading 24 | --------------- 25 | - See the [examples](https://ricecooker.readthedocs.io/en/latest/examples/) 26 | page in the ricecooker docs site for more code samples related to specific tasks. 
27 | - See also the [sample-channels](https://github.com/learningequality/sample-channels) 28 | repository which contains even more examples that cover special cases and needs. 29 | -------------------------------------------------------------------------------- /examples/gettingstarted/sushichef.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ricecooker.chefs import SushiChef 3 | from ricecooker.classes.files import DocumentFile 4 | from ricecooker.classes.licenses import get_license 5 | from ricecooker.classes.nodes import DocumentNode 6 | from ricecooker.classes.nodes import TopicNode 7 | 8 | 9 | class SimpleChef(SushiChef): 10 | channel_info = { 11 | "CHANNEL_TITLE": "Potatoes info channel", 12 | "CHANNEL_SOURCE_DOMAIN": "", # where content comes from 13 | "CHANNEL_SOURCE_ID": "", # CHANGE ME!!! 14 | "CHANNEL_LANGUAGE": "en", # le_utils language code 15 | "CHANNEL_THUMBNAIL": "https://upload.wikimedia.org/wikipedia/commons/b/b7/A_Grande_Batata.jpg", # (optional) 16 | "CHANNEL_DESCRIPTION": "What is this channel about?", # (optional) 17 | } 18 | 19 | def construct_channel(self, **kwargs): 20 | channel = self.get_channel(**kwargs) 21 | potato_topic = TopicNode(title="Potatoes!", source_id="") 22 | channel.add_child(potato_topic) 23 | document_node = DocumentNode( 24 | title="Growing potatoes", 25 | description="An article about growing potatoes on your rooftop.", 26 | source_id="pubs/mafri-potatoe", 27 | license=get_license("CC BY", copyright_holder="University of Alberta"), 28 | language="en", 29 | files=[ 30 | DocumentFile( 31 | path="https://www.gov.mb.ca/inr/pdf/pubs/mafri-potatoe.pdf", 32 | language="en", 33 | ) 34 | ], 35 | ) 36 | potato_topic.add_child(document_node) 37 | return channel 38 | 39 | 40 | if __name__ == "__main__": 41 | """ 42 | Run this script on the command line using: 43 | python sushichef.py --token=YOURTOKENHERE9139139f3a23232 44 | """ 45 | simple_chef = SimpleChef() 
46 | simple_chef.main() 47 | -------------------------------------------------------------------------------- /examples/oldexamples/README.md: -------------------------------------------------------------------------------- 1 | Old examples 2 | ============ 3 | 4 | The code examples and procedures of these examples are still accurate, 5 | but we don't recommend them as a starting point for learning since they are more involved 6 | and use advanced features like parsing JSON, compression, etc. 7 | 8 | 9 | Need to fix URLs: 10 | 11 | ``` 12 | WARNING Video 6cafe8: http://www.youtube.com/watch?v=kpCJyQ2usJ4 13 | Subtitle with langauge en is not available for http://www.youtube.com/watch?v=kpCJyQ2usJ4 14 | WARNING Audio aaaa4d: https://upload.wikimedia.org/wikipedia/commons/b/ba/Rice_grains_(IRRI) 15 | 404 Client Error: Not Found for url: https://upload.wikimedia.org/wikipedia/commons/b/ba/Rice_grains_(IRRI) 16 | WARNING Audio aaaa4d: https://ia801407.us.archive.org/21/items/ah_Rice/Rice.mp3 17 | 403 Client Error: Forbidden for url: https://archive.org/download/ah_Rice/Rice.mp3 18 | WARNING Exercise 6cafe3: http://www.publicdomainpictures.net/pictures/110000/nahled/bowl-of-rice.jpg 19 | 503 Server Error: Service Temporarily Unavailable for url: http://www.publicdomainpictures.net/pictures/110000/nahled/bowl-of-rice.jpg 20 | WARNING Question ddddd: ka-perseus-graphie.s3.amazonaws.com/907dec1b45fb177f0937fa521b7af03fb837f0bd 21 | [Errno 2] No such file or directory: 'ka-perseus-graphie.s3.amazonaws.com/907dec1b45fb177f0937fa521b7af03fb837f0bd.svg' 22 | ``` 23 | -------------------------------------------------------------------------------- /examples/oldexamples/content/0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5-data.json: -------------------------------------------------------------------------------- 1 | 
svgData0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5({"range":[[-0.5,10.5],[-1,1]],"labels":[{"content":"\\small{0}","coordinates":[0,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{1}","coordinates":[1,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{2}","coordinates":[2,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{3}","coordinates":[3,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{4}","coordinates":[4,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{5}","coordinates":[5,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{6}","coordinates":[6,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{7}","coordinates":[7,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{8}","coordinates":[8,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{9}","coordinates":[9,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{10}","coordinates":[10,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\scriptsize{2}","coordinates":[6,0.2],"alignment":"above","typesetAsMath":true,"style":{}},{"content":"\\scriptsize{3}","coordinates":[4,0.2],"alignment":"above","typesetAsMath":true,"style":{}}]}); -------------------------------------------------------------------------------- /examples/oldexamples/content/0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/oldexamples/content/captions.vtt: -------------------------------------------------------------------------------- 1 | WEBVTT 2 | Kind: captions 3 | Language: sw 4 | 5 | 00:00:00.042 --> 00:00:03.067 6 | Kabla ya kuingia katika nyama ya algebra, 7 | 8 | 
00:00:03.067 --> 00:00:06.060 9 | i alitaka kuwapa quote kutoka kwa mmoja wa akili mkuu katika historia ya binadamu, 10 | 11 | 00:00:06.060 --> 00:00:11.726 12 | Galileo Galilei, kwa sababu nadhani quote hii encapsulates hatua ya kweli ya algebra 13 | 14 | 00:00:11.726 --> 00:00:14.234 15 | na kwa kweli hisabati kwa ujumla. 16 | 17 | 00:00:14.234 --> 00:00:19.133 18 | Akasema: "Falsafa, imeandikwa katika kitabu ile kuu ambayo milele uongo mbele ya macho yetu 19 | 20 | 00:00:19.133 --> 00:00:21.444 21 | - I mean ulimwengu - lakini sisi hawezi kuelewa kama hatuwezi kwanza kujifunza lugha 22 | 23 | 00:00:25.313 --> 00:00:27.980 24 | na kufahamu alama ambayo imeandikwa. 25 | 26 | 00:00:27.980 --> 00:00:30.800 27 | Kitabu hii imeandikwa kwa lugha ya hisabati ... 28 | 29 | 00:00:30.800 --> 00:00:35.933 30 | bila ambayo moja wanders bure kwa njia ya labyrinth giza. " 31 | 32 | 00:00:35.933 --> 00:00:41.106 33 | Sana lakini kwa kiasi kikubwa na kina kirefu sana na kwa kweli hii ni hatua ya hisabati 34 | -------------------------------------------------------------------------------- /examples/oldexamples/content/htmltest.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/examples/oldexamples/content/htmltest.zip -------------------------------------------------------------------------------- /examples/oldexamples/content/sushirolls.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/examples/oldexamples/content/sushirolls.pdf -------------------------------------------------------------------------------- /examples/oldexamples/content/video.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/examples/oldexamples/content/video.mp4 -------------------------------------------------------------------------------- /examples/oldexamples/data/sample_perseus01.json: -------------------------------------------------------------------------------- 1 | { 2 | "answerArea":{ 3 | "chi2Table":false, 4 | "periodicTable":false, 5 | "tTable":false, 6 | "zTable":false, 7 | "calculator":false 8 | }, 9 | "hints":[ 10 | { 11 | "widgets":{ 12 | 13 | }, 14 | "images":{ 15 | }, 16 | "content":"Hint #1", 17 | "replace":false 18 | }, 19 | { 20 | "widgets":{ 21 | 22 | }, 23 | "images":{ 24 | 25 | }, 26 | "content":"Hint #2", 27 | "replace":false 28 | } 29 | ], 30 | "question":{ 31 | "widgets":{ 32 | "radio 1":{ 33 | "type":"radio", 34 | "alignment":"default", 35 | "graded":true, 36 | "static":false, 37 | "options":{ 38 | "deselectEnabled":false, 39 | "multipleSelect":false, 40 | "choices":[ 41 | { 42 | "correct":true, 43 | "content":"Yes" 44 | }, 45 | { 46 | "correct":false, 47 | "content":"No" 48 | }, 49 | { 50 | "correct":false, 51 | "content":">" 52 | } 53 | ], 54 | "displayCount":null, 55 | "hasNoneOfTheAbove":false, 56 | "randomize":false, 57 | "onePerLine":true 58 | }, 59 | "version":{ 60 | "minor":0, 61 | "major":1 62 | } 63 | } 64 | }, 65 | "images":{ 66 | }, 67 | "content":"Do you like rice?\\\"\\n\\n![](web+graphie:ka-perseus-graphie.s3.amazonaws.com/907dec1b45fb177f0937fa521b7af03fb837f0bd)\\n\\n[[\\u2603 radio 1]]" 68 | }, 69 | "itemDataVersion":{ 70 | "minor":1, 71 | "major":0 72 | } 73 | } -------------------------------------------------------------------------------- /examples/oldexamples/data/sample_perseus02.json: -------------------------------------------------------------------------------- 1 | { 2 | "hints":[ 3 | { 4 | "replace":false, 5 | "content":"Numbers are equivalent when they are located at the same point on the number line.\n\nLet's see what 
fraction is at the same location as $\\\\tealD{\\\\dfrac48}$ on the number line.\n", 6 | "widgets":{ 7 | 8 | }, 9 | "images":{ 10 | "web+graphie:file:///C:/Users/Jordan/contentcuration-dump/ddb3feb4c8e3740ca4f10c2ebad70b5797f60ebd":{ 11 | "width":460, 12 | "height":120 13 | } 14 | } 15 | }, 16 | { 17 | "replace":false, 18 | "content":"![](web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/a61/a61ac6f4038cb3e2c3bd6e69f6e75da10632a3d4\\n)\\n\\n $\\\\purpleC{\\\\dfrac24}$ is at the same location on the number line as $\\\\tealD{\\\\dfrac48}$.\\n", 19 | "widgets":{ 20 | 21 | }, 22 | "images":{ 23 | 24 | } 25 | }, 26 | { 27 | "replace":false, 28 | "content":" $\\\\purpleC{\\\\dfrac24}$ is equivalent to $\\\\tealD{\\\\dfrac48}$.\\n\\n![]( web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/e84/e84b6d5fa1410f002ef8f9446a999d4a09266edd)", 29 | "widgets":{ 30 | 31 | }, 32 | "images":{ 33 | "web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/6a1/6a1bf04c8df3d217c846362e8902008d84d10ff4":{ 34 | "width":460, 35 | "height":120 36 | } 37 | } 38 | } 39 | ], 40 | "question":{ 41 | "content":"![](web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/749/749d2d16db0cfc94e8685f3eb7302394448d8c8c)\\n\\n**Move the dot to a fraction equivalent to $\\\\tealD{\\\\dfrac48}$ on the number line.**\\n\\n\\n[[\\u2603 number-line 1]]\\n", 42 | "widgets":{ 43 | "number-line 1":{ 44 | "type":"number-line", 45 | "static":false, 46 | "options":{ 47 | "initialX":null, 48 | "labelRange":[ 49 | null, 50 | null 51 | ], 52 | "divisionRange":[ 53 | null, 54 | null 55 | ], 56 | "correctX":0.5, 57 | "labelStyle":"non-reduced", 58 | "labelTicks":true, 59 | "snapDivisions":2, 60 | "correctRel":"eq", 61 | "static":false, 62 | "numDivisions":null, 63 | "range":[ 64 | null, 65 | null 66 | ], 67 | "tickStep":0.25 68 | }, 69 | "graded":true, 70 | "version":{ 71 | "minor":0, 72 | "major":0 73 | }, 74 | "alignment":"default" 75 | } 76 | }, 77 | "images":{ 78 | 79 | } 
80 | }, 81 | "itemDataVersion":{ 82 | "minor":1, 83 | "major":0 84 | }, 85 | "answerArea":{ 86 | "periodicTable":false, 87 | "zTable":false, 88 | "chi2Table":false, 89 | "calculator":false, 90 | "tTable":false 91 | } 92 | } -------------------------------------------------------------------------------- /examples/studiocontent/sushichef.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ricecooker.chefs import SushiChef 3 | from ricecooker.classes.files import AudioFile 4 | from ricecooker.classes.files import DocumentFile 5 | from ricecooker.classes.licenses import get_license 6 | from ricecooker.classes.nodes import AudioNode 7 | from ricecooker.classes.nodes import DocumentNode 8 | from ricecooker.classes.nodes import StudioContentNode 9 | from ricecooker.classes.nodes import TopicNode 10 | 11 | """ 12 | This example shows how to use the StudioContentNode to create a channel that 13 | curates content from another channel already on Studio into a new channel. 14 | """ 15 | 16 | SOURCE_DOMAIN = "testdomain.org" ## change me! 
class OriginalChannelChef(SushiChef):
    """
    Chef for the "Original channel": a channel with exactly two children,
    a document node followed by an audio node (in that order).
    """

    channel_info = {
        "CHANNEL_TITLE": "Original channel",
        "CHANNEL_SOURCE_DOMAIN": SOURCE_DOMAIN,
        "CHANNEL_SOURCE_ID": "originalchannel",
        "CHANNEL_LANGUAGE": "en",
    }

    def construct_channel(self, **kwargs):
        """Build the channel tree and return the channel node.

        ``kwargs`` are forwarded unchanged to ``self.get_channel``.
        """
        channel = self.get_channel(**kwargs)

        # First child: the PDF document.
        potato_license = get_license("CC BY", copyright_holder="University of Alberta")
        potato_pdf = DocumentFile(
            path="https://www.gov.mb.ca/inr/pdf/pubs/mafri-potatoe.pdf",
            language="en",
        )
        channel.add_child(
            DocumentNode(
                title="Growing potatoes",
                description="An article about growing potatoes on your rooftop.",
                source_id="pubs/mafri-potatoe",
                license=potato_license,
                files=[potato_pdf],
            )
        )

        # Second child: the MP3 recording.
        music_license = get_license("CC BY", copyright_holder="Kevin MacLeod")
        music_mp3 = AudioFile(
            "https://ia600702.us.archive.org/33/items/Classical_Sampler-9615/Kevin_MacLeod_-_Also_Sprach_Zarathustra.mp3"
        )
        channel.add_child(
            AudioNode(
                source_id="also-sprach",
                title="Also Sprach Zarathustra",
                author="Kevin MacLeod / Richard Strauss",
                description="Also Sprach Zarathustra, Op. 30, is a tone poem by Richard Strauss, composed in 1896.",
                license=music_license,
                files=[music_mp3],
            )
        )

        return channel
| original_channel.children[1].get_node_id().hex 120 | ) 121 | 122 | input( 123 | "Please visit the URL above and deploy the channel, and wait for it to finish. Then press enter to continue..." 124 | ) 125 | 126 | curated_chef = CuratedChannelChef() 127 | curated_chef.main() 128 | -------------------------------------------------------------------------------- /examples/wikipedia/README.md: -------------------------------------------------------------------------------- 1 | Wikipedia example 2 | ================= 3 | 4 | The content integration script `sushichef.py` scrapes several wikipedia pages, 5 | packages their contents as standalone `HTMLZipFile`s and uploads them to Studio. 6 | 7 | 8 | ## Running the script 9 | 10 | ./sushichef.py --token=YOURSTUDIOTOKENHERE9139139f3a23232 11 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests/ 3 | norecursedirs = docs examples resources 4 | env = 5 | # cleaned up in conftest.py fixture 6 | RICECOOKER_STORAGE=./.pytest_storage 7 | RICECOOKER_FILECACHE=./.pytest_filecache 8 | -------------------------------------------------------------------------------- /resources/scripts/convertvideo.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | TITLE Video conversion and compression script 3 | REM Video conversion and compression script Learning Equality 2018 4 | REM Usage: 5 | REM convertvideo.bat inputfile.mpg [outputfile.mp4] 6 | REM 7 | REM This script will perform the following conversion steps: 8 | REM - Apply CRF 32 compression (very aggressive; may need to adjust below) 9 | REM - Limit the audio track to 32k/sec 10 | REM - Resize the video to max_height=480 11 | REM You can manually edit the command below to customize the oprations performed. 12 | setlocal 13 | 14 | 15 | REM 1. 
Check we have ffmpeg 16 | REM ############################################################################ 17 | WHERE ffmpeg >nul 2>nul 18 | IF %ERRORLEVEL% NEQ 0 ( 19 | echo Error: ffmpeg not installed. 20 | echo Please download zip from https://web.archive.org/web/20200918193047/https://ffmpeg.zeranoe.com/builds/ 21 | echo Then copy the files ffmpeg.exe and ffprobe.exe from bin/ folder to this folder. 22 | exit /b 1 23 | ) 24 | 25 | 26 | REM 2. Parse input filename 27 | REM ############################################################################ 28 | IF NOT "%~1" == "" ( 29 | set "INFILE=%~1" 30 | ) else ( 31 | echo ERROR: Missing argument inputfile.mp4 32 | echo Usage: convertvideo.bat inputfile.mp4 [outputfile.mp4] 33 | exit /b 2 34 | ) 35 | 36 | REM 3. Prepare output filename 37 | REM ############################################################################ 38 | IF NOT "%~2" == "" ( 39 | set "OUTFILE=%~2" 40 | ) else ( 41 | set "OUTFILE=%INFILE:~0,-4%-converted.mp4" 42 | ) 43 | 44 | 45 | REM 4. Do conversion 46 | REM ############################################################################ 47 | echo Calling ffmpeg to convert: %INFILE% --to--^> %OUTFILE% 48 | ffmpeg -i "%INFILE%" ^ 49 | -b:a 32k -ac 1 ^ 50 | -vf scale="'w=-2:h=trunc(min(ih,480)/2)*2'" ^ 51 | -crf 32 ^ 52 | -profile:v baseline -level 3.0 -preset slow -v error -strict -2 -stats ^ 53 | -y "%OUTFILE%" 54 | 55 | 56 | echo Conversion done. 
#!/usr/bin/env bash
# Video conversion and compression script    Learning Equality 2018
# Usage:
#   ./convertvideo.sh  inputfile.mp4  [outputfile.mp4]
#
# This script will perform the following conversion steps:
#   - Apply CRF 32 compression (very aggressive; may need to adjust below)
#   - Limit the audio track to 32k/sec
#   - Resize the video to max_height=480
# You can manually edit the command below to customize the operations performed.
#
# Exit codes:
#   1  ffmpeg was not found on PATH
#   2  the input file argument is missing
# Because of `set -e`, a failing ffmpeg run aborts the script before the
# final "Conversion done." message is printed.
set -e


# 1. Check we have ffmpeg
################################################################################
if ! command -v ffmpeg >/dev/null 2>&1
then
    echo "Error: ffmpeg not installed. Please download from https://www.ffmpeg.org/" >&2
    exit 1
fi

# 2. Parse input filename
################################################################################
if [ -n "$1" ]
then
    INFILE="$1"
else
    echo "ERROR: Missing argument inputfile" >&2
    echo "Usage: ./convertvideo.sh  inputfile.mp4  [outputfile.mp4]" >&2
    exit 2
fi

# 3. Prepare output filename
################################################################################
DEFAULT_PREFIX="converted-"
if [ -n "$2" ]
then
    OUTFILE="$2"
else
    # Default: "converted-<input name without its last extension>.mp4",
    # written to the current directory (basename drops the input's directory).
    filename=$(basename -- "$INFILE")
    OUTFILE="${DEFAULT_PREFIX}${filename%.*}.mp4"
fi


# 4. Do conversion
################################################################################
# The scale filter caps the height at 480 and truncates it to an even number;
# w=-2 asks ffmpeg to pick a matching width. -y overwrites an existing OUTFILE.
echo "Calling ffmpeg to convert: $INFILE  --> $OUTFILE"
ffmpeg -i "$INFILE" \
  -b:a 32k -ac 1 \
  -vf scale="'w=-2:h=trunc(min(ih,480)/2)*2'" \
  -crf 32 \
  -profile:v baseline -level 3.0 -preset slow -v error -strict -2 -stats \
  -y "$OUTFILE"


echo "Conversion done."
# Exceptions that might be raised during tree uploading process
#
# Each class is a direct subclass of ``Exception`` and adds no behavior of its
# own: construction arguments are handled by ``Exception`` itself.


class InvalidCommandException(Exception):
    """Raised when an unrecognized command is entered."""


class InvalidUsageException(Exception):
    """Raised when the command line syntax is invalid."""


class InvalidFormatException(Exception):
    """Raised when a file format is unrecognized."""


class FileNotFoundException(Exception):
    """Raised when a file path is not found."""


class UnknownContentKindError(Exception):
    """Raised when a content kind is unrecognized."""


class UnknownQuestionTypeError(Exception):
    """Raised when a question type is unrecognized."""


class UnknownFileTypeError(Exception):
    """Raised when a file type is unrecognized."""


class UnknownLicenseError(Exception):
    """Raised when a license is unrecognized."""


class InvalidNodeException(Exception):
    """Raised when a node is improperly formatted."""


class InvalidQuestionException(Exception):
    """Raised when a question is improperly formatted."""


def raise_for_invalid_channel(channel):
    """Placeholder hook: performs no check on ``channel`` and returns None."""
    return None
import logging
import subprocess
from enum import Enum

LOGGER = logging.getLogger("AudioResource")
LOGGER.setLevel(logging.DEBUG)


class AudioCompressionError(Exception):
    """
    Custom error raised when the `ffmpeg` compression call fails: either it
    exits with a non-zero status or the process could not be run (OSError).
    """


AudioEncoding = Enum("AudioEncoding", ["CBR", "VBR"])

# Allowed Constant Bit Rate values for MP3 encoding.
CBR_VALUES = {8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320}
# Allowed Variable Bit Rate values for MP3 encoding.
VBR_VALUES = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}


def compress_audio(
    source_file_path,
    target_file,
    overwrite=False,
    encoding=AudioEncoding.CBR,
    bit_rate=96,
    vbr=7,
):
    """
    Compress audio at `source_file_path` to MP3 (libmp3lame) using `ffmpeg`
    and save the compressed output audio to `target_file`.

    Args:
        source_file_path: path passed to ffmpeg as the input (`-i`)
        target_file: path passed to ffmpeg as the output
        overwrite (bool): pass `-y` (overwrite) instead of `-n` to ffmpeg
        encoding (AudioEncoding): Constant or Variable Bit Rate (default CBR)
        bit_rate (int): CBR bit rate, passed as `-b:a`; must be in CBR_VALUES
        vbr (int): VBR quality, passed as `-qscale:a`; must be in VBR_VALUES

    Both `bit_rate` and `vbr` are validated regardless of which encoding is
    selected.

    Raises:
        TypeError: `encoding` is not an AudioEncoding, or `bit_rate` / `vbr`
            is not an int.
        ValueError: `bit_rate` / `vbr` is not one of the allowed values.
        AudioCompressionError: ffmpeg exited with a non-zero status or could
            not be run; the original exception is attached as `__cause__`.
    """

    if not isinstance(encoding, AudioEncoding):
        raise TypeError("encoding value must be {} enum value".format(AudioEncoding))

    if not isinstance(bit_rate, int):
        raise TypeError("bit_rate must be an integer")

    if bit_rate not in CBR_VALUES:
        raise ValueError("bit_rate must be one of {}".format(CBR_VALUES))

    if not isinstance(vbr, int):
        raise TypeError("vbr must be an integer")

    if vbr not in VBR_VALUES:
        raise ValueError("vbr must be one of {}".format(VBR_VALUES))

    if encoding is AudioEncoding.CBR:
        option_name = "-b:a"
        value = bit_rate
    else:
        option_name = "-qscale:a"
        value = vbr

    # run command (argument list, no shell involved)
    command = [
        "ffmpeg",
        "-y" if overwrite else "-n",
        "-i",
        source_file_path,
        "-codec:a",
        "libmp3lame",
        option_name,
        str(value),
        target_file,
    ]
    try:
        # stderr is merged into stdout so it ends up in `e.output` on failure
        subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        raise AudioCompressionError("{}: {}".format(e, e.output)) from e
    except OSError as e:
        # OSError covers BrokenPipeError and IOError (an alias of OSError),
        # as well as FileNotFoundError when the ffmpeg binary is missing.
        raise AudioCompressionError("{}".format(e)) from e
class CacheForeverHeuristic(BaseHeuristic):
    """
    Heuristic that marks a response as public with an expiry date
    520 weeks (10 * 52) after the current time.
    """

    def update_headers(self, response):
        """Return a dict holding the ``expires`` and ``cache-control`` headers.

        ``response`` is not inspected.
        """
        lifetime = timedelta(weeks=10 * 52)
        expiry = expire_after(lifetime, date=datetime.now())
        return {
            "expires": datetime_to_header(expiry),
            "cache-control": "public",
        }
def generate_key(action, path_or_id, settings=None, default=" (default)"):
    """generate_key: build the string used as a cache key
    Args:
        action (str): how video is being processed (e.g. COMPRESSED or DOWNLOADED);
            it is upper-cased in the key
        path_or_id (str): path to video or youtube_id
        settings (dict): settings for compression or downloading passed in by user
        default (str): suffix used when settings are empty or None (avoid overwriting keys)
    Returns: key of the form "<ACTION>: <path_or_id> <settings suffix>"
    """
    if not settings:
        suffix = default
    elif "postprocessors" in settings:
        # JSON with sorted keys, so dicts nested under the settings are
        # serialized in a stable order
        suffix = json.dumps(settings, sort_keys=True)
    else:
        # legacy format, kept so that existing chef caches stay valid
        suffix = str(sorted(settings.items()))
    return "{}: {} {}".format(action.upper(), path_or_id, suffix)
import base64
import re

BASE64_REGEX_STR = r"data:image\/([A-Za-z]*);base64,((?:[A-Za-z0-9+\/]{4})*(?:[A-Za-z0-9+\/]{2}==|[A-Za-z0-9+\/]{3}=)*)"
BASE64_REGEX = re.compile(BASE64_REGEX_STR, flags=re.IGNORECASE)


def get_base64_encoding(text):
    """get_base64_encoding: Get the first base64 match or None
    Args:
        text (str): text to check for base64 encoding
    Returns: first regex match of BASE64_REGEX in text (group 1 is the image
        subtype, group 2 the base64 payload), or None if there is no match
    """
    return BASE64_REGEX.search(text)


def write_base64_to_file(encoding, fpath_out):
    """write_base64_to_file: Convert base64 image to file
    Args:
        encoding (str): text containing a 'data:image/...;base64,' encoded image
        fpath_out (str): path to file to write
    Returns: None
    Raises: AssertionError if no base64 image is found in `encoding`
    """

    encoding_match = get_base64_encoding(encoding)

    if not encoding_match:
        # Raised explicitly instead of using an `assert` statement: asserts
        # are removed under `python -O`. The exception type is kept as
        # AssertionError so existing callers see the same error.
        raise AssertionError("Error writing to file: Invalid base64 encoding")

    # Decode before opening so a decoding failure cannot leave a truncated
    # or empty file behind.
    decoded = base64.decodebytes(encoding_match.group(2).encode("utf-8"))

    with open(fpath_out, "wb") as target_file:
        target_file.write(decoded)


def encode_file_to_base64(fpath_in, prefix):
    """encode_file_to_base64: gets base64 encoding of file
    Args:
        fpath_in (str): path to file to encode
        prefix (str): file data for encoding (e.g. 'data:image/png;base64,')
    Returns: prefix followed by the base64 encoding of the file's bytes
    """
    with open(fpath_in, "rb") as file_obj:
        return prefix + base64.b64encode(file_obj.read()).decode("utf-8")
with HTMLWriter() as writer: )""" 23 | self.open() 24 | return self 25 | 26 | def __exit__(self, type, value, traceback): 27 | """Called when closing context""" 28 | self.close() 29 | 30 | def _write_to_zipfile(self, filename, content): 31 | if not self.contains(filename): 32 | info = zipfile.ZipInfo(filename, date_time=(2013, 3, 14, 1, 59, 26)) 33 | info.comment = "HTML FILE".encode() 34 | info.compress_type = zipfile.ZIP_STORED 35 | info.create_system = 0 36 | self.zf.writestr(info, content) 37 | 38 | def _copy_to_zipfile(self, filepath, arcname=None): 39 | filename = arcname or filepath 40 | if not self.contains(filename): 41 | self.zf.write(filepath, arcname=arcname) 42 | 43 | """ USER-FACING METHODS """ 44 | 45 | def open(self): 46 | """open: Opens zipfile to write to 47 | Args: None 48 | Returns: None 49 | """ 50 | self.zf = zipfile.ZipFile(self.write_to_path, self.mode) 51 | 52 | def close(self): 53 | """close: Close zipfile when done 54 | Args: None 55 | Returns: None 56 | """ 57 | index_present = self.contains("index.html") 58 | self.zf.close() # Make sure zipfile closes no matter what 59 | if not index_present: 60 | raise ReferenceError( 61 | "Invalid Zip at {}: missing index.html file (use write_index_contents method)".format( 62 | self.write_to_path 63 | ) 64 | ) 65 | 66 | def contains(self, filename): 67 | """contains: Checks if filename is in the zipfile 68 | Args: filename: (str) name of file to check 69 | Returns: boolean indicating whether or not filename is in the zip 70 | """ 71 | return filename in self.zf.namelist() 72 | 73 | def write_contents(self, filename, contents, directory=None): 74 | """write_contents: Write contents to filename in zip 75 | Args: 76 | contents: (str) contents of file 77 | filename: (str) name of file in zip 78 | directory: (str) directory in zipfile to write file to (optional) 79 | Returns: path to file in zip 80 | """ 81 | filepath = ( 82 | "{}/{}".format(directory.rstrip("/"), filename) if directory else filename 83 | 
) 84 | self._write_to_zipfile(filepath, contents) 85 | return filepath 86 | 87 | def write_file(self, filepath, filename=None, directory=None): 88 | """write_file: Write local file to zip 89 | Args: 90 | filepath: (str) location to local file 91 | directory: (str) directory in zipfile to write file to (optional) 92 | Returns: path to file in zip 93 | 94 | Note: filepath must be a relative path 95 | """ 96 | arcname = None 97 | if filename or directory: 98 | directory = directory.rstrip("/") + "/" if directory else "" 99 | filename = filename or os.path.basename(filepath) 100 | arcname = "{}{}".format(directory, filename) 101 | self._copy_to_zipfile(filepath, arcname=arcname) 102 | return arcname or filepath 103 | 104 | def write_url(self, url, filename, directory=None): 105 | """write_url: Write contents from url to filename in zip 106 | Args: 107 | url: (str) url to file to download 108 | filename: (str) name of file in zip 109 | directory: (str) directory in zipfile to write file to (optional) 110 | Returns: path to file in zip 111 | """ 112 | filepath = ( 113 | "{}/{}".format(directory.rstrip("/"), filename) if directory else filename 114 | ) 115 | if not self.contains(filepath): 116 | self._write_to_zipfile(filepath, read(url)) 117 | return filepath 118 | 119 | def write_index_contents(self, contents): 120 | """write_index_contents: Write main index file to zip 121 | Args: 122 | contents: (str) contents of file 123 | Returns: path to file in zip 124 | """ 125 | self._write_to_zipfile("index.html", contents) 126 | -------------------------------------------------------------------------------- /ricecooker/utils/kolibripreview.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | import shutil 5 | import sys 6 | 7 | 8 | def validate(srcdir): 9 | """ 10 | Check if `srcdir` has an index.html in it. 
def main(args):
    """
    Command line utility for previewing HTML5App content in Kolibri.
    """
    # NOTE: the docstring above doubles as the argparse description (it is
    # passed as `description=main.__doc__`), so it is kept to one sentence
    # and the details live in comments.
    #
    # `args` needs two attributes:
    #   srcdir  -- directory holding the HTML5 webroot to zip up
    #   destzip -- path of the zip file to produce
    # Exits with status 1 when srcdir is not a directory and with status 2
    # when it has no index.html.
    if not os.path.isdir(args.srcdir):  # isdir() is already False for missing paths
        print("Error:", args.srcdir, "is not a directory.")
        sys.exit(1)
    if not validate(args.srcdir):
        print("Validation failed; exiting.")
        sys.exit(2)
    # Write the contents of `srcdir` to `destzip`. make_archive appends the
    # ".zip" suffix itself, so the extension is stripped from destzip first.
    destzipbase, _ = os.path.splitext(args.destzip)
    shutil.make_archive(destzipbase, "zip", args.srcdir)
def build_path(levels):
    """
    make a linear directory structure from a list of path levels names
    levels = ["chefdir", "trees", "test"]
    builds ./chefdir/trees/test/

    Args: levels (list of str): directory names, outermost first
    Returns: the joined path (str); the directory exists on return
    Raises: FileExistsError if the path exists but is not a directory
    """
    path = os.path.join(*levels)
    # exist_ok avoids the check-then-create race: another process creating
    # the directory between the check and makedirs no longer causes a crash.
    os.makedirs(path, exist_ok=True)
    return path
23 | The pipeline object will store global context that will be passed to each handler, 24 | but will be overridden by the context generated during the course of a file's processing. 25 | 26 | This pipeline can be customized by passing `children` as an argument to the constructor. 27 | 28 | For example to add a custom stage to the pipeline, you can do: 29 | ```python 30 | from ricecooker.utils.pipeline import FilePipeline 31 | from ricecooker.utils.pipeline.custom_stage import CustomStageHandler 32 | pipeline = FilePipeline(children=[CustomStageHandler()]) 33 | ``` 34 | 35 | To just modify one of the existing stages, you can do: 36 | ```python 37 | from ricecooker.utils.pipeline import FilePipeline 38 | from ricecooker.utils.pipeline.convert import ConversionStageHandler 39 | from ricecooker.utils.pipeline.extract_metadata import ExtractMetadataStageHandler 40 | from ricecooker.utils.pipeline.transfer import DownloadStageHandler 41 | from ricecooker.utils.pipeline.transfer import DiskResourceHandler 42 | 43 | download_stage = DownloadStageHandler(children=[DiskResourceHandler()]) 44 | pipeline = FilePipeline(children=[download_stage, ConversionStageHandler(), ExtractMetadataStageHandler()]) 45 | ``` 46 | This will replace the default `DownloadStageHandler` with a new one that has a `DiskResourceHandler` as its only child. 47 | """ 48 | 49 | DEFAULT_CHILDREN = [ 50 | DownloadStageHandler, 51 | ConversionStageHandler, 52 | ExtractMetadataStageHandler, 53 | ] 54 | 55 | def execute( 56 | self, 57 | path: str, 58 | context: Optional[Dict] = None, 59 | skip_cache: Optional[bool] = False, 60 | ) -> list[FileMetadata]: 61 | """ 62 | Execute the pipeline for a given file path. 
63 | """ 64 | context = context or {} 65 | file_metadata_list = [FileMetadata(path=path)] 66 | for handler in self._children: 67 | updated_file_metadata_list = [] 68 | for file_metadata in file_metadata_list: 69 | if handler.should_handle(file_metadata.path): 70 | # Pass in any context from the previous handler 71 | scoped_context = deepcopy(context) 72 | scoped_context.update(file_metadata.to_dict()) 73 | # Execute the handler and get the new list of metadata 74 | new_metadata_list = handler.execute( 75 | file_metadata.path, 76 | context=scoped_context, 77 | skip_cache=skip_cache, 78 | ) 79 | for new_metadata in new_metadata_list: 80 | # For each new metadata in the returned list 81 | # make a unique copy of the existing metadata and 82 | # merge the new metadata into the existing metadata 83 | updated_file_metadata_list.append( 84 | file_metadata.merge(new_metadata) 85 | ) 86 | else: 87 | # Otherwise, it's a noop 88 | updated_file_metadata_list.append(file_metadata) 89 | file_metadata_list = updated_file_metadata_list 90 | return file_metadata_list 91 | -------------------------------------------------------------------------------- /ricecooker/utils/pipeline/context.py: -------------------------------------------------------------------------------- 1 | from dataclasses import asdict 2 | from dataclasses import dataclass 3 | from typing import Optional 4 | from typing import Type 5 | 6 | 7 | class AutoDataClassMetaClass(type): 8 | def __new__(mcs, name: str, bases: tuple, namespace: dict) -> Type: 9 | cls = super().__new__(mcs, name, bases, namespace) 10 | return dataclass(frozen=True)(cls) 11 | 12 | 13 | @dataclass 14 | class ContentNodeMetadata: 15 | """ 16 | A dataclass for storing metadata about a content node. 
17 | """ 18 | 19 | title: Optional[str] = None 20 | description: Optional[str] = None 21 | thumbnail: Optional[str] = None 22 | license: Optional[str] = None 23 | license_description: Optional[str] = None 24 | author: Optional[str] = None 25 | aggregator: Optional[str] = None 26 | copyright_holder: Optional[str] = None 27 | provider: Optional[str] = None 28 | grade_levels: Optional[list[str]] = None 29 | categories: Optional[list[str]] = None 30 | resource_types: Optional[list[str]] = None 31 | learning_activities: Optional[list[str]] = None 32 | accessibility_labels: Optional[list[str]] = None 33 | learner_needs: Optional[list[str]] = None 34 | role: Optional[str] = None 35 | source_id: Optional[str] = None 36 | kind: Optional[str] = None 37 | extra_fields: Optional[dict] = None 38 | 39 | 40 | def _recursive_update(target, source): 41 | for k, v in source.items(): 42 | if k in target and isinstance(v, dict): 43 | target[k] = _recursive_update(target[k], v) 44 | else: 45 | target[k] = v 46 | return target 47 | 48 | 49 | @dataclass 50 | class FileMetadata: 51 | filename: Optional[str] = None 52 | path: Optional[str] = None 53 | original_filename: Optional[str] = None 54 | language: Optional[str] = None 55 | duration: Optional[int] = None 56 | license: Optional[str] = None 57 | license_description: Optional[str] = None 58 | preset: Optional[str] = None 59 | content_node_metadata: Optional[ContentNodeMetadata] = None 60 | 61 | def to_dict(self): 62 | return asdict( 63 | self, dict_factory=lambda x: {k: v for k, v in x if v is not None} 64 | ) 65 | 66 | def merge(self, other): 67 | """ 68 | Create a new FileMetadata object by the result of overwriting self 69 | fields with other fields when defined. 
class MetadataExtractor(ExtensionMatchingHandler):
    """
    Base handler that builds a FileMetadata for a file from its path.

    Subclasses override `infer_metadata` and/or `infer_preset`.
    """

    def infer_metadata(self, path):
        """Return the initial metadata fields for `path` (none by default)."""
        return {}

    def infer_preset(self, path):
        """Look up the format preset registered for the extension of `path`."""
        return PRESETS_FROM_EXTENSIONS.get(extract_path_ext(path))

    def handle_file(self, path):
        """
        Combine the inferred metadata and preset into a FileMetadata.

        When a preset is found it is recorded under "preset"; if that preset
        maps to a content kind, the kind is set on the content node metadata
        (created on demand).
        """
        fields = self.infer_metadata(path)
        preset = self.infer_preset(path)
        if not preset:
            return FileMetadata(**fields)

        fields["preset"] = preset
        kind = KIND_FROM_PRESET.get(preset)
        if kind:
            node_metadata = fields.setdefault(
                "content_node_metadata", ContentNodeMetadata()
            )
            node_metadata.kind = kind
        return FileMetadata(**fields)
def prompt_token(domain):
    """
    Prompt user to enter content curation server authentication token.
    Args: domain (str): domain to authenticate user (currently not used
        in the prompt)
    Returns: token exactly as typed, with surrounding whitespace removed
    Exits the process when the user enters 'q' (in either case).
    """
    token = input("\nEnter content curation server token ('q' to quit): ").strip()
    # Lower-case only for the quit comparison: the token itself must be
    # returned unchanged, since altering its case would corrupt it.
    if token.lower() == "q":
        sys.exit()
    return token
def get_hash(filepath):
    """
    Return the MD5 hex digest of the file at `filepath`.

    The file is read in 2 MiB blocks so large files are never loaded into
    memory in one piece.
    """
    block_size = 2097152  # 2 MiB
    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while block := stream.read(block_size):
            digest.update(block)
    return digest.hexdigest()
def get_links(self):
    """
    Retrieves all links contained within the page.

    Links are read from the attribute configured in `self.link_tags` for
    each tag name. Fragment-only links ("#...") are skipped, as are links
    that start with the page's own file name. Any query string or fragment
    is stripped from the links that are returned.

    :return: A list of local and remote URLs in the page.
    """
    # Derive the page's own name from `filename` whenever it is known, not
    # only on the call that loads the file. Otherwise a second call (or a
    # parser built from an HTML string) had no basename, which made the
    # self-link filter reject every link and return an empty list.
    basename = os.path.basename(self.filename) if self.filename else None
    if self.html is None:
        with open(self.filename) as html_file:
            self.html = html_file.read()
    soup = BeautifulSoup(self.html, "html.parser")

    extracted_links = []
    for tag_name, attribute in self.link_tags.items():
        for tag in soup.find_all(tag_name):
            link = tag.get(attribute)
            # don't include links to ourselves or # links
            # TODO: Should this part be moved to get_local_files instead?
            if not link or link.strip().startswith("#"):
                continue
            if basename and link.startswith(basename):
                continue
            # maxsplit=1 keeps this safe for links that contain more than
            # one "?" or "#" (unpacking an unbounded split raised ValueError).
            link = link.split("?", 1)[0]
            link = link.split("#", 1)[0]
            extracted_links.append(link)

    return extracted_links
def create_predictable_zip(path, entrypoint=None, file_converter=None):
    """
    Create a zip file with predictable sort order and metadata so that MD5 will
    stay consistent if zipping the same content twice.
    Args:
        path (str): absolute path either to a directory to zip up, or an existing zip file to convert.
        entrypoint (str or None): if specified, a relative file path in the zip to serve as the first page to load
            (accepted for compatibility; this function does not read it)
        file_converter (callable or None): if given, called as
            file_converter(filepath, reader) for every entry and its return
            value is written instead of the raw bytes; reader(filepath)
            returns the entry's original content
    Returns: path (str) to the output zip file
    Raises: FileNotFoundError if path is neither a directory nor a file
    """
    inputzip = None
    # if path is a directory, recursively enumerate all the files under the directory
    if os.path.isdir(path):
        suffix = ".zip"
        # relpath (rather than slicing off len(path) + 1 characters) stays
        # correct when `path` is given with a trailing separator.
        paths = [
            os.path.relpath(os.path.join(root, filename), path)
            for root, _directories, filenames in os.walk(path)
            for filename in filenames
        ]

        def reader(x):
            return _read_file(os.path.join(path, x))

    # otherwise, if it's a zip file, open it up and pull out the list of names
    elif os.path.isfile(path):
        # splitext already includes the leading dot; prepending another one
        # produced temp files named "<random>..zip".
        suffix = os.path.splitext(path)[1] or ".zip"
        inputzip = zipfile.ZipFile(path)
        paths = inputzip.namelist()

        def reader(x):
            return inputzip.read(x)

    else:
        # Fail before creating the temp file instead of hitting an unbound
        # `paths` variable afterwards.
        raise FileNotFoundError(
            "Cannot create zip: {} is neither a directory nor a file".format(path)
        )

    try:
        # create a temporary zip file path to write the output into
        zippathfd, zippath = tempfile.mkstemp(suffix=suffix)
        # Only the path is needed; ZipFile reopens the file by name.
        os.close(zippathfd)

        with zipfile.ZipFile(
            zippath, "w", compression=zipfile.ZIP_DEFLATED
        ) as outputzip:
            # loop over the file paths in sorted order, to ensure a predictable zip
            for filepath in sorted(paths):
                content = (
                    file_converter(filepath, reader)
                    if file_converter
                    else reader(filepath)
                )
                write_file_to_zip_with_neutral_metadata(outputzip, filepath, content)
    finally:
        # Release the handle on the source zip even if writing failed.
        if inputzip is not None:
            inputzip.close()
    return zippath
62 | Args: 63 | zfile (ZipFile): open ZipFile to write the content into 64 | filepath (str): the file path within the zip file to write into 65 | content (str): the content to write into the zip 66 | Returns: None 67 | """ 68 | # Convert any windows file separators to unix style for consistent 69 | # file paths in the zip file 70 | filepath = filepath.replace("\\", "/") 71 | info = zipfile.ZipInfo(filepath, date_time=(2015, 10, 21, 7, 28, 0)) 72 | info.compress_type = zipfile.ZIP_DEFLATED 73 | info.comment = "".encode() 74 | info.create_system = 0 75 | zfile.writestr(info, content) 76 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:ricecooker/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs, **/site-packages/**, examples 19 | ignore = E226,E203,E41,W503,E741 20 | max-line-length = 160 21 | max-complexity = 10 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from setuptools import find_packages 4 | from setuptools import setup 5 | 6 | import ricecooker 7 | 8 | 9 | readme = open("README.md").read() 10 | 11 | with open("docs/history.rst") as history_file: 12 | history = history_file.read() 13 | 14 | setup( 15 | name="ricecooker", 16 | version=ricecooker.__version__, 17 | description="API for adding content to the Kolibri content curation server", 18 | long_description=readme + "\n\n" + history, 19 | 
long_description_content_type="text/markdown", 20 | author="Learning Equality", 21 | author_email="dev@learningequality.org", 22 | url="https://github.com/learningequality/ricecooker", 23 | packages=find_packages(), 24 | package_dir={"ricecooker": "ricecooker"}, 25 | entry_points={ 26 | "console_scripts": [ 27 | "corrections = ricecooker.utils.corrections:correctionsmain", 28 | ] 29 | }, 30 | include_package_data=True, 31 | install_requires=[ 32 | "requests>=2.11.1", 33 | "le_utils>=0.2.10", 34 | "requests_file", 35 | "beautifulsoup4>=4.6.3,<4.9.0", # pinned to match versions in le-pycaption 36 | "selenium==4.31.0", 37 | "yt-dlp>=2024.12.23", 38 | "html5lib", 39 | "cachecontrol==0.14.3", 40 | "filelock==3.18.0", # This is needed, but not specified as a dependency by cachecontrol 41 | "css-html-js-minify==2.5.5", 42 | "pypdf2==1.26.0", 43 | "dictdiffer>=0.8.0", 44 | "Pillow==11.2.1", 45 | "colorlog>=4.1.0,<6.9", 46 | "chardet==5.2.0", 47 | "ffmpy>=0.2.2", 48 | "pdf2image==1.17.0", 49 | "le-pycaption>=2.2.0a1", 50 | "EbookLib>=0.17.1", 51 | "filetype>=1.1.0", 52 | "urllib3==2.4.0", 53 | "langcodes[data]==3.5.0", 54 | ], 55 | extras_require={ 56 | "test": [ 57 | "requests-cache==1.2.1", 58 | "pytest==8.3.5", 59 | "pytest-env==1.1.5", 60 | "vcrpy==7.0.0; python_version >='3.10'", 61 | "mock==5.2.0", 62 | ], 63 | "dev": [ 64 | "pre-commit>=4.1.0", 65 | ], 66 | "google_drive": ["google-api-python-client", "google-auth"], 67 | }, 68 | python_requires=">=3.9, <3.13", 69 | license="MIT license", 70 | zip_safe=False, 71 | keywords="ricecooker", 72 | classifiers=[ 73 | "Intended Audience :: Developers", 74 | "Development Status :: 5 - Production/Stable", 75 | "License :: OSI Approved :: MIT License", 76 | "Programming Language :: Python :: 3.9", 77 | "Programming Language :: Python :: 3.10", 78 | "Programming Language :: Python :: 3.11", 79 | "Programming Language :: Python :: 3.12", 80 | "Natural Language :: English", 81 | "Topic :: Education", 82 | ], 83 | 
test_suite="tests", 84 | ) 85 | -------------------------------------------------------------------------------- /tests/chefs/fake_chef.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/chefs/fake_chef.py -------------------------------------------------------------------------------- /tests/media_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/__init__.py -------------------------------------------------------------------------------- /tests/media_utils/files/Wilhelm_Scream.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/Wilhelm_Scream.mp3 -------------------------------------------------------------------------------- /tests/media_utils/files/assets/css/empty.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/assets/css/empty.css -------------------------------------------------------------------------------- /tests/media_utils/files/assets/css/empty2.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/assets/css/empty2.css -------------------------------------------------------------------------------- /tests/media_utils/files/assets/images/4933759886_098e9acf93_m.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/assets/images/4933759886_098e9acf93_m.jpg -------------------------------------------------------------------------------- /tests/media_utils/files/assets/images/copyright.txt: -------------------------------------------------------------------------------- 1 | File: 4933759886_098e9acf93_m.jpg 2 | Source: https://flic.kr/p/8vYNVC 3 | License: CC BY 2.0 4 | -------------------------------------------------------------------------------- /tests/media_utils/files/assets/js/empty.js: -------------------------------------------------------------------------------- 1 | // regex to match folder called examples and any subfolders 2 | -------------------------------------------------------------------------------- /tests/media_utils/files/audio/file_example_MP3_700KB.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/audio/file_example_MP3_700KB.mp3 -------------------------------------------------------------------------------- /tests/media_utils/files/file_metadata.txt: -------------------------------------------------------------------------------- 1 | Wilhelm_Scream.mp3: public domain, retrieved from https://en.wikipedia.org/wiki/File:Wilhelm_Scream.ogg 2 | -------------------------------------------------------------------------------- /tests/media_utils/files/generate_thumbnail/sample.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/generate_thumbnail/sample.epub -------------------------------------------------------------------------------- /tests/media_utils/files/generate_thumbnail/sample.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/generate_thumbnail/sample.pdf -------------------------------------------------------------------------------- /tests/media_utils/files/generate_thumbnail/sample.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/generate_thumbnail/sample.zip -------------------------------------------------------------------------------- /tests/media_utils/files/kepub.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/kepub.epub -------------------------------------------------------------------------------- /tests/media_utils/files/page_with_links.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | A Link Between Webs 9 | 10 | 11 |
    12 | Home | jQuery Tutorial 13 |
    14 | 15 |

    And now for something completely different...

    16 |

    Learning not equal? We can help!

    17 |

    This is not a/link/to/index.html

    18 |

    http://shouldntbeextracted.com

    19 | 20 |

    jQuery for ubernerds, chapter 1.

    21 |

    Importing jQuery via script tag:

    22 |
    23 |
    24 |             <script src="jquery.js"></script>
    25 |         
    26 | 27 |
    28 | 29 | 30 | -------------------------------------------------------------------------------- /tests/media_utils/files/subtitles/empty.ttml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 15 |

    16 | ( clock ticking ) 17 |

    18 |

     

    19 |

    20 | MAN:
    21 | When we think
    22 | \u266a ...say bow, wow, \u266a 23 |

    24 |

    25 | we have this vision of Einstein 26 |

    27 |

    28 |
    29 | as an old, wrinkly man
    30 | with white hair. 31 |

    32 |

    33 | MAN 2:
    34 | E equals m c-squared is
    35 | not about an old Einstein. 36 |

    37 |

    38 | MAN 2:
    39 | It's all about an eternal Einstein. 40 |

    41 |

    42 | <LAUGHING & WHOOPS!> 43 |

    44 | 45 | -------------------------------------------------------------------------------- /tests/media_utils/files/subtitles/encapsulated.vtt: -------------------------------------------------------------------------------- 1 | WEBVTT 2 | 3 | 00:09.209 --> 00:12.312 4 | ( clock ticking ) 5 | 6 | 00:14.848 --> 00:17.000 7 | MAN: 8 | When we think 9 | \u266a ...say bow, wow, \u266a 10 | 11 | 00:17.000 --> 00:18.752 align:right 12 | we have this vision of Einstein 13 | 14 | 00:18.752 --> 00:20.887 15 |   16 | as an old, wrinkly man 17 | with white hair. 18 | 19 | 00:20.887 --> 00:26.760 20 | MAN 2: 21 | E equals m c-squared is 22 | not about an old Einstein. 23 | 24 | 00:26.760 --> 00:32.200 25 | MAN 2: 26 | It's all about an eternal Einstein. 27 | 28 | 00:32.200 --> 00:36.200 29 | <LAUGHING & WHOOPS!> 30 | -------------------------------------------------------------------------------- /tests/media_utils/files/subtitles/not.txt: -------------------------------------------------------------------------------- 1 | This file doesn't contain subtitles nor isn't it a subtitle format. 
2 | -------------------------------------------------------------------------------- /tests/media_utils/files/thumbnails/BRAlogo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/BRAlogo1.png -------------------------------------------------------------------------------- /tests/media_utils/files/thumbnails/toosquare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/toosquare.png -------------------------------------------------------------------------------- /tests/media_utils/files/thumbnails/tootall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/tootall.png -------------------------------------------------------------------------------- /tests/media_utils/files/thumbnails/toowide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/toowide.png -------------------------------------------------------------------------------- /tests/media_utils/test_audio.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import atexit 4 | import os 5 | import tempfile 6 | 7 | import pytest 8 | import requests_cache 9 | from conftest import download_fixture_file 10 | 11 | from ricecooker.utils import audio 12 | from ricecooker.utils import videos 13 | 14 | 15 | # cache, so we don't keep requesting the full audio 16 
| requests_cache.install_cache("audio_cache") 17 | 18 | 19 | # FIXTURES 20 | ################################################################################ 21 | 22 | 23 | @pytest.fixture 24 | def audio_file(): 25 | source_url = "https://archive.org/download/sound247/sound247.mp3" 26 | local_path = os.path.abspath( 27 | os.path.join( 28 | os.path.dirname(__file__), 29 | "..", 30 | "testcontent", 31 | "downloaded", 32 | "audio_media_test.mp3", 33 | ) 34 | ) 35 | download_fixture_file(source_url, local_path) 36 | assert os.path.exists(local_path) 37 | f = open(local_path, "rb") 38 | f.close() 39 | return f # returns a closed file descriptor which we use for name attribute 40 | 41 | 42 | @pytest.fixture 43 | def bad_audio(): 44 | with TempFile(suffix=".mp3") as f: 45 | f.write(b"noaudiohere. ffmpeg soshould error") 46 | f.flush() 47 | return f # returns a temporary file with a closed file descriptor 48 | 49 | 50 | # TESTS 51 | ################################################################################ 52 | 53 | 54 | class Test_compress_video: 55 | def test_compression_works(self, audio_file): 56 | duration = videos.extract_duration_of_media(audio_file.name, "mp3") 57 | with TempFile(suffix=".mp3") as vout: 58 | audio.compress_audio(audio_file.name, vout.name, overwrite=True) 59 | compressed_duration = videos.extract_duration_of_media(vout.name, "mp3") 60 | assert duration == compressed_duration 61 | 62 | def test_raises_for_bad_file(self, bad_audio): 63 | with TempFile(suffix=".mp4") as vout: 64 | with pytest.raises(audio.AudioCompressionError): 65 | audio.compress_audio(bad_audio.name, vout.name, overwrite=True) 66 | 67 | 68 | # Helper class for cross-platform temporary files 69 | ################################################################################ 70 | 71 | 72 | def remove_temp_file(*args, **kwargs): 73 | filename = args[0] 74 | try: 75 | os.remove(filename) 76 | except FileNotFoundError: 77 | pass 78 | assert not os.path.exists(filename) 79 | 80 | 
81 | class TempFile(object): 82 | """ 83 | tempfile.NamedTemporaryFile deletes the file as soon as the filehandle is closed. 84 | This is OK on unix but on Windows the file can't be used by other commands 85 | (i.e. ffmpeg) until the file is closed. 86 | Temporary files are instead deleted when we quit. 87 | """ 88 | 89 | def __init__(self, *args, **kwargs): 90 | # all parameters will be passed to NamedTemporaryFile 91 | self.args = args 92 | self.kwargs = kwargs 93 | 94 | def __enter__(self): 95 | # create a temporary file as per usual, but set it up to be deleted once we're done 96 | self.f = tempfile.NamedTemporaryFile(*self.args, delete=False, **self.kwargs) 97 | atexit.register(remove_temp_file, self.f.name) 98 | return self.f 99 | 100 | def __exit__(self, _type, value, traceback): 101 | self.f.close() 102 | -------------------------------------------------------------------------------- /tests/media_utils/test_proxy.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from ricecooker.utils import proxy 6 | from ricecooker.utils.youtube import YouTubeResource 7 | 8 | 9 | YOUTUBE_TEST_VIDEO = "https://www.youtube.com/watch?v=C0DPdy98e4c" 10 | YOUTUBE_TEST_PLAYLIST = "https://www.youtube.com/playlist?list=PL472BC6F4F2C3ABEF" 11 | 12 | 13 | # This test takes a few minutes, but is very useful for checking that the proxy is not being ignored, 14 | # so mark it to run when the PYTEST_RUN_SLOW env var is set. 15 | @pytest.mark.skipif( 16 | "PYTEST_RUN_SLOW" not in os.environ, 17 | reason="This test takes several minutes to complete.", 18 | ) 19 | def test_bad_proxies_get_banned(tmp_path): 20 | # create some fake proxies... 
21 | FAKE_PROXIES = [ 22 | "122.123.123.123:1234", 23 | "142.123.1.234:123345", 24 | "156.245.233.211:12323", 25 | "11.22.33.44:123", 26 | ] 27 | # initialize PROXY_LIST to known-bad proxies to check that they get banned 28 | proxy.PROXY_LIST = FAKE_PROXIES.copy() 29 | 30 | video = YouTubeResource(YOUTUBE_TEST_VIDEO) 31 | video.download(tmp_path) 32 | 33 | # Fake proxies should get added to BROKEN_PROXIES 34 | assert set(FAKE_PROXIES).issubset(set(proxy.BROKEN_PROXIES)) 35 | 36 | 37 | @pytest.mark.skipif( 38 | "PYTEST_RUN_SLOW" not in os.environ, 39 | reason="This test can take several minutes to complete.", 40 | ) 41 | def test_proxy_download(tmp_path): 42 | proxy.get_proxies(refresh=True) 43 | assert len(proxy.PROXY_LIST) > 1 44 | 45 | video = YouTubeResource(YOUTUBE_TEST_VIDEO) 46 | video.download(tmp_path) 47 | 48 | temp_files = os.listdir(os.path.join(tmp_path, "Watch")) 49 | has_video = False 50 | for afile in temp_files: 51 | if afile.endswith(".mp4"): 52 | has_video = True 53 | 54 | assert has_video, "Video file not found" 55 | 56 | 57 | @pytest.mark.skipif( 58 | "PYTEST_RUN_SLOW" not in os.environ, 59 | reason="This test can take several minutes to complete.", 60 | ) 61 | def test_proxy_playlist_download(tmp_path): 62 | playlist = YouTubeResource(YOUTUBE_TEST_PLAYLIST) 63 | playlist.download(tmp_path) 64 | 65 | temp_files = os.listdir(os.path.join(tmp_path, "Playlist")) 66 | expected = [ 67 | "zbkizy-Y3qw.jpg", 68 | "oXnzstpBEOg.mp4", 69 | "oXnzstpBEOg.jpg", 70 | "zbkizy-Y3qw.mp4", 71 | ] 72 | 73 | assert set(temp_files) == set(expected) 74 | -------------------------------------------------------------------------------- /tests/media_utils/test_web.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ricecooker.utils import web 4 | 5 | test_dir = os.path.dirname(__file__) 6 | 7 | 8 | def test_get_links(): 9 | filename = os.path.abspath(os.path.join(test_dir, "files", "page_with_links.html")) 10 | 
parser = web.HTMLParser(filename) 11 | links = parser.get_links() 12 | 13 | expected_links = [ 14 | "assets/css/empty.css", 15 | "assets/css/empty2.css", 16 | "assets/js/empty.js", 17 | "assets/images/4933759886_098e9acf93_m.jpg", 18 | "the_spanish_inquisition.html", 19 | "http://www.learningequality.org", 20 | "Wilhelm_Scream.mp3", 21 | ] 22 | 23 | # make sure the link order is the same to do an equality test 24 | links.sort() 25 | expected_links.sort() 26 | 27 | assert links == expected_links 28 | 29 | 30 | def test_get_local_files(): 31 | filename = os.path.abspath(os.path.join(test_dir, "files", "page_with_links.html")) 32 | parser = web.HTMLParser(filename) 33 | links = parser.get_local_files() 34 | 35 | expected_links = [ 36 | "assets/css/empty.css", 37 | "assets/css/empty2.css", 38 | "assets/js/empty.js", 39 | "assets/images/4933759886_098e9acf93_m.jpg", 40 | "the_spanish_inquisition.html", 41 | "Wilhelm_Scream.mp3", 42 | ] 43 | 44 | # make sure the link order is the same to do an equality test 45 | links.sort() 46 | expected_links.sort() 47 | 48 | assert links == expected_links 49 | 50 | 51 | def test_replace_links(): 52 | filename = os.path.abspath(os.path.join(test_dir, "files", "page_with_links.html")) 53 | parser = web.HTMLParser(filename) 54 | 55 | original_links = [ 56 | "assets/css/empty.css", 57 | "assets/css/empty2.css", 58 | "assets/js/empty.js", 59 | "assets/images/4933759886_098e9acf93_m.jpg", 60 | "the_spanish_inquisition.html", 61 | "Wilhelm_Scream.mp3", 62 | ] 63 | 64 | replacement_links = {} 65 | for link in original_links: 66 | replacement_links[link] = "/zipcontent/012343545454645454/{}".format(link) 67 | 68 | new_html = parser.replace_links(replacement_links) 69 | 70 | new_parser = web.HTMLParser(html=new_html) 71 | links = new_parser.get_links() 72 | 73 | for link in links: 74 | assert link == replacement_links[link] 75 | -------------------------------------------------------------------------------- /tests/pipeline/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/pipeline/__init__.py -------------------------------------------------------------------------------- /tests/test_argparse.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytest 4 | from mock import patch 5 | 6 | from ricecooker.chefs import SushiChef 7 | from ricecooker.exceptions import InvalidUsageException 8 | 9 | 10 | @pytest.fixture 11 | def cli_args_and_expected(): 12 | defaults = { 13 | "command": "uploadchannel", 14 | "update": False, 15 | "verbose": True, 16 | "debug": False, 17 | "warn": False, 18 | "quiet": False, 19 | "compress": False, 20 | "thumbnails": False, 21 | "download_attempts": 3, 22 | "resume": False, 23 | "step": "LAST", 24 | "prompt": False, 25 | "reset_deprecated": False, 26 | "stage": True, 27 | "stage_deprecated": False, 28 | "publish": False, 29 | "sample": None, 30 | } 31 | return [ 32 | { # this used to be the old recommended CLI args to run chefs 33 | "cli_input": "./sushichef.py -v --reset --token=letoken", 34 | "expected_args": dict(defaults, token="letoken", reset_deprecated=True), 35 | "expected_options": {}, 36 | }, 37 | { # nowadays we've changed the CLI defaults so don't need to specify these 38 | "cli_input": "./sushichef.py --token=letoken", 39 | "expected_args": dict(defaults, token="letoken"), 40 | "expected_options": {}, 41 | }, 42 | { 43 | "cli_input": "./sushichef.py --token=letoken --resume --step=START_UPLOAD", 44 | "expected_args": dict( 45 | defaults, token="letoken", resume=True, step="START_UPLOAD" 46 | ), 47 | "expected_options": {}, 48 | }, 49 | { 50 | "cli_input": "./sushichef.py --token=letoken lang=fr", 51 | "expected_args": dict(defaults, token="letoken"), 52 | "expected_options": dict(lang="fr"), 53 | }, 54 | { 55 | "cli_input": "./sushichef.py 
--token=letoken somethin=else extrakey=extraval", 56 | "expected_args": dict(defaults, token="letoken"), 57 | "expected_options": dict(somethin="else", extrakey="extraval"), 58 | }, 59 | { 60 | "cli_input": ( 61 | "./sushichef.py -uv --warn --compress --download-attempts=4 " 62 | "--token=besttokenever --resume --step=PUBLISH_CHANNEL --prompt --deploy --publish" 63 | ), 64 | "expected_args": dict( 65 | defaults, 66 | update=True, 67 | warn=True, 68 | compress=True, 69 | download_attempts=4, 70 | token="besttokenever", 71 | resume=True, 72 | step="PUBLISH_CHANNEL", 73 | prompt=True, 74 | stage=False, 75 | publish=True, 76 | ), 77 | "expected_options": {}, 78 | }, 79 | ] 80 | 81 | 82 | def chef_arg_parser(cli_input): 83 | """ 84 | Takes a string `cli_input` and parses it using the SushiChef arg parser. 85 | Returns tuple of args and options. 86 | """ 87 | test_argv = cli_input.split(" ") 88 | with patch.object(sys, "argv", test_argv): 89 | chef = SushiChef() 90 | args, options = chef.parse_args_and_options() 91 | assert args is not None, "argparse parsing failed" 92 | return args, options 93 | 94 | 95 | """ *********** CLI ARGUMENTS TESTS *********** """ 96 | 97 | 98 | def test_basic_command_line_args_and_options(cli_args_and_expected): 99 | for case in cli_args_and_expected: 100 | cli_input = case["cli_input"] 101 | expected_args = case["expected_args"] 102 | expected_options = case["expected_options"] 103 | 104 | args, options = chef_arg_parser(cli_input) 105 | 106 | # print('observed', args, options) 107 | # print('expected', expected_args, expected_options) 108 | 109 | for arg, val in expected_args.items(): 110 | assert args[arg] == val 111 | for opt, val in expected_options.items(): 112 | assert options[opt] == val 113 | 114 | 115 | def test_cannot_publish_without_deploy(): 116 | bad_cli_input = "./sushichef.py --token=letoken --publish" 117 | with pytest.raises(InvalidUsageException): 118 | args, options = chef_arg_parser(bad_cli_input) 119 | 120 | 
good_cli_input = "./sushichef.py --token=letoken --deploy --publish" 121 | args, options = chef_arg_parser(good_cli_input) 122 | assert not args["stage"] 123 | assert args["publish"] 124 | -------------------------------------------------------------------------------- /tests/test_csv_metadata.py: -------------------------------------------------------------------------------- 1 | """ Tests for CSV exercises channel logic """ 2 | import os 3 | import tempfile 4 | 5 | import pytest 6 | 7 | from ricecooker.chefs import LineCook 8 | from ricecooker.utils.jsontrees import read_tree_from_json 9 | from ricecooker.utils.metadata_provider import CsvMetadataProvider 10 | 11 | 12 | @pytest.fixture 13 | def channeldir(): 14 | return os.path.join( 15 | "tests", "testchannels", "csv_channel_with_exercises", "channeldir" 16 | ) 17 | 18 | 19 | def test_exercises_metadata_provider(channeldir): 20 | _, channeldirname = os.path.split(channeldir) 21 | mp = CsvMetadataProvider(channeldir) 22 | assert mp is not None, "CsvMetadataProvider does not exist" 23 | mp.validate_headers() 24 | assert mp.has_exercises(), "has exercises" 25 | assert ( 26 | mp.get_channel_info()["source_id"] == "csv_channel_with_exercises" 27 | ), "check source id" 28 | # 29 | assert len(mp.contentcache.keys()) == 8, "Found too many items" 30 | assert len(mp.get_exercises_for_dir((channeldirname,))) == 1, "one exercise in root" 31 | assert ( 32 | len(mp.get_exercises_for_dir((channeldirname, "exercises"))) == 3 33 | ), "3 exercise in exercises/" 34 | 35 | 36 | def test_exercises_linecook(channeldir): 37 | tmpdir_path = tempfile.mkdtemp() 38 | 39 | linecook = LineCook() 40 | linecook.TREES_DATA_DIR = tmpdir_path 41 | linecook.RICECOOKER_JSON_TREE = "test_ricecooker_json_tree.json" 42 | 43 | args = dict( 44 | channeldir=channeldir, 45 | channelinfo="Channel.csv", 46 | contentinfo="Content.csv", 47 | exercisesinfo="Exercises.csv", 48 | questionsinfo="ExerciseQuestions.csv", 49 | token="???", 50 | ) 51 | options = {} 
52 | linecook.pre_run(args, options) 53 | 54 | jsontree_path = os.path.join(tmpdir_path, linecook.RICECOOKER_JSON_TREE) 55 | assert os.path.exists(jsontree_path), "output json exists" 56 | test_tree = read_tree_from_json(jsontree_path) 57 | assert len(test_tree["children"]) == 3, "exercise node + two dirs" 58 | 59 | # cleanup 60 | os.remove(jsontree_path) 61 | os.rmdir(tmpdir_path) 62 | -------------------------------------------------------------------------------- /tests/test_downloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from ricecooker.utils import downloader 5 | 6 | 7 | class TestArchiver(unittest.TestCase): 8 | def test_get_archive_filename_absolute(self): 9 | link = "https://learningequality.org/kolibri.png" 10 | 11 | urls_to_replace = {} 12 | result = downloader.get_archive_filename( 13 | link, download_root="./", resource_urls=urls_to_replace 14 | ) 15 | 16 | expected = os.path.join("learningequality.org", "kolibri.png") 17 | 18 | assert result == expected 19 | assert urls_to_replace[link] == expected 20 | 21 | def test_get_archive_filename_relative(self): 22 | link = "../kolibri.png" 23 | page_link = "https://learningequality.org/team/index.html" 24 | 25 | urls_to_replace = {} 26 | result = downloader.get_archive_filename( 27 | link, page_url=page_link, download_root="./", resource_urls=urls_to_replace 28 | ) 29 | 30 | expected = os.path.join("learningequality.org", "kolibri.png") 31 | 32 | assert result == expected 33 | assert urls_to_replace[link] == expected 34 | 35 | def test_get_archive_filename_with_query(self): 36 | link = "../kolibri.png?1.2.3" 37 | page_link = "https://learningequality.org/team/index.html" 38 | 39 | urls_to_replace = {} 40 | result = downloader.get_archive_filename( 41 | link, page_url=page_link, download_root="./", resource_urls=urls_to_replace 42 | ) 43 | 44 | expected = os.path.join("learningequality.org", "kolibri_1.2.3.png") 45 | 46 | 
assert result == expected 47 | assert urls_to_replace[link] == expected 48 | 49 | link = "../kolibri.png?v=1.2.3&i=u" 50 | page_link = "https://learningequality.org/team/index.html" 51 | 52 | urls_to_replace = {} 53 | result = downloader.get_archive_filename( 54 | link, page_url=page_link, download_root="./", resource_urls=urls_to_replace 55 | ) 56 | 57 | expected = os.path.join("learningequality.org", "kolibri_v_1.2.3_i_u.png") 58 | 59 | assert result == expected 60 | assert urls_to_replace[link] == expected 61 | 62 | def test_archive_path_as_relative_url(self): 63 | link = "../kolibri.png?1.2.3" 64 | page_link = "https://learningequality.org/team/index.html" 65 | page_filename = downloader.get_archive_filename(page_link, download_root="./") 66 | link_filename = downloader.get_archive_filename( 67 | link, page_url=page_link, download_root="./" 68 | ) 69 | rel_path = downloader.get_relative_url_for_archive_filename( 70 | link_filename, page_filename 71 | ) 72 | assert rel_path == "../kolibri_1.2.3.png" 73 | -------------------------------------------------------------------------------- /tests/test_licenses.py: -------------------------------------------------------------------------------- 1 | """ Tests for license getting and serialization """ 2 | import json 3 | 4 | import pytest 5 | from le_utils.constants.licenses import ALL_RIGHTS_RESERVED 6 | from le_utils.constants.licenses import CC_BY 7 | from le_utils.constants.licenses import CC_BY_NC 8 | from le_utils.constants.licenses import CC_BY_NC_ND 9 | from le_utils.constants.licenses import CC_BY_NC_SA 10 | from le_utils.constants.licenses import CC_BY_ND 11 | from le_utils.constants.licenses import CC_BY_SA 12 | from le_utils.constants.licenses import PUBLIC_DOMAIN 13 | from le_utils.constants.licenses import SPECIAL_PERMISSIONS 14 | 15 | from ricecooker.classes.licenses import get_license 16 | 17 | 18 | """ *********** LICENSE FIXTURES *********** """ 19 | 20 | 21 | @pytest.fixture 22 | def license_objects(): 
23 | regular_ids = [ 24 | CC_BY, 25 | CC_BY_SA, 26 | CC_BY_ND, 27 | CC_BY_NC, 28 | CC_BY_NC_SA, 29 | CC_BY_NC_ND, 30 | ALL_RIGHTS_RESERVED, 31 | PUBLIC_DOMAIN, 32 | ] 33 | license_objects = [] 34 | for regular_id in regular_ids: 35 | # with description and copyright_holder 36 | licence_obj = get_license( 37 | regular_id, copyright_holder="Some name", description="Le description" 38 | ) 39 | assert licence_obj, "licence_obj should exist" 40 | license_objects.append(licence_obj) 41 | 42 | # with description only 43 | licence_obj = get_license(regular_id, description="Le description solo2") 44 | assert licence_obj, "licence_obj should exist" 45 | license_objects.append(licence_obj) 46 | 47 | # with copyright_holder only 48 | licence_obj = get_license(regular_id, copyright_holder="Some name3") 49 | assert licence_obj, "licence_obj should exist" 50 | license_objects.append(licence_obj) 51 | 52 | # bare 53 | licence_obj = get_license(regular_id) 54 | assert licence_obj, "licence_obj should exist" 55 | license_objects.append(licence_obj) 56 | 57 | return license_objects 58 | 59 | 60 | @pytest.fixture 61 | def special_license(): 62 | return get_license( 63 | SPECIAL_PERMISSIONS, 64 | copyright_holder="Authorov", 65 | description="Only for use offline", 66 | ) 67 | 68 | 69 | """ *********** LICENSE TESTS *********** """ 70 | 71 | 72 | def test_the_license_fixtures(license_objects, special_license): 73 | assert len(license_objects) > 4 74 | assert special_license.license_id == SPECIAL_PERMISSIONS 75 | assert special_license.description 76 | 77 | 78 | def test_bad_special_license(): 79 | try: 80 | get_license(SPECIAL_PERMISSIONS, description=None) 81 | assert False, "Should not come here because of missing description" 82 | except AssertionError: 83 | assert True, "SPECIAL_PERMISSIONS without description should raise an exception" 84 | 85 | 86 | def _compare_licence_objects(obj1, obj2): 87 | same = True 88 | if not obj1.license_id == obj2.license_id: 89 | same = False 90 | if 
not obj1.description == obj2.description: 91 | same = False 92 | if not obj1.copyright_holder == obj2.copyright_holder: 93 | same = False 94 | return same 95 | 96 | 97 | def test_license_serilizibility(license_objects, special_license): 98 | orig_licenses = license_objects 99 | orig_licenses.append(special_license) 100 | for licence_orig in orig_licenses: 101 | # serialize 102 | license_dict = licence_orig.as_dict() 103 | license_json = json.dumps(license_dict) 104 | # deserialize 105 | license_copy_dict = json.loads(license_json) 106 | license_copy = get_license(**license_copy_dict) 107 | 108 | same_attributes = _compare_licence_objects(licence_orig, license_copy) 109 | assert same_attributes, "License attributes not the same after serizlize" 110 | -------------------------------------------------------------------------------- /tests/test_links.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ricecooker.utils.html import replace_links 4 | 5 | 6 | def test_replace_absolute_links(): 7 | a_content = '' 8 | noscheme_a_content = '' 9 | root_a_content = '' 10 | 11 | img_content = '' 12 | 13 | img_srcset_content = '' 14 | 15 | urls_to_replace = { 16 | "http://replace.me/img/hello.jpg": "img/hello.jpg", 17 | "http://replace.me/link/to/page.html": "link/to/page.html", 18 | } 19 | 20 | output = replace_links(img_content, urls_to_replace) 21 | assert output == '' 22 | 23 | output = replace_links(a_content, urls_to_replace) 24 | assert output == '' 25 | 26 | output = replace_links(noscheme_a_content, urls_to_replace) 27 | assert output == '' 28 | 29 | output = replace_links(root_a_content, urls_to_replace) 30 | assert output == '' 31 | 32 | output = replace_links(img_srcset_content, urls_to_replace) 33 | assert output == '' 34 | 35 | 36 | def test_replace_relative_links(): 37 | a_content = '' 38 | noscheme_a_content = '' 39 | root_a_content = '' 40 | 41 | img_content = '' 42 | 43 | img_srcset_content = '' 44 | 45 
| urls_to_replace = { 46 | "http://replace.me/img/hello.jpg": "replace.me/img/hello.jpg", 47 | "http://replace.me/link/to/page.html": "replace.me/link/to/page.html", 48 | } 49 | content_dir = os.path.join("replace.me", "link", "from") 50 | download_root = "." 51 | 52 | output = replace_links( 53 | img_content, 54 | urls_to_replace, 55 | download_root=download_root, 56 | content_dir=content_dir, 57 | relative_links=True, 58 | ) 59 | assert output == '' 60 | 61 | output = replace_links( 62 | a_content, 63 | urls_to_replace, 64 | download_root=download_root, 65 | content_dir=content_dir, 66 | relative_links=True, 67 | ) 68 | assert output == '' 69 | 70 | output = replace_links( 71 | noscheme_a_content, 72 | urls_to_replace, 73 | download_root=download_root, 74 | content_dir=content_dir, 75 | relative_links=True, 76 | ) 77 | assert output == '' 78 | 79 | output = replace_links( 80 | root_a_content, 81 | urls_to_replace, 82 | download_root=download_root, 83 | content_dir=content_dir, 84 | relative_links=True, 85 | ) 86 | assert output == '' 87 | 88 | output = replace_links( 89 | img_srcset_content, 90 | urls_to_replace, 91 | download_root=download_root, 92 | content_dir=content_dir, 93 | relative_links=True, 94 | ) 95 | assert output == '' 96 | -------------------------------------------------------------------------------- /tests/test_requests.py: -------------------------------------------------------------------------------- 1 | """ Tests for handling requests to Kolibri Studio """ 2 | import copy 3 | import uuid 4 | 5 | import pytest 6 | from le_utils.constants import licenses 7 | 8 | from ricecooker.classes.nodes import DocumentNode 9 | from ricecooker.classes.nodes import TopicNode 10 | from ricecooker.exceptions import InvalidNodeException 11 | from ricecooker.managers.tree import ChannelManager 12 | 13 | 14 | """ *********** TOPIC FIXTURES *********** """ 15 | 16 | 17 | @pytest.fixture 18 | def topic_id(): 19 | return "topic-id" 20 | 21 | 22 | @pytest.fixture 23 
| def topic_content_id(channel_domain_namespace, topic_id): 24 | return uuid.uuid5(channel_domain_namespace, topic_id) 25 | 26 | 27 | @pytest.fixture 28 | def topic_node_id(channel_node_id, topic_content_id): 29 | return uuid.uuid5(channel_node_id, topic_content_id.hex) 30 | 31 | 32 | @pytest.fixture 33 | def topic(topic_id): 34 | return TopicNode(topic_id, "Topic") 35 | 36 | 37 | @pytest.fixture 38 | def invalid_topic(topic_id): 39 | topic = TopicNode(topic_id, "Topic") 40 | topic.title = None 41 | return topic 42 | 43 | 44 | """ *********** LOCAL DOCUMENT FIXTURES *********** """ 45 | 46 | 47 | @pytest.fixture 48 | def invalid_document(document_file): 49 | node = DocumentNode("invalid", "Document", licenses.CC_BY, files=[document_file]) 50 | node.license = None 51 | return node 52 | 53 | 54 | """ *********** TREE FIXTURES *********** """ 55 | 56 | 57 | @pytest.fixture 58 | def tree(channel, topic, document): 59 | topic.add_child(document) 60 | channel.add_child(topic) 61 | return ChannelManager(channel) 62 | 63 | 64 | @pytest.fixture 65 | def invalid_tree(invalid_channel, invalid_topic, invalid_document): 66 | invalid_topic.add_child(invalid_document) 67 | invalid_channel.add_child(invalid_topic) 68 | return ChannelManager(invalid_channel) 69 | 70 | 71 | @pytest.fixture 72 | def invalid_tree_2(channel, topic, invalid_document): 73 | channel_copy = copy.deepcopy(channel) 74 | topic_copy = copy.deepcopy(topic) 75 | topic_copy.add_child(invalid_document) 76 | channel_copy.add_child(topic_copy) 77 | return ChannelManager(channel_copy) 78 | 79 | 80 | """ TESTS """ 81 | 82 | 83 | def test_validate(tree, invalid_tree, invalid_tree_2): 84 | assert tree.validate(), "Tree should pass validation" 85 | pytest.raises(InvalidNodeException, invalid_tree.validate) 86 | pytest.raises(InvalidNodeException, invalid_tree_2.validate) 87 | -------------------------------------------------------------------------------- /tests/test_settings.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from mock import patch 4 | 5 | from ricecooker import chefs 6 | 7 | 8 | settings = {"thumbnails": True, "compress": True} 9 | 10 | 11 | def test_settings_unset_default(): 12 | chef = chefs.SushiChef() 13 | 14 | for setting in settings: 15 | assert chef.get_setting(setting) is None 16 | assert chef.get_setting(setting, default=False) is False 17 | 18 | 19 | def test_settings(): 20 | chef = chefs.SushiChef() 21 | 22 | for setting in settings: 23 | value = settings[setting] 24 | chef.SETTINGS[setting] = value 25 | assert chef.get_setting(setting) == value 26 | assert chef.get_setting(setting, default=None) == value 27 | 28 | 29 | def test_cli_args_override_settings(): 30 | """ 31 | For settings that can be controlled via the command line, ensure that the command line setting 32 | takes precedence over the default setting. 33 | """ 34 | 35 | test_argv = ["sushichef.py", "--compress", "--thumbnails", "--token", "12345"] 36 | 37 | with patch.object(sys, "argv", test_argv): 38 | chef = chefs.SushiChef() 39 | chef.SETTINGS["thumbnails"] = False 40 | chef.SETTINGS["compress"] = False 41 | 42 | assert chef.get_setting("thumbnails") is False 43 | assert chef.get_setting("compress") is False 44 | 45 | chef.parse_args_and_options() 46 | assert chef.get_setting("thumbnails") is True 47 | assert chef.get_setting("compress") is True 48 | 49 | test_argv = ["sushichef.py", "--compress", "--thumbnails", "--token", "12345"] 50 | 51 | with patch.object(sys, "argv", test_argv): 52 | chef = chefs.SushiChef() 53 | 54 | assert len(chef.SETTINGS) == 0 55 | 56 | assert chef.get_setting("thumbnails") is None 57 | assert chef.get_setting("compress") is None 58 | 59 | chef.parse_args_and_options() 60 | assert chef.get_setting("thumbnails") is True 61 | assert chef.get_setting("compress") is True 62 | 63 | # now test without setting the flags 64 | test_argv = ["sushichef.py", "--token", 
"12345"] 65 | 66 | with patch.object(sys, "argv", test_argv): 67 | chef = chefs.SushiChef() 68 | chef.SETTINGS["thumbnails"] = False 69 | chef.SETTINGS["compress"] = False 70 | 71 | assert chef.get_setting("thumbnails") is False 72 | assert chef.get_setting("compress") is False 73 | 74 | chef.parse_args_and_options() 75 | assert chef.get_setting("thumbnails") is False 76 | assert chef.get_setting("compress") is False 77 | -------------------------------------------------------------------------------- /tests/test_zip.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import tempfile 4 | 5 | import pytest 6 | 7 | from ricecooker.utils.zip import create_predictable_zip 8 | 9 | 10 | # The MD5s in this object are generated by running this file as a script 11 | # they should not be updated as they are now our baseline for what our predictable zip should produce 12 | # so any changes to the implementation should not change these values, if they do, it's a bug. 
13 | TEST_CASES = { 14 | "nested_text": { 15 | "files": {"folder/nested.txt": "Nested content", "test.txt": "Hello World"}, 16 | "expected_md5": "220f0d36a5150d3912a0eebee2738d80", # Generated by running this file as a script 17 | }, 18 | "reversed": { 19 | "files": {"b.txt": "content b", "a.txt": "content a"}, 20 | "expected_md5": "5f3c72e2f32c5b7919cd6c31e5f169cd", # Generated by running this file as a script 21 | }, 22 | "binaryFiles": { 23 | "files": { 24 | "image.png": b"PNG\x89\x50\x4E\x47\x0D\x0A\x1A\x0A", 25 | "data.bin": bytes([0xFF, 0xD8, 0xFF, 0xE0]), 26 | "text.txt": "Mixed content", 27 | }, 28 | "expected_md5": "18ba9ca5ba2ed25ada40111fcc055a82", # Generated by running this file as a script 29 | }, 30 | "nested_binary": { 31 | "files": { 32 | "folder/image.png": b"PNG\x89\x50\x4E\x47\x0D\x0A\x1A\x0A", 33 | "test.txt": "Hello World", 34 | }, 35 | "expected_md5": "0fdfc3bd5b661ae3cde677d542426386", # Generated by running this file as a script 36 | }, 37 | "simple_binary": { 38 | "files": {"test.bin": bytes([0x00, 0x01, 0x02, 0x03])}, 39 | "expected_md5": "461a08dc38d2b7dae48c2bc2e641b958", # We'll generate this 40 | }, 41 | } 42 | 43 | 44 | def create_test_files(files): 45 | temp_dir = tempfile.mkdtemp() 46 | for path, content in files.items(): 47 | full_path = os.path.join(temp_dir, path.replace("/", os.sep)) 48 | os.makedirs(os.path.dirname(full_path), exist_ok=True) 49 | mode = "wb" if isinstance(content, bytes) else "w" 50 | with open(full_path, mode) as f: 51 | f.write(content) 52 | return temp_dir 53 | 54 | 55 | def generate_md5(temp_dir, entrypoint=None): 56 | zip_path = create_predictable_zip(temp_dir, entrypoint=entrypoint) 57 | with open(zip_path, "rb") as f: 58 | md5 = hashlib.md5(f.read()).hexdigest() 59 | os.remove(zip_path) 60 | return md5 61 | 62 | 63 | def cleanup(temp_dir): 64 | for root, dirs, files in os.walk(temp_dir, topdown=False): 65 | for name in files: 66 | os.remove(os.path.join(root, name)) 67 | for name in dirs: 68 | 
os.rmdir(os.path.join(root, name)) 69 | os.rmdir(temp_dir) 70 | 71 | 72 | @pytest.mark.parametrize("case_name,case", TEST_CASES.items()) 73 | def test_predictable_zip(case_name, case): 74 | temp_dir = create_test_files(case["files"]) 75 | try: 76 | md5 = generate_md5(temp_dir, case.get("entrypoint")) 77 | assert md5 == case["expected_md5"], f"MD5 mismatch for {case_name}" 78 | finally: 79 | cleanup(temp_dir) 80 | 81 | 82 | def test_order_independence(): 83 | reversed_files = dict(reversed(list(TEST_CASES["reversed"]["files"].items()))) 84 | temp_dir1 = create_test_files(TEST_CASES["reversed"]["files"]) 85 | temp_dir2 = create_test_files(reversed_files) 86 | try: 87 | md5_1 = generate_md5(temp_dir1) 88 | md5_2 = generate_md5(temp_dir2) 89 | assert md5_1 == md5_2 90 | finally: 91 | cleanup(temp_dir1) 92 | cleanup(temp_dir2) 93 | 94 | 95 | if __name__ == "__main__": 96 | for name, case in TEST_CASES.items(): 97 | temp_dir = create_test_files(case["files"]) 98 | md5 = generate_md5(temp_dir, case.get("entrypoint")) 99 | print(f"MD5 for {name}: {md5}") 100 | cleanup(temp_dir) 101 | -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/Channel.csv: -------------------------------------------------------------------------------- 1 | Title,Description,Domain,Source ID,Language,Thumbnail 2 | Test CSV channel with Exercises,This channel was created from the files in the channeldir/ directory and the metadata stored in CSV files,source.org,csv_channel_with_exercises,en,channeldir/channel_thumbnail.jpg 3 | -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/Content.csv: -------------------------------------------------------------------------------- 1 | Path *,Title *,Source ID,Description,Author,Language,License ID *,License Description,Copyright Holder,Thumbnail 2 | channeldir/contentnodes,Content Nodes,3be352f9,Put folder 
description here,,en,,,, 3 | channeldir/contentnodes/audio,Audio Files,09219f2e,Put folder description here,,en,,,, 4 | channeldir/exercises,Exercises,fafafa007,"This doesn’t contain any files, but will be populated with some of the exercises from Exercises.csv",First Last (author's name),en,CC BY,,Copyright holder name, 5 | -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/ExerciseQuestions.csv: -------------------------------------------------------------------------------- 1 | Source ID *,Question ID *,Question type *,Question *,Option A,Option B,Option C,Option D,Option E,Options F...,Correct Answer *,Correct Answer 2,Correct Answer 3,Hint 1,Hint 2,Hint 3,Hint 4,Hint 5,Hint 6+ 2 | exrc1,1,single_selection,What is your 2+2?,1,2,3,4,5,,4,,,Add the two numbers together.,,,,, 3 | exrc1,2,multiple_selection,Select all the solution to x^2=4?,-2,-1,0,1,2,,-2,2,,Quadratic equations have multiple solutoins.,Which number times itself gives 4?,Is there another number that also works?,,, 4 | exrc1,3,input_question,What is the next integer after 2?,,,,,,,3,,,"Imagine the number line, what comes to the right of the number two?",,,,, 5 | exrc2,1,single_selection,What is your 2+2?,1,2,3,4,5,,4,,,Add the two numbers together.,,,,, 6 | exrc2,2,multiple_selection,Select all the solution to x^2=4?,-2,-1,0,1,2,,-2,2,,Quadratic equations have multiple solutoins.,Which number times itself gives 4?,Is there another number that also works?,,, 7 | exrc2,3,input_question,What is the next integer after 2?,,,,,,,3,,,"Imagine the number line, what comes to the right of the number two?",,,,, 8 | exrc3,1,single_selection,What is your 2+2?,1,2,3,4,5,,4,,,Add the two numbers together.,,,,, 9 | exrc4,2,single_selection,"What is the area of the circle shown below 10 | 11 | ![](figures/exrc4/circle-of-radius-2.png)",$\pi$,$2\pi$,$3\pi$,$4\pi$,$5\pi$,,$4\pi$,,,The area of a circle is proportional to the square of its 
radius.,The formula is $A=\pi r^2$.,In this case the circle has radius $r=2$ so the area of the circle is $A=4\pi$.,,, 12 | exrc5,3,multiple_selection,Select all the triangles.,![](figures/exrc5/triangle1.png),![](figures/exrc5/hexagon.png),![](figures/exrc5/triangle2.png),![](figures/exrc5/triangle3.png),![](figures/exrc5/octagon.png),![](figures/exrc5/square.png),![](figures/exrc5/triangle1.png),![](figures/exrc5/triangle2.png),![](figures/exrc5/triangle3.png),A triangle is a geometrical shape with three sides and three vertices.,,,,, 13 | -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/Exercises.csv: -------------------------------------------------------------------------------- 1 | Path *,Title *,Source ID *,Description,Author,Language,License ID *,License Description,Copyright Holder,Number Correct,Out of Total,Randomize,Thumbnail 2 | channeldir/exercise1,First Exercise,exrc1,This is a really math exercise that will appear in the channel root.,Ivan Savov,en,CC BY,,Learning Equality,1,2,TRUE,channeldir/algebra_exercise_thumb.png 3 | channeldir/contentnodes/audio/Wzexercise,Second Exercise,exrc2,An exrcise ,Ivan Savov,fr,CC BY,,Learning Equality,,,FALSE,channeldir/contentnodes/audio/WZ_exercise_thumbnail.png 4 | channeldir/exercises/exercise3,Third Exercise,exrc3,An exercise in the subfolder exercsies/,Ivan Savov,en,CC BY,,Learning Equality,,,FALSE, 5 | channeldir/exercises/exercise4,Fourth Exercise,exrc4,An exercise that shows figures in question and use of LaTeX markup,Ivan Savov,en,CC BY,,Learning Equality,,,FALSE, 6 | channeldir/exercises/exercise5,Fifth Exercise,exrc5,An exercise which shows figures as answers,Ivan Savov,en,CC BY,,Learning Equality,,,FALSE, 7 | -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/channeldir/algebra_exercise_thumb.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/algebra_exercise_thumb.png -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/channeldir/channel_thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/channel_thumbnail.jpg -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/channeldir/contentnodes/audio/WZ_exercise_thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/contentnodes/audio/WZ_exercise_thumbnail.png -------------------------------------------------------------------------------- /tests/testchannels/csv_channel_with_exercises/channeldir/exercises/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/exercises/.gitkeep -------------------------------------------------------------------------------- /tests/testcontent/downloaded/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/downloaded/.gitkeep 
-------------------------------------------------------------------------------- /tests/testcontent/exercises/eb3f3bf7c317408ee90995b5bcf4f3a59606aedd-data.json: -------------------------------------------------------------------------------- 1 | svgDataeb3f3bf7c317408ee90995b5bcf4f3a59606aedd({"range":[[-5,5.625],[-40,130]],"labels":[{"content":"\\small{1}","coordinates":[1,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{2}","coordinates":[2,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{3}","coordinates":[3,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{4}","coordinates":[4,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}2}","coordinates":[-2,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}3}","coordinates":[-3,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}4}","coordinates":[-4,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{16}","coordinates":[0,16],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{32}","coordinates":[0,32],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{48}","coordinates":[0,48],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{64}","coordinates":[0,64],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{80}","coordinates":[0,80],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{96}","coordinates":[0,96],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{112}","coordinates":[0,112],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}32}","coordinates":[0,-32],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"y","coordinates":[0,120],"alignment":"above","typesetAsMath":true,"style":{}},{"content":"x","coordinates":[5,0],"alignment":"right","typesetAsMat
h":true,"style":{}},{"content":"\\blueD{y=f(x)}","coordinates":[2,100],"typesetAsMath":true,"style":{}}]}); -------------------------------------------------------------------------------- /tests/testcontent/exercises/no-wifi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/exercises/no-wifi.png -------------------------------------------------------------------------------- /tests/testcontent/exercises/perseus_question_new_bar_graphs.json: -------------------------------------------------------------------------------- 1 | { 2 | "answerArea": { 3 | "calculator": false, 4 | "chi2Table": false, 5 | "periodicTable": false, 6 | "tTable": false, 7 | "zTable": false 8 | }, 9 | "hints": [ 10 | { 11 | "content": "The bottom bar lines up to $\\purpleD{6}$. \n\n![](web+graphie://cdn.kastatic.org/ka-perseus-graphie/d855aefe9a722f9a794b0883ebcdb8c37b4ba0c7)\n\nWhich type of fruit has $\\purpleD{6}$ in Luigi's home?", 12 | "images": { 13 | "web+graphie://cdn.kastatic.org/ka-perseus-graphie/d855aefe9a722f9a794b0883ebcdb8c37b4ba0c7": { 14 | "height": 330, 15 | "width": 404 16 | } 17 | }, 18 | "replace": false, 19 | "widgets": {} 20 | }, 21 | { 22 | "content": "Kind of fruit | Number\n:- | :-: \nOranges | $\\purpleD{6}$ \n\nLuigi has $\\purpleD{6}$ oranges. 
So, the bottom bar should be labeled $\\purpleD{\\text{Oranges}}$.", 23 | "images": {}, 24 | "replace": false, 25 | "widgets": {} 26 | }, 27 | { 28 | "content": "Now let's label the other bars to match the table.", 29 | "images": {}, 30 | "replace": false, 31 | "widgets": {} 32 | }, 33 | { 34 | "content": "Here is the completed graph:\n\n![](web+graphie://cdn.kastatic.org/ka-perseus-graphie/95262ebaf42bdd1929e5d6d1e2853d3eb0a5cc74)", 35 | "images": { 36 | "web+graphie://cdn.kastatic.org/ka-perseus-graphie/95262ebaf42bdd1929e5d6d1e2853d3eb0a5cc74": { 37 | "height": 330, 38 | "width": 404 39 | } 40 | }, 41 | "replace": false, 42 | "widgets": {} 43 | } 44 | ], 45 | "itemDataVersion": { 46 | "major": 0, 47 | "minor": 1 48 | }, 49 | "question": { 50 | "content": "Luigi created a chart and a bar graph to show how many of each type of fruit were in his home.\n\nKind of fruit | Number \n:- | :-: \nApple | $7$ \nStrawberries | $3$ \nOranges | $6$ \nBananas| $2$ \n\n**Label each bar on the bar graph.**\n\n[[☃ label-image 1]]\n", 51 | "images": {}, 52 | "widgets": { 53 | "label-image 1": { 54 | "alignment": "default", 55 | "graded": true, 56 | "options": { 57 | "choices": [ 58 | "Apple", 59 | "Strawberries", 60 | "Oranges", 61 | "Bananas" 62 | ], 63 | "hideChoicesFromInstructions": true, 64 | "imageAlt": "", 65 | "imageHeight": 330, 66 | "imageUrl": "web+graphie://cdn.kastatic.org/ka-perseus-graphie/ab207c6f38c887130b68c078e6158a87aab60c45", 67 | "imageWidth": 404, 68 | "markers": [ 69 | { 70 | "answers": [ 71 | "Strawberries" 72 | ], 73 | "label": "", 74 | "x": 24.1, 75 | "y": 17.7 76 | }, 77 | { 78 | "answers": [ 79 | "Bananas" 80 | ], 81 | "label": "", 82 | "x": 24.4, 83 | "y": 35.7 84 | }, 85 | { 86 | "answers": [ 87 | "Apple" 88 | ], 89 | "label": "", 90 | "x": 23.8, 91 | "y": 52.9 92 | }, 93 | { 94 | "answers": [ 95 | "Oranges" 96 | ], 97 | "label": "", 98 | "x": 24.1, 99 | "y": 70.9 100 | } 101 | ], 102 | "multipleAnswers": false, 103 | "static": false 104 | }, 105 | 
"static": false, 106 | "type": "label-image", 107 | "version": { 108 | "major": 0, 109 | "minor": 0 110 | } 111 | } 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /tests/testcontent/exercises/perseus_question_x43bbec76d5f14f88_bg.json: -------------------------------------------------------------------------------- 1 | { 2 | "itemDataVersion": { 3 | "major": 0, 4 | "minor": 1 5 | }, 6 | "hints": [ 7 | { 8 | "content": "Функцията $f$ е положителна $($т.е. $f(x)>0)$, когато графиката ѝ е над оста $x$.\n\nТова е така, защото положителните стойности на $y$ са над оста $x$, а $y=f(x)$.\n\n![graph](web+graphie://ka-perseus-graphie.s3.amazonaws.com/d8daa074ec7d09ce3819d6259b3e4670701d2540)", 9 | "images": {}, 10 | "widgets": {}, 11 | "replace": false 12 | }, 13 | { 14 | "content": "От всички изброени варианти единственият верен интервал е $-20$.**\n\n[[☃ radio 1]]", 22 | "images": {}, 23 | "widgets": { 24 | "radio 1": { 25 | "graded": true, 26 | "version": { 27 | "major": 1, 28 | "minor": 0 29 | }, 30 | "static": false, 31 | "type": "radio", 32 | "options": { 33 | "onePerLine": true, 34 | "displayCount": null, 35 | "choices": [ 36 | { 37 | "content": "$-20$.**\n\n[[☃ radio 1]]", 4 | "images": { 5 | "web+graphie://ka-perseus-graphie.s3.amazonaws.com/eb3f3bf7c317408ee90995b5bcf4f3a59606aedd": { 6 | "width": 425, 7 | "height": 425 8 | } 9 | }, 10 | "widgets": { 11 | "radio 1": { 12 | "type": "radio", 13 | "alignment": "default", 14 | "static": false, 15 | "graded": true, 16 | "options": { 17 | "choices": [ 18 | { 19 | "content": "$-20)$ whenever its graph is above the $x$-axis.\n\nThis is because the positive $y$-values are above the $x$-axis, and $y=f(x)$.\n\n![graph](web+graphie://ka-perseus-graphie.s3.amazonaws.com/d8daa074ec7d09ce3819d6259b3e4670701d2540)", 60 | "images": { 61 | "web+graphie://ka-perseus-graphie.s3.amazonaws.com/d8daa074ec7d09ce3819d6259b3e4670701d2540": { 62 | "width": 425, 63 | "height": 
425 64 | } 65 | }, 66 | "widgets": {} 67 | }, 68 | { 69 | "replace": false, 70 | "content": "Out of our options, the only correct interval is $-2 00:00:14,979 3 | أمضيت ما يقرب من العقدين 4 | 5 | 2 6 | 00:00:14,979 --> 00:00:18,532 7 | ألاحظ ما يجعل البعض أكثر حظًا من غيرهم 8 | 9 | 3 10 | 00:00:18,536 --> 00:00:22,119 11 | وأحاول مساعدة الناس على زيادة حظهم. 12 | -------------------------------------------------------------------------------- /tests/testcontent/samples/thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/samples/thumbnail.jpg -------------------------------------------------------------------------------- /tests/testcontent/samples/thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/samples/thumbnail.png -------------------------------------------------------------------------------- /tests/testcontent/youtubecache/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/youtubecache/.gitkeep -------------------------------------------------------------------------------- /tests/utils/test_extensions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ricecooker.utils.utils import extract_path_ext 4 | 5 | # Tests generated by Claude Sonnet 3.7 6 | 7 | 8 | def test_extract_path_ext_basic_file(): 9 | """Test basic file path extension extraction.""" 10 | assert extract_path_ext("file.txt") == "txt" 11 | assert extract_path_ext("path/to/file.jpg") == "jpg" 12 | assert extract_path_ext("/absolute/path/to/file.pdf") 
== "pdf" 13 | assert extract_path_ext("file.PNG") == "png" # Tests lowercase conversion 14 | 15 | 16 | def test_extract_path_ext_windows_paths(): 17 | """Test Windows-style file paths.""" 18 | assert extract_path_ext("C:\\Users\\name\\file.docx") == "docx" 19 | assert extract_path_ext("D:\\path\\to\\file.xlsx") == "xlsx" 20 | assert extract_path_ext("\\\\network\\share\\file.csv") == "csv" 21 | 22 | 23 | def test_extract_path_ext_urls(): 24 | """Test URL path extension extraction.""" 25 | assert extract_path_ext("http://example.com/file.json") == "json" 26 | assert extract_path_ext("https://domain.org/path/to/file.xml") == "xml" 27 | assert extract_path_ext("ftp://files.net/downloads/file.zip") == "zip" 28 | 29 | 30 | def test_extract_path_ext_urls_with_query_params(): 31 | """Test URLs with query parameters.""" 32 | assert extract_path_ext("http://example.com/file.html?param=value") == "html" 33 | assert ( 34 | extract_path_ext("https://api.domain.org/data.json?id=123&token=abc") == "json" 35 | ) 36 | assert extract_path_ext("http://site.com/download.tar.gz?download=true") == "gz" 37 | 38 | 39 | def test_extract_path_ext_no_extension(): 40 | """Test paths with no extension.""" 41 | with pytest.raises(ValueError): 42 | extract_path_ext("file_without_extension") 43 | 44 | with pytest.raises(ValueError): 45 | extract_path_ext("/path/to/file") 46 | 47 | with pytest.raises(ValueError): 48 | extract_path_ext("http://example.com/api/v1/resource") 49 | 50 | 51 | def test_extract_path_ext_empty_extension(): 52 | """Test paths with empty extension (dot at the end).""" 53 | with pytest.raises(ValueError): 54 | extract_path_ext("file.") 55 | 56 | with pytest.raises(ValueError): 57 | extract_path_ext("http://example.com/document.") 58 | 59 | 60 | def test_extract_path_ext_default_ext(): 61 | """Test default extension parameter.""" 62 | assert extract_path_ext("file_without_extension", default_ext="txt") == "txt" 63 | assert extract_path_ext("file.", default_ext="dat") == "dat" 
64 | assert ( 65 | extract_path_ext("http://example.com/api/v1/resource", default_ext="json") 66 | == "json" 67 | ) 68 | 69 | 70 | def test_extract_path_ext_complex_paths(): 71 | """Test more complex path scenarios.""" 72 | assert extract_path_ext("file.tar.gz") == "gz" 73 | assert extract_path_ext("path/to/archive.tar.bz2") == "bz2" 74 | assert extract_path_ext("http://example.com/path/to/file.min.js") == "js" 75 | 76 | 77 | def test_extract_path_ext_edge_cases(): 78 | """Test edge cases.""" 79 | # URL with hash fragment 80 | assert extract_path_ext("http://example.com/file.html#section") == "html" 81 | 82 | # URL with query and hash fragment 83 | assert extract_path_ext("http://example.com/file.php?id=1#top") == "php" 84 | 85 | # Path with multiple dots 86 | assert extract_path_ext("file.name.with.dots.txt") == "txt" 87 | -------------------------------------------------------------------------------- /tests/vcr_config.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | try: 4 | import vcr 5 | 6 | my_vcr = vcr.VCR( 7 | cassette_library_dir="tests/cassettes", 8 | record_mode="new_episodes", 9 | path_transformer=vcr.VCR.ensure_suffix(".yaml"), 10 | filter_headers=["authorization"], 11 | ) 12 | except ImportError: 13 | 14 | class VCR: 15 | def use_cassette(self, *args, **kwargs): 16 | return pytest.mark.skip("vcrpy is not available on this Python version") 17 | 18 | my_vcr = VCR() 19 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py3.{9,10,11,12} 3 | 4 | [testenv] 5 | basepython = 6 | py3.9: python3.9 7 | py3.10: python3.10 8 | py3.11: python3.11 9 | py3.12: python3.12 10 | extras = test,google_drive 11 | setenv = 12 | PYTHONPATH = {toxinidir} 13 | commands = 14 | pytest --basetemp={envtmpdir} 15 | 
--------------------------------------------------------------------------------