├── .editorconfig
├── .github
    ├── ISSUE_TEMPLATE.md
    ├── dependabot.yml
    └── workflows
    │   ├── community-contribution-labeling.yml
    │   ├── notify_team_new_comment.yml
    │   ├── pre-commit.yml
    │   ├── python-publish.yml
    │   ├── pythontest.yml
    │   ├── unassign-inactive.yaml
    │   └── update-pr-spreadsheet.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── AUTHORS.rst
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── docs
    ├── 404.rst
    ├── Makefile
    ├── README.rst
    ├── _templates
    │   ├── footer.html
    │   └── layout.html
    ├── chefops.md
    ├── community
    │   ├── authors.rst
    │   ├── contributing.md
    │   └── index.rst
    ├── concepts
    │   ├── content_workflows.md
    │   ├── developer_workflows.md
    │   ├── index.rst
    │   ├── introduction.md
    │   ├── reviewing_channels.md
    │   └── terminology.md
    ├── conf.py
    ├── csv_metadata
    │   ├── README.rst
    │   ├── csv_exercises.md
    │   ├── csv_workflow.md
    │   └── index.rst
    ├── developer
    │   ├── corrections.md
    │   ├── design_cli.md
    │   ├── ids.md
    │   ├── index.rst
    │   ├── kolibripreview.md
    │   ├── sushops.md
    │   └── uploadprocess.md
    ├── downloader.md
    ├── examples
    │   ├── detokenify.pl
    │   ├── document_conversion.ipynb
    │   ├── exercises.ipynb
    │   ├── index.rst
    │   ├── languages.ipynb
    │   └── samplefiles
    │   │   └── documents
    │   │       ├── doc_EN.pdf
    │   │       ├── doc_ES.pdf
    │   │       └── doc_FR.pdf
    ├── exercises.md
    ├── figures
    │   ├── HandBrake
    │   │   ├── handbrake_steps.png
    │   │   ├── handbreake_audio_settings.png
    │   │   ├── handbreake_resizing_settings.png
    │   │   └── handbreake_screenshot_video_settings.png
    │   ├── content_pipeline_diagram.png
    │   ├── content_pipeline_diagram_with_highlight.png
    │   ├── kolibri_logo.png
    │   ├── logo.ico
    │   └── ricecooker_domain.png
    ├── files.md
    ├── history.rst
    ├── htmlapps.md
    ├── index.rst
    ├── index_api_reference.rst
    ├── index_utils.rst
    ├── installation.md
    ├── languages.md
    ├── make.bat
    ├── nodes.md
    ├── parsing_html.md
    ├── pdfutils.md
    ├── requirements.txt
    ├── tutorial
    │   ├── explanations.md
    │   ├── gettingstarted.rst
    │   ├── index.rst
    │   ├── quickstart.rst
    │   └── tutorial.rst
    ├── usage.md
    └── video_compression.md
├── examples
    ├── README.md
    ├── gettingstarted
    │   └── sushichef.py
    ├── oldexamples
    │   ├── README.md
    │   ├── content
    │   │   ├── 0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5-data.json
    │   │   ├── 0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5.svg
    │   │   ├── captions.vtt
    │   │   ├── htmltest.zip
    │   │   ├── sushirolls.pdf
    │   │   └── video.mp4
    │   ├── data
    │   │   ├── perseus_graph_question.json
    │   │   ├── sample_perseus01.json
    │   │   ├── sample_perseus02.json
    │   │   └── sample_tree.json
    │   ├── large_wikipedia_chef.py
    │   ├── sample_program.py
    │   └── wikipedia_video_chef.py
    ├── studiocontent
    │   └── sushichef.py
    ├── tutorial
    │   └── sushichef.py
    └── wikipedia
    │   ├── README.md
    │   └── sushichef.py
├── pytest.ini
├── resources
    ├── scripts
    │   ├── convertvideo.bat
    │   └── convertvideo.sh
    └── templates
    │   └── csv_channel
    │       ├── Channel.csv
    │       ├── Content.csv
    │       ├── ExerciseQuestions.csv
    │       ├── Exercises.csv
    │       └── csvchef.py
├── ricecooker
    ├── __init__.py
    ├── chefs.py
    ├── classes
    │   ├── __init__.py
    │   ├── files.py
    │   ├── licenses.py
    │   ├── nodes.py
    │   └── questions.py
    ├── commands.py
    ├── config.py
    ├── exceptions.py
    ├── managers
    │   ├── __init__.py
    │   ├── progress.py
    │   └── tree.py
    └── utils
    │   ├── __init__.py
    │   ├── audio.py
    │   ├── browser.py
    │   ├── caching.py
    │   ├── corrections.py
    │   ├── downloader.py
    │   ├── encodings.py
    │   ├── html.py
    │   ├── html_writer.py
    │   ├── images.py
    │   ├── jsontrees.py
    │   ├── kolibripreview.py
    │   ├── libstudio.py
    │   ├── linecook.py
    │   ├── metadata_provider.py
    │   ├── paths.py
    │   ├── pdf.py
    │   ├── pipeline
    │       ├── __init__.py
    │       ├── context.py
    │       ├── convert.py
    │       ├── exceptions.py
    │       ├── extract_metadata.py
    │       ├── file_handler.py
    │       ├── mime.types
    │       └── transfer.py
    │   ├── proxy.py
    │   ├── subtitles.py
    │   ├── thumbscropping.py
    │   ├── tokens.py
    │   ├── utils.py
    │   ├── videos.py
    │   ├── web.py
    │   ├── youtube.py
    │   └── zip.py
├── setup.cfg
├── setup.py
├── tests
    ├── cassettes
    │   ├── test_gdrive_audio.yaml
    │   ├── test_gdrive_channel_spreadsheet.yaml
    │   ├── test_gdrive_doc.yaml
    │   ├── test_gdrive_pdf.yaml
    │   ├── test_gdrive_slideshow.yaml
    │   ├── test_gdrive_video.yaml
    │   ├── test_gdrive_vtt.yaml
    │   └── test_youtubevideo_process_file.yaml
    ├── chefs
    │   └── fake_chef.py
    ├── conftest.py
    ├── media_utils
    │   ├── README.md
    │   ├── __init__.py
    │   ├── files
    │   │   ├── Wilhelm_Scream.mp3
    │   │   ├── assets
    │   │   │   ├── css
    │   │   │   │   ├── empty.css
    │   │   │   │   └── empty2.css
    │   │   │   ├── images
    │   │   │   │   ├── 4933759886_098e9acf93_m.jpg
    │   │   │   │   └── copyright.txt
    │   │   │   └── js
    │   │   │   │   └── empty.js
    │   │   ├── audio
    │   │   │   └── file_example_MP3_700KB.mp3
    │   │   ├── file_metadata.txt
    │   │   ├── generate_thumbnail
    │   │   │   ├── sample.epub
    │   │   │   ├── sample.pdf
    │   │   │   └── sample.zip
    │   │   ├── kepub.epub
    │   │   ├── page_with_links.html
    │   │   ├── subtitles
    │   │   │   ├── basic.srt
    │   │   │   ├── basic.vtt
    │   │   │   ├── empty.ttml
    │   │   │   ├── encapsulated.sami
    │   │   │   ├── encapsulated.vtt
    │   │   │   └── not.txt
    │   │   └── thumbnails
    │   │   │   ├── BRAlogo1.png
    │   │   │   ├── toosquare.png
    │   │   │   ├── tootall.png
    │   │   │   └── toowide.png
    │   ├── test_audio.py
    │   ├── test_proxy.py
    │   ├── test_subtitles.py
    │   ├── test_thumbnails.py
    │   ├── test_videos.py
    │   ├── test_web.py
    │   └── test_youtube.py
    ├── pipeline
    │   ├── __init__.py
    │   └── test_transfer.py
    ├── test_argparse.py
    ├── test_chef_integration.py
    ├── test_csv_metadata.py
    ├── test_data.py
    ├── test_downloader.py
    ├── test_exercises.py
    ├── test_files.py
    ├── test_licenses.py
    ├── test_links.py
    ├── test_pdfutils.py
    ├── test_requests.py
    ├── test_settings.py
    ├── test_thumbnails.py
    ├── test_tree.py
    ├── test_videos.py
    ├── test_youtube.py
    ├── test_zip.py
    ├── testchannels
    │   └── csv_channel_with_exercises
    │   │   ├── Channel.csv
    │   │   ├── Content.csv
    │   │   ├── ExerciseQuestions.csv
    │   │   ├── Exercises.csv
    │   │   └── channeldir
    │   │       ├── algebra_exercise_thumb.png
    │   │       ├── channel_thumbnail.jpg
    │   │       ├── contentnodes
    │   │           └── audio
    │   │           │   └── WZ_exercise_thumbnail.png
    │   │       └── exercises
    │   │           └── .gitkeep
    ├── testcontent
    │   ├── downloaded
    │   │   └── .gitkeep
    │   ├── exercises
    │   │   ├── eb3f3bf7c317408ee90995b5bcf4f3a59606aedd-data.json
    │   │   ├── eb3f3bf7c317408ee90995b5bcf4f3a59606aedd.svg
    │   │   ├── no-wifi.png
    │   │   ├── perseus_question_new_bar_graphs.json
    │   │   ├── perseus_question_x43bbec76d5f14f88_bg.json
    │   │   ├── perseus_question_x43bbec76d5f14f88_en.json
    │   │   └── test_image_base64.data
    │   ├── generated
    │   │   └── .gitkeep
    │   ├── samples
    │   │   ├── sample_doc_with_toc.pdf
    │   │   ├── testdocument.epub
    │   │   ├── testsubtitles_ar.srt
    │   │   ├── testsubtitles_ar.ttml
    │   │   ├── thumbnail.jpg
    │   │   └── thumbnail.png
    │   └── youtubecache
    │   │   └── .gitkeep
    ├── utils
    │   └── test_extensions.py
    └── vcr_config.py
└── tox.ini


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org
 2 | 
 3 | root = true
 4 | 
 5 | [*]
 6 | indent_style = space
 7 | indent_size = 4
 8 | trim_trailing_whitespace = true
 9 | insert_final_newline = true
10 | charset = utf-8
11 | end_of_line = lf
12 | 
13 | [*.bat]
14 | indent_style = tab
15 | end_of_line = crlf
16 | 
17 | [LICENSE]
18 | insert_final_newline = false
19 | 
20 | [Makefile]
21 | indent_style = tab
22 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | * ricecooker version:
 2 | * Python version:
 3 | * Operating System:
 4 | 
 5 | ### Description
 6 | 
 7 | Describe what you were trying to get done.
 8 | Tell us what happened, what went wrong, and what you expected to happen.
 9 | 
10 | ### What I Did
11 | 
12 | ```
13 | Paste the command(s) you ran and the output.
14 | If there was a crash, please include the traceback here.
15 | ```
16 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # Automatically update dependency versions for pip and GitHub Actions
 2 | 
 3 | version: 2
 4 | updates:
 5 |   # Maintain dependencies for Python
 6 |   - package-ecosystem: "pip"
 7 |     directory: "/"
 8 |     schedule:
 9 |       interval: "weekly"
10 |       day: "friday"
11 |       time: "00:00"
12 |   # Maintain dependencies for GitHub Actions
13 |   - package-ecosystem: "github-actions"
14 |     directory: "/"
15 |     schedule:
16 |       interval: "weekly"
17 |       day: "friday"
18 |       time: "00:00"
19 |     groups:
20 |       github:
21 |         patterns:
22 |           - "actions/*"
23 | 


--------------------------------------------------------------------------------
/.github/workflows/community-contribution-labeling.yml:
--------------------------------------------------------------------------------
 1 | name: Community Contribution Label
 2 | on:
 3 |   issues:
 4 |     types: [assigned, unassigned]
 5 | jobs:
 6 |   call-label-action:
 7 |     uses: learningequality/.github/.github/workflows/community-contribution-label.yml@main
 8 |     secrets:
 9 |       LE_BOT_APP_ID: ${{ secrets.LE_BOT_APP_ID }}
10 |       LE_BOT_PRIVATE_KEY: ${{ secrets.LE_BOT_PRIVATE_KEY }}
11 | 


--------------------------------------------------------------------------------
/.github/workflows/notify_team_new_comment.yml:
--------------------------------------------------------------------------------
 1 | name: Send a slack notification when a contributor comments on issue
 2 | on:
 3 |   issue_comment:
 4 |     types: [created]
 5 | jobs:
 6 |   contributor_issue_comment:
 7 |     uses: learningequality/.github/.github/workflows/notify_team_new_comment.yml@main
 8 |     secrets:
 9 |       SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
10 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | name: Linting
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - develop
 6 |       - main
 7 |   pull_request:
 8 |     branches:
 9 |       - develop
10 |       - main
11 | jobs:
12 |   pre_job:
13 |     name: Path match check
14 |     runs-on: ubuntu-latest
15 |     # Map a step output to a job output
16 |     outputs:
17 |       should_skip: ${{ steps.skip_check.outputs.should_skip }}
18 |     steps:
19 |       - id: skip_check
20 |         uses: fkirc/skip-duplicate-actions@master
21 |         with:
22 |           github_token: ${{ github.token }}
23 |           paths_ignore: '["**.po", "**.json"]'
24 |   linting:
25 |     name: All file linting
26 |     needs: pre_job
27 |     if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
28 |     runs-on: ubuntu-latest
29 |     steps:
30 |       - uses: actions/checkout@v4
31 |       - uses: actions/setup-python@v5
32 |       - uses: pre-commit/action@v3.0.1
33 | 


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using pypa/gh-action-pypi-publish when a release is created
 2 | 
 3 | name: Upload Python Package
 4 | on:
 5 |   release:
 6 |     types: [published]
 7 | jobs:
 8 |   deploy:
 9 |     runs-on: ubuntu-latest
10 |     permissions:
11 |       # IMPORTANT: this permission is mandatory for trusted publishing
12 |       id-token: write
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - name: Set up Python
16 |         uses: actions/setup-python@v5
17 |         with:
18 |           python-version: 3.9
19 |       - name: Install dependencies
20 |         run: |
21 |           python -m pip install --upgrade pip
22 |           pip install setuptools wheel pre-commit
23 |       - name: Build distribution
24 |         run: make dist
25 |       - name: Publish package distributions to PyPI
26 |         uses: pypa/gh-action-pypi-publish@release/v1
27 | 


--------------------------------------------------------------------------------
/.github/workflows/unassign-inactive.yaml:
--------------------------------------------------------------------------------
 1 | name: "Unassign Inactive Contributors"
 2 | run-name: Unassign Inactive Contributors
 3 | on:
 4 |   schedule:
 5 |     - cron: "1 0 * * 1" # Every Monday at 00:01 UTC
 6 |   workflow_dispatch:
 7 | jobs:
 8 |   unassign-inactive:
 9 |     uses: learningequality/.github/.github/workflows/unassign-inactive-issues.yaml@main
10 |     secrets:
11 |       LE_BOT_APP_ID: ${{ secrets.LE_BOT_APP_ID }}
12 |       LE_BOT_PRIVATE_KEY: ${{ secrets.LE_BOT_PRIVATE_KEY }}
13 |       SLACK_COMMUNITY_NOTIFICATIONS_WEBHOOK_URL: ${{ secrets.SLACK_COMMUNITY_NOTIFICATIONS_WEBHOOK_URL }}
14 | 


--------------------------------------------------------------------------------
/.github/workflows/update-pr-spreadsheet.yml:
--------------------------------------------------------------------------------
 1 | name: Update community pull requests spreadsheet
 2 | on:
 3 |   pull_request_target:
 4 |     types: [assigned, unassigned, opened, closed, reopened]
 5 | jobs:
 6 |   call-update-spreadsheet:
 7 |     uses: learningequality/.github/.github/workflows/update-pr-spreadsheet.yml@main
 8 |     secrets:
 9 |       CONTRIBUTIONS_SPREADSHEET_ID: ${{ secrets.CONTRIBUTIONS_SPREADSHEET_ID }}
10 |       CONTRIBUTIONS_SHEET_NAME: ${{ secrets.CONTRIBUTIONS_SHEET_NAME }}
11 |       GH_UPLOADER_GCP_SA_CREDENTIALS: ${{ secrets.GH_UPLOADER_GCP_SA_CREDENTIALS }}
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Standard credentials (make sure never committed)
  3 | credentials/studio.json
  4 | credentials/studio_token.txt
  5 | credentials/.studiotoken
  6 | 
  7 | # Code examples not ready for prime time yet
  8 | 
  9 | 
 10 | # Byte-compiled / optimized / DLL files
 11 | __pycache__/
 12 | *.py[cod]
 13 | *$py.class
 14 | 
 15 | # C extensions
 16 | *.so
 17 | 
 18 | # Development notebooks
 19 | WIP
 20 | 
 21 | 
 22 | # Distribution / packaging
 23 | .Python
 24 | env/
 25 | build/
 26 | develop-eggs/
 27 | dist/
 28 | downloads/
 29 | eggs/
 30 | .eggs/
 31 | lib/
 32 | lib64/
 33 | parts/
 34 | sdist/
 35 | var/
 36 | *.egg-info/
 37 | .installed.cfg
 38 | *.egg
 39 | 
 40 | # PyInstaller
 41 | #  Usually these files are written by a python script from a template
 42 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 43 | *.manifest
 44 | *.spec
 45 | 
 46 | # Installer logs
 47 | pip-log.txt
 48 | pip-delete-this-directory.txt
 49 | 
 50 | # Unit test / coverage reports
 51 | htmlcov/
 52 | .tox/
 53 | .coverage
 54 | .coverage.*
 55 | .cache
 56 | nosetests.xml
 57 | coverage.xml
 58 | *,cover
 59 | .hypothesis/
 60 | .pytest_cache
 61 | 
 62 | # Translations
 63 | *.mo
 64 | *.pot
 65 | 
 66 | # Django stuff:
 67 | *.log
 68 | local_settings.py
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/drafts
 79 | docs/examples/drafts
 80 | docs/build/
 81 | docs/_build/
 82 | docs/ricecooker.classes.rst
 83 | docs/ricecooker.managers.rst
 84 | docs/ricecooker.utils.rst
 85 | 
 86 | # Links checker cache
 87 | .brokdb
 88 | 
 89 | # PyBuilder
 90 | target/
 91 | 
 92 | # IPython Notebook
 93 | .ipynb_checkpoints
 94 | 
 95 | # pyenv
 96 | .python-version
 97 | 
 98 | # celery beat schedule file
 99 | celerybeat-schedule
100 | 
101 | # dotenv
102 | .env
103 | 
104 | # virtualenv
105 | venv/
106 | venv3/
107 | ENV/
108 | 
109 | # Spyder project settings
110 | .spyderproject
111 | 
112 | # Rope project settings
113 | .ropeproject
114 | docs/out/
115 | docs/_build
116 | 
117 | storage/
118 | restore/
119 | 
120 | # nodejs modules
121 | node_modules/
122 | 
123 | sushi_chefs/
124 | tests/testcontent/downloaded/
125 | tests/testcontent/generated/
126 | tests/testcontent/youtubecache/
127 | video_cache_py3.sqlite
128 | .webcache
129 | .ricecookerfilecache
130 | 
131 | # IDE project dirs
132 | .idea/
133 | .vscode/
134 | .vim/
135 | 
136 | cache.sqlite
137 | 
138 | chefdata/
139 | audio_cache.sqlite
140 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/python/black
 3 |     rev: 21.12b0
 4 |     hooks:
 5 |       - id: black
 6 |         types_or: [python, pyi]
 7 |         additional_dependencies: ['click==8.0.4']
 8 |   - repo: https://github.com/pycqa/flake8
 9 |     rev: 7.1.1
10 |     hooks:
11 |       - id: flake8
12 |         exclude: (?x)(.*examples.*)
13 |   - repo: https://github.com/pre-commit/pre-commit-hooks
14 |     rev: v4.1.0
15 |     hooks:
16 |       - id: trailing-whitespace
17 |       - id: check-yaml
18 |       - id: check-added-large-files
19 |         exclude: '^tests/cassettes'
20 |       - id: debug-statements
21 |       - id: end-of-file-fixer
22 |         exclude: '^.+?\.json$'
23 |   - repo: https://github.com/asottile/reorder_python_imports
24 |     rev: v2.6.0
25 |     hooks:
26 |       - id: reorder-python-imports
27 |   - repo: https://github.com/google/yamlfmt
28 |     rev: v0.14.0
29 |     hooks:
30 |       - id: yamlfmt
31 |         exclude: '^tests/cassettes'
32 |   - repo: https://github.com/rhysd/actionlint
33 |     rev: v1.7.7
34 |     hooks:
35 |       - id: actionlint
36 |         additional_dependencies:
37 |           # actionlint has a shellcheck integration which extracts shell scripts in `run:` steps from GitHub Actions
38 |           # and checks these with shellcheck. This is arguably its most useful feature,
39 |           # but the integration only works if shellcheck is installed
40 |           - "github.com/wasilibs/go-shellcheck/cmd/shellcheck@v0.10.0"
41 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | formats: all
 3 | build:
 4 |   os: ubuntu-22.04
 5 |   tools:
 6 |     python: "3.11"
 7 |   jobs:
 8 |     pre_install:
 9 |       - pip install -e .
10 | sphinx:
11 |   configuration: docs/conf.py
12 | python:
13 |   install:
14 |     - requirements: docs/requirements.txt
15 | 


--------------------------------------------------------------------------------
/AUTHORS.rst:
--------------------------------------------------------------------------------
 1 | =======
 2 | Credits
 3 | =======
 4 | 
 5 | * Jordan Yoshihara <jordan@learningequality.org>
 6 | * Aron Asor <aron@learningequality.org>
 7 | * Jamie Alexandre <jamie@learningequality.org>
 8 | * Benjamin Bach <ben@learningequality.org>
 9 | * Ivan Savov <ivan@learningequality.org>
10 | * David Hu <davidhu@learningequality.org>
11 | * Kevin Ollivier <kevin@learningequality.org>
12 | * Alejandro Martinez Romero <mara80@gmail.com>
13 | * Blaine Jester <blaine@learningequality.org>
14 | 
15 | 
16 | .. TODOC: use gource of ricecooker repo & embed video here
17 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016, 2017, 2018 Learning Equality.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | 
 2 | include AUTHORS.rst
 3 | include CONTRIBUTING.rst
 4 | include HISTORY.rst
 5 | include LICENSE
 6 | include README.md
 7 | 
 8 | recursive-include tests *
 9 | recursive-exclude tests/testcontent *
10 | recursive-exclude * __pycache__
11 | recursive-exclude * *.py[co]
12 | 
13 | recursive-include docs *.md *.rst conf.py Makefile make.bat *.jpg *.png *.gif
14 | 
15 | recursive-exclude docs/tutorial/storage *
16 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | .PHONY: clean clean-test clean-pyc clean-build docs help
  2 | .DEFAULT_GOAL := help
  3 | define BROWSER_PYSCRIPT
  4 | import os, webbrowser, sys
  5 | try:
  6 | 	from urllib import pathname2url
  7 | except:
  8 | 	from urllib.request import pathname2url
  9 | 
 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
 11 | endef
 12 | export BROWSER_PYSCRIPT
 13 | 
 14 | define PRINT_HELP_PYSCRIPT
 15 | import re, sys
 16 | 
 17 | for line in sys.stdin:
 18 | 	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
 19 | 	if match:
 20 | 		target, help = match.groups()
 21 | 		print("%-20s %s" % (target, help))
 22 | endef
 23 | export PRINT_HELP_PYSCRIPT
 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT"
 25 | 
 26 | help:
 27 | 	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
 28 | 
 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
 30 | 
 31 | 
 32 | clean-build: ## remove build artifacts
 33 | 	rm -fr build/
 34 | 	rm -fr dist/
 35 | 	rm -fr .eggs/
 36 | 	find . -name '*.egg-info' -exec rm -fr {} +
 37 | 	find . -name '*.egg' -exec rm -f {} +
 38 | 
 39 | clean-pyc: ## remove Python file artifacts
 40 | 	find . -name '*.pyc' -exec rm -f {} +
 41 | 	find . -name '*.pyo' -exec rm -f {} +
 42 | 	find . -name '*~' -exec rm -f {} +
 43 | 	find . -name '__pycache__' -exec rm -fr {} +
 44 | 
 45 | clean-test: ## remove test and coverage artifacts
 46 | 	rm -fr .tox/
 47 | 	rm -f .coverage
 48 | 	rm -fr htmlcov/
 49 | 	rm -rf tests/testcontent/downloaded/*
 50 | 	rm -rf tests/testcontent/generated/*
 51 | 
 52 | lint: ## check style with flake8
 53 | 	flake8 ricecooker tests
 54 | 
 55 | test: clean-test ## run tests quickly with the default Python
 56 | 	pytest
 57 | 
 58 | 
 59 | test-all: clean-test ## run tests on every Python version with tox
 60 | 	tox
 61 | 
 62 | integration-test:
 63 | 	echo "Testing against hotfixes"
 64 | 	CONTENTWORKSHOP_URL=https://hotfixes.studio.learningequality.org python tests/test_chef_integration.py
 65 | 	echo "Testing against unstable"
 66 | 	CONTENTWORKSHOP_URL=https://unstable.studio.learningequality.org python tests/test_chef_integration.py
 67 | 	echo "Testing against production"
 68 | 	CONTENTWORKSHOP_URL=https://studio.learningequality.org python tests/test_chef_integration.py
 69 | 
 70 | coverage: ## check code coverage quickly with the default Python
 71 | 	pip install coverage pytest
 72 | 	coverage run --source ricecooker -m pytest
 73 | 	coverage report -m
 74 | 	coverage html
 75 | 	$(BROWSER) htmlcov/index.html
 76 | 
 77 | docsclean:
 78 | 	$(MAKE) -C docs clean
 79 | 	rm -f docs/_build/*
 80 | 
 81 | docs: ## generate Sphinx HTML documentation
 82 | 	pip install -r docs/requirements.txt
 83 | 	$(MAKE) -C docs clean
 84 | 	$(MAKE) -C docs html
 85 | 	# $(BROWSER) docs/build/html/index.html
 86 | 
 87 | latexdocs:
 88 | 	pip install -r docs/requirements.txt
 89 | 	$(MAKE) -C docs clean
 90 | 	$(MAKE) -C docs latex
 91 | 
 92 | servedocs: docs ## compile the docs watching for changes
 93 | 	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
 94 | 
 95 | dist: clean
 96 | 	pip install setuptools wheel
 97 | 	python setup.py sdist bdist_wheel
 98 | 
 99 | release: dist ## package and upload a release
100 | 	pip install twine
101 | 	twine upload dist/*
102 | 
103 | install: clean ## install the package to the active Python's site-packages
104 | 	python setup.py install
105 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ricecooker
 2 | ==========
 3 | [![PyPI pyversions](https://img.shields.io/pypi/pyversions/ricecooker.svg)](https://pypi.python.org/pypi/ricecooker/)
 4 | [![build](https://github.com/learningequality/ricecooker/actions/workflows/pythontest.yml/badge.svg?branch=master)](https://github.com/learningequality/ricecooker/actions)
 5 | [![docs](https://readthedocs.org/projects/ricecooker/badge/?version=latest&style=flat)](https://ricecooker.readthedocs.io/)
 6 | 
 7 | 
 8 | The `ricecooker` library is a framework for automating the conversion of educational content into
 9 | Kolibri content channels and uploading them to [Kolibri Studio](https://studio.learningequality.org/),
10 | which is the central content server for [Kolibri](http://learningequality.org/kolibri/).
11 | 
12 | 
13 | ## [📚 Ricecooker docs](https://ricecooker.readthedocs.io/)
14 | 
15 | Visit the documentation site at [📚 ricecooker.readthedocs.io](https://ricecooker.readthedocs.io/)
16 | for the full details about [installation](https://ricecooker.readthedocs.io/en/latest/installation.html),
17 | [getting started](https://ricecooker.readthedocs.io/en/latest/tutorial/gettingstarted.html),
18 | [API reference](https://ricecooker.readthedocs.io/en/latest/index_api_reference.html),
19 | and [code examples](https://ricecooker.readthedocs.io/en/latest/examples/index.html).
20 | 
21 | 
22 | 
23 | ## Overview of Kolibri content
24 | 
25 | `ricecooker` is used to take openly licensed educational content available on the
26 | web and convert it into an offline-friendly package that can be imported into Kolibri.
27 | 
28 | The basic process of getting new content into Kolibri is as follows:
29 | 
30 |  - **UPLOAD** your content to Kolibri Studio either manually through the Kolibri Studio
31 |    web interface or programmatically using a `ricecooker`-based content integration script.
32 |  - **PUBLISH** the channel on Kolibri Studio to make it accessible for use in Kolibri.
33 |  - **IMPORT** the channel into Kolibri using the channel token displayed in
34 |    Kolibri Studio after the PUBLISH step is done.
35 | 
36 | The diagram below illustrates how content flows within the Kolibri ecosystem
37 | and highlights the part which is covered by the `ricecooker` framework (bottom left).
38 | 
39 | ![Overview of steps for integrating external content sources for use in the Kolibri Learning Platform](docs/figures/content_pipeline_diagram_with_highlight.png)
40 | *External content sources (left) are first uploaded to [Kolibri Studio](https://studio.learningequality.org/) (middle), so they can be used in the [Kolibri Learning Platform](http://learningequality.org/kolibri/) (right).*
41 | 
42 | 
43 | 
44 | 
45 | ##### Further reading
46 | The [Ricecooker docs](https://ricecooker.readthedocs.io/) website is the best
47 | place to learn about writing automated content integration scripts.
48 | 
49 | Here are some links to other documents and guides you can read to learn about
50 | the other parts of the Kolibri content platform:
51 | 
52 |   - The [Kolibri Content Integration Guide](https://learningequality.org/r/integration-guide)
53 |     is a comprehensive guide to the decisions, processes, and tools for integrating
54 |     external content sources for use in the Kolibri Learning Platform.
55 |   - Read the [Kolibri Studio docs](http://kolibri-studio.readthedocs.io/en/latest/)
56 |     to learn more about the Kolibri Studio features
57 |   - Read the [Kolibri docs](http://kolibri.readthedocs.io/en/latest/) to learn
58 |     how to install Kolibri on your machine (useful for testing channels)
59 | 


--------------------------------------------------------------------------------
/docs/404.rst:
--------------------------------------------------------------------------------
 1 | :orphan:
 2 | 
 3 | Page not found
 4 | ==============
 5 | 
 6 | It seems you are searching for a topic that has been moved elsewhere in our documentation. Please try the following to find what you were looking for:
 7 | 
 8 | * Browse the table of contents in the sidebar
 9 | * Use the search box
10 | 
11 | 
12 | We apologize for the inconvenience!
13 | 


--------------------------------------------------------------------------------
/docs/README.rst:
--------------------------------------------------------------------------------
 1 | :orphan:
 2 | 
 3 | .. Note: this page exists for backward compatibility (since we sent this link
 4 |    to partners, we don't want them to hit a 404).
 5 | 
 6 | 
 7 | Ricecooker
 8 | ==========
 9 | 
10 | The following links will get you started with the content integration process in no time!
11 | 
12 |  - `Install <installation.html>`_  Python, the ``ricecooker`` package, and system prerequisites (5–20 mins)
13 |  - `Getting started <tutorial/gettingstarted.html>`_: upload your first channel to Kolibri Studio and import it in Kolibri (10 mins)
14 |  - For more info see the `ricecooker docs main page <../index.html>`_ 📚.
15 | 
16 | Welcome to the team!
17 | 
18 | 
19 | License
20 | -------
21 | 
22 | .. image:: https://i.creativecommons.org/l/by-sa/4.0/88x31.png
23 |    :alt: Creative Commons License
24 | 
25 | This work is licensed under a `Creative Commons Attribution-ShareAlike 4.0 International License <http://creativecommons.org/licenses/by-sa/4.0/>`__
26 | 


--------------------------------------------------------------------------------
/docs/_templates/footer.html:
--------------------------------------------------------------------------------
 1 | {% extends "!footer.html" %}
 2 | {%- block extrafooter %}
 3 |     <span>
 4 |       &copy; {% trans %}Copyright{% endtrans %} {{ copyright }}
 5 |     </span><br />
 6 |     <span>
 7 |       Licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
 8 |     </span>
 9 |     {{ super() }}
10 | {% endblock %}
11 | 


--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
 1 | {% extends "!layout.html" %}
 2 |   {% block footer %} {{ super() }}
 3 | 
 4 |   <style>
 5 | 
 6 |     /* Sidebar header (and topbar for mobile) */
 7 |     .wy-side-nav-search, .wy-nav-top {
 8 |       background: #996189 !important;
 9 |     }
10 | 
11 |     /* Let there be width */
12 |     /* .wy-nav-content {
13 |         max-width: 1000px;
14 |     } */
15 | 
16 | 
17 |     /* div {
18 |         border: 1px solid red;
19 |     } */
20 | 
21 |     .row {clear: both}
22 | 
23 |     @media (min-width: 800px) {
24 | 
25 |         .column {
26 |             padding-left: 8px;
27 |             padding-right: 8px;
28 |             float: left;
29 |         }
30 | 
31 |         .firstcolumn {
32 |             padding-right: 8px;
33 |             float: left;
34 |         }
35 | 
36 |         .lastcolumn {
37 |             padding-left: 8px;
38 |             float: left;
39 |         }
40 | 
41 | 
42 |         .column3  {
43 |             width: 33.3%;
44 |             font-size: 80%;
45 |         }
46 | 
47 |         .column2  {
48 |             width: 50%;
49 |         }
50 |     }
51 | 
52 |     h2, h3, h4 {
53 |       margin-bottom: 10px;
54 |     }
55 | 
56 | 
57 |     /* push down the RTD banner so it's not confused with menu */
58 |     .keep-us-sustainable {
59 |       margin-top: 20em !important;
60 |     }
61 | 
62 |   </style>
63 | {% endblock %}
64 | 


--------------------------------------------------------------------------------
/docs/community/authors.rst:
--------------------------------------------------------------------------------
1 | ../../AUTHORS.rst
2 | 


--------------------------------------------------------------------------------
/docs/community/contributing.md:
--------------------------------------------------------------------------------
1 | ../../CONTRIBUTING.md
2 | 


--------------------------------------------------------------------------------
/docs/community/index.rst:
--------------------------------------------------------------------------------
 1 | Community
 2 | =========
 3 | Learn how to contribute to the project and how you can become part of the community
 4 | of content developers working to integrate content into the Kolibri platform,
 5 | and more broadly into all offline learning tools.
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 1
 9 | 
10 |    contributing
11 |    authors
12 |    ../history
13 | 
14 | .. TODOC: volunteers program, step-by-step guide to small contribution, etc.
15 | 


--------------------------------------------------------------------------------
/docs/concepts/content_workflows.md:
--------------------------------------------------------------------------------
 1 | Content integration methods
 2 | ===========================
 3 | 
 4 | There are two methods that you can use to create Kolibri channels:
 5 | 
 6 |   * **Manual content upload**:
 7 |     This method is suitable for content that is saved on your local computer such as files or folders.
 8 |     You can directly upload your content through the Kolibri Studio web interface.
 9 |     This method is appropriate for small and medium content sets.
10 |     See the [Kolibri Studio User Guide](https://kolibri-studio.readthedocs.io/en/latest/) for more information.
11 | 
12 |   * **Uploading content using a content integration script**:
13 |     You can use a content integration script (a.k.a. sushichef script) to
14 |     integrate content from websites, content repositories, APIs, or other external sources.
15 |     A content integration script is a Python program.
16 | 
17 | 
18 | More information about each of these methods is provided below.
19 | 
20 | 
21 | ## Manual content upload
22 | You can use the [Kolibri Studio](https://studio.learningequality.org/) web interface
23 | to upload various content types and organize them into channels. Kolibri Studio
24 | allows you to explore pre-organized libraries of open educational resources,
25 | and reuse them in your channels. You can also add tags, re-order, re-mix content,
26 | and create exercises to support students' learning process.
27 | 
28 | To learn more about Studio, we recommend reading the following pages in the
29 | [Kolibri Studio User Guide](https://kolibri-studio.readthedocs.io/en/latest/):
30 |   - [Accessing Studio](https://kolibri-studio.readthedocs.io/en/latest/access_studio.html)
31 |   - [Working with channels](https://kolibri-studio.readthedocs.io/en/latest/working_channels.html)
32 |   - [Adding content to channels](https://kolibri-studio.readthedocs.io/en/latest/add_content.html)
33 | 
34 | When creating large channels (100+ content items) or channels that need to be
35 | updated regularly, you should consider using a content integration script,
36 | as described below.
37 | 
38 | 
39 | 
40 | 
41 | ## Content integration scripts
42 | 
43 | The [`ricecooker`](https://github.com/learningequality/ricecooker) framework is a
44 | tool that programmers can use to upload content to Kolibri Studio in an automated
45 | fashion. We refer to these import scripts as **sushi chefs**, because their job
46 | is to chop-up the source material (e.g. an educational website) and package the
47 | content items into tasty morsels (content items) with all the associated metadata.
48 | 
49 | Using the bulk import option requires a content developer (sushi chef author)
50 | to prepare the content, content metadata, and run the chef script to perform the
51 | upload to Kolibri Studio.
52 | 
53 | Educators and content specialists can assist the developers by preparing a **spec sheet**
54 | for the content source that provides detailed guidance for how content should be
55 | structured and organized within the channel. The content specialist also plays a role
56 | during the channel [review process](reviewing_channels.md).
57 | 
58 | 
59 | 
60 | The following alternative options are available for specifying the metadata for
61 | content nodes that can be used in special circumstances.
62 | 
63 | ### CSV metadata workflow
64 | In addition to the web interface and the Python interface (`ricecooker`), there
65 | exists an option for creating Kolibri channels by:
66 |   - Organizing content items (documents, videos, mp3 files) into a folder hierarchy on the local file system
67 |   - Specifying metadata in the form of CSV files created using Excel
68 | 
69 | The CSV-based workflow is a good fit for non-technical users since it doesn't
70 | require writing any code, but instead can use Excel to provide all the metadata.
71 | 
72 |   - [CSV-based workflow README](https://github.com/learningequality/sample-channels/tree/master/channels/csv_channel)
73 |   - [Example content folder](https://github.com/learningequality/sample-channels/tree/master/channels/csv_exercises/content)
74 |   - [Example Channel.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Channel.csv)
75 |   - [Example Content.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Content.csv)
76 |   - [CSV-based exercises info](https://github.com/learningequality/sample-channels/tree/master/channels/csv_exercises)
77 | 
78 | Organizing the content into folders and creating the CSV metadata files is most
79 | of the work, and can be done by non-programmers.
80 | The generic sushi chef script (`LineCook`) is then used to upload the channel.
81 | 


--------------------------------------------------------------------------------
/docs/concepts/index.rst:
--------------------------------------------------------------------------------
 1 | Concepts
 2 | ========
 3 | 
 4 | The purpose of this page is to help you understand how content integration work
 5 | fits more broadly within the Kolibri ecosystem.
 6 | 
 7 | The links below establish the shared vocabulary to facilitate communication between
 8 | content partners, the Learning Equality team, Kolibri users, and other stakeholders.
 9 | 
10 | .. toctree::
11 |    :maxdepth: 1
12 | 
13 |    introduction
14 |    terminology
15 |    content_workflows
16 |    developer_workflows
17 |    reviewing_channels
18 | 


--------------------------------------------------------------------------------
/docs/concepts/introduction.md:
--------------------------------------------------------------------------------
 1 | Kolibri content ecosystem overview
 2 | ==================================
 3 | 
 4 | Educational content in the Kolibri platform is organized into **content channels**.
 5 | The `ricecooker` framework is used for creating content channels and uploading them
 6 | to [Kolibri Studio](https://studio.learningequality.org/), which is the central
 7 | content server that [Kolibri](https://learningequality.org/kolibri/) applications
 8 | talk to when importing their content.
 9 | 
10 | Content flow within the Kolibri ecosystem is pictured below.
11 | 
12 | ![The Kolibri Content Pipeline](../figures/content_pipeline_diagram.png)
13 | 
14 | This `ricecooker` framework is the main tool used to facilitate **Integration Method 2**.
15 | 
16 | 
17 | 
18 | Kolibri channels
19 | ----------------
20 | A Kolibri channel is the combination of a topic tree (a nested folder structure)
21 | and a number of self-contained "content items" packaged for offline use and distribution.
22 | Each content item within the channel is represented as a content node with one
23 | or more files associated with it. In summary, a channel is a nested structure of
24 | `TopicNodes` (folders) that contain `ContentNode` objects similar to how files
25 | are organized into folders on computers.
26 | 
27 | The Kolibri channel is the fundamental structure common to all parts of the Kolibri ecosystem:
28 | the Kolibri Learning Platform is where Kolibri channels are used by learners and teachers,
29 | Kolibri Studio is the editor for Kolibri Channels (think five Rs),
30 | and Ricecooker scripts are used for content integrations that pull in OER from
31 | external sources, package them for offline use, and upload them to Kolibri Studio.
32 | 
33 | 
34 | Supported content kinds
35 | -----------------------
36 | Kolibri channels are tree-like structures that consist of the following types of nodes:
37 | 
38 |   * **Topic nodes** (folders): the nested folder structure is the main way of
39 |     representing structured content in Kolibri. Depending on the particular channel,
40 |     a topic node could be a language, a subject, a course, a unit, a module, a section,
41 |     a lesson, or any other structural element. Rather than impose a particular fixed structure,
42 |     we let educators decide the folder structure that is best suited for the learners' needs.
43 | 
44 |   * **Content nodes**:
45 | 
46 |      - Document (either an `epub` or a `pdf` file)
47 |      - Audio (`mp3` files of audio lessons, audiobooks, podcasts, radio shows, etc.)
48 |      - Video (`mp4` files with `h264` video codec and `aac` audio codec)
49 |      - HTML5App (`zip` files containing web content like HTML, JavaScript, css and images)
50 |      - H5PApp (self-contained `h5p` files)
51 |      - Slideshow (a sequence of `jpg` and `png` slide images)
52 |      - Exercises containing questions like multiple choice, multiple selection, and numeric inputs
53 | 
54 | 
55 | Further reading
56 | ---------------
57 |   - [Kolibri channel](https://kolibri.readthedocs.io/en/latest/manage/resources.html#channels-and-resources)
58 |     as explained in the Kolibri documentation.
59 |   - [Kolibri Studio User Guide](https://kolibri-studio.readthedocs.io/en/latest/index.html)
60 | 


--------------------------------------------------------------------------------
/docs/concepts/reviewing_channels.md:
--------------------------------------------------------------------------------
 1 | Reviewing Kolibri content channels
 2 | ==================================
 3 | Every content channel on the Kolibri platform benefits from a review process
 4 | that ensures the content structure, metadata, and functionality is up to standard.
 5 | This is broadly referred to as "channel review," "providing feedback," or "QA."
 6 | Everyone on the LE team is a potential channel reviewer, and external partners
 7 | can also be asked to review channels when they have capacity.
 8 | 
 9 | 
10 | 
11 | Issue tracker
12 | -------------
13 | Channel reviewers can use the "Issue tracker" table to report problems so that
14 | developers responsible for creating the channel can address them.
15 | 
16 | ### Issue tracker columns
17 |   - `Issue ID`: internal numeric identifier
18 |     (or `github:nn` for two-way-synced issues with the chef's github repo)
19 |   - `Type` (multi select): what type of issue is this (see full list of options below)
20 |   - `Severity` (Blocker || Nice to have): how bad is the issue
21 |   - `URL`: A link to studio, a demo server, or the source website where the issue is visible
22 |   - `Screenshots` (files): screenshot that shows the issue in action
23 |   - `Issue description` (text): provide detailed description of what the issue is, how to reproduce, and any additional info (e.g. copy-paste of errors from the JavaScript console)
24 |   - `Possible fixes` (text): provide suggestions (technical or not) for how issue could be fixed and ideas for workarounds
25 |   - `Assigned to` (notion user): track the person that is supposed to fix this issue
26 |   - `Status` (Not started||In progress||Fixed): track progress on issue fix
27 |   - `Created`: record the date when the issue was added
28 |   - `Created by`: record who filed the issue
29 | 
30 | #### Issue types
31 |   - `Missing content`: some content from the source was not imported
32 |   - `Structure`: problem with the channel structure
33 |   - `Title`: problem with titles, e.g. titles that are too long or not informative
34 |   - `Description`: use to flag description problems (non-informative or repeating junk text)
35 |   - `Metadata`: problem with metadata associated with nodes (language, licensing info, author, role visibility, tags)
36 |   - `Thumbnails`: flag broken or missing thumbnails on the channel, topics, or content nodes
37 |   - `Display issue`:  the content doesn't look right (HTML/CSS issues) or doesn't work as expected (JavaScript issues)
38 |   - `Learning UX`: any problem that might interfere with learning user experience
39 |   - `Video compression`: if videos are not compressed enough (files too large)
40 |     or alternatively too compressed (cannot read text)
41 |   - `Bulk corrections`: flag issues that might require bulk metadata edits on numerous content nodes
42 |   - `Translation`: content files or metadata are partially or completely in the wrong language
43 |   - `Enhancement`: use to keep track of possible enhancements or additions that could be made to improve coach or learner experience
44 | 
45 | 
46 | #### Issue severity
47 |   - `Blocker`: this issue must be fixed before the channel can go into QA
48 |   - `Nice to have`: non-blocking issues like corrections, enhancements,
49 |     and minor learning UX problems
50 | 
51 | 
52 | 
53 | Who can be a channel reviewer?
54 | ------------------------------
55 | You can. Whenever you need a distraction, take 20 minutes and place yourself in
56 | the learner's shoes and go explore the channel on the demo server link provided
57 | on the notion card.  If you notice any issues  while browsing, add them to the
58 | Issue tracker table. That's it. Learn something today.
59 | 


--------------------------------------------------------------------------------
/docs/concepts/terminology.md:
--------------------------------------------------------------------------------
 1 | Terminology
 2 | ===========
 3 | 
 4 | This page lists key concepts and technical terminology used as part of the
 5 | content integration work within Learning Equality.
 6 | 
 7 | 
 8 | Content Pipeline
 9 | ----------------
10 | The combination of software tools and procedures used to convert content
11 | from an external content source to becoming a Kolibri Channel available
12 | for use in the Kolibri Learning Platform. The Kolibri Content Pipeline is
13 | a collaborative effort between educational experts and software developers.
14 | 
15 | 
16 | 
17 | Channel Spec
18 | ------------
19 | A content specification document, or Channel Spec, is a blueprint document
20 | that specifies the structure of the Kolibri channel that is to be created.
21 | 
22 | Channel Specs are an important aspect of the content integration process for two reasons:
23 | 
24 | 1. It specifies what needs to be done.
25 |    The channel spec establishes an agreement between the curriculum specialist
26 |    and the developer who will be writing the content integration script.
27 | 
28 | 2. It serves to define when the work is done.
29 |    Used as part of the [review process](reviewing_channels.md) to know when the
30 |    channel is "Spec Compliant," i.e. the channel structure in Kolibri matches the blueprint.
31 | 
32 | A Channel Spec document includes the following information:
33 | 
34 |  - Channel Title: usually of the form `{Source Name} ({lang})` where `{Source Name}`
35 |    is chosen to be short and descriptive, and `{lang}` is included in the title
36 |    to make it easy to search for content in this language.
37 |  - Channel Description: a description (up to 400 characters) of the channel and its contents.
38 |  - Languages: notes about content language, and special handling for multilingual content, subtitles, or missing translations
39 |  - File Types: info about what content kinds and file types to look for
40 |  - Channel Structure: a specification of the desired topic structure for the channel.
41 |    This is the key element in the Channel Spec and often requires domain expertise
42 |    to take into account the needs of the teachers and learners who will be accessing this content.
43 |  - Links and sample content
44 |  - Credentials: info about how to access the content (e.g. info about API access)
45 |  - Technical notes: The Channel Spec can include guidance about technical aspects
46 |    like content transformations (for example, the need to compress the videos so that they take up less space).
47 | 
48 | For more info about each of these aspects, see the section "Creating a Content Channel Spec"
49 | in the [Kolibri Content Integration Guide](https://learningequality.org/r/integration-guide).
50 | 
51 | 
52 | Content Integration Script (aka SushiChef)
53 | ------------------------------------------
54 | The content integration scripts that use the `ricecooker` library to
55 | generate Kolibri Channels are commonly referred to as **SushiChef**
56 | scripts. The responsibility of a `SushiChef` script is to download the source
57 | content, perform any necessary format or structure conversions to create
58 | a content tree viewable in Kolibri, then to upload the output of this
59 | process to Kolibri Studio for review and publishing.
60 | 
61 | Conceptually, `SushiChef` scripts are very similar to web scrapers,
62 | but with specialized functions for optimizing the content for Kolibri's
63 | data structures and capabilities.
64 | 


--------------------------------------------------------------------------------
/docs/csv_metadata/README.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 | 
3 | .. This is added to avoid 404s.
4 | 
5 | See the `index page <index.html>`__ to learn about CSV metadata workflow.
6 | 


--------------------------------------------------------------------------------
/docs/csv_metadata/csv_workflow.md:
--------------------------------------------------------------------------------
 1 | CSV Metadata Workflow
 2 | =====================
 3 | 
 4 | It is possible to create Kolibri channels by:
 5 |   - Organizing content items (documents, videos, mp3 files) into a folder hierarchy
 6 |     on the local file system
 7 |   - Specifying metadata in the form of CSV files
 8 | 
 9 | 
10 | The CSV-based workflow is a good fit for non-technical users since it doesn't
11 | require writing any code, but instead can use Excel to provide all the metadata.
12 | 
13 |   - [CSV-based workflow README](https://github.com/learningequality/sample-channels/tree/master/channels/csv_channel)
14 |   - [Example content folder](https://github.com/learningequality/sample-channels/tree/master/channels/csv_exercises/content)
15 |   - [Example Channel.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Channel.csv)
16 |   - [Example Content.csv metadata file](https://github.com/learningequality/sample-channels/blob/master/channels/csv_channel/content/Content.csv)
17 | 
18 | Organizing the content into folders and creating the CSV metadata files is most
19 | of the work, and can be done by non-programmers.
20 | The generic sushi chef script (`LineCook`) is then used to upload the channel.
21 | 
22 | 
23 | CSV Exercises
24 | -------------
25 | You can also use the CSV metadata workflow to upload simple exercises to Kolibri Studio.
26 | See [this doc](./csv_exercises.md) for the technical details about creating exercises.
27 | 


--------------------------------------------------------------------------------
/docs/csv_metadata/index.rst:
--------------------------------------------------------------------------------
 1 | Spreadsheet Metadata Workflow
 2 | =============================
 3 | 
 4 | It is possible to create Kolibri channels by specifying all the channel metadata
 5 | in the form of spreadsheet or CSV files instead of through Python code.
 6 | 
 7 | 
 8 | .. toctree::
 9 |    :maxdepth: 1
10 | 
11 |    csv_workflow
12 |    csv_exercises
13 | 


--------------------------------------------------------------------------------
/docs/developer/corrections.md:
--------------------------------------------------------------------------------
  1 | Studio bulk corrections
  2 | =======================
  3 | The command line script `corrections` allows you to perform bulk corrections of
  4 | titles, descriptions, and other attributes for the content nodes of a channel.
  5 | 
  6 | 
  7 | Use cases:
  8 |   - Bulk modify titles and descriptions (e.g. to fix typos)
  9 |   - Translate titles and/or descriptions (for sources with missing structure translations)
 10 |   - Enhance content by adding description (case by case detail work done during QA)
 11 |   - Add missing metadata like author, copyright holder, and tags to content nodes
 12 |   - Perform basic structural edits to channel (remove unwanted topics and content nodes)
 13 | 
 14 | Not use cases:
 15 |   - Modify a few node attributes (better do manually through the Studio web interface)
 16 |   - Structural changes (the corrections workflow does not support node moves)
 17 |   - Global changes (if the same modification must be performed on all nodes in the
 18 |     channel, it would be better to implement these changes during cheffing)
 19 | 
 20 | 
 21 | Credentials
 22 | -----------
 23 | In order to use the corrections workflow as part of a chef script, you need to
 24 | create the file `credentials/studio.json` in the chef repo that contains the
 25 | following information:
 26 | 
 27 |     {
 28 |       "token": "YOURTOKENHERE9139139f3a23232fefefefefefe",
 29 |       "username": "your.name@yourdomain.org",
 30 |       "password": "yourstudiopassword",
 31 |       "studio_url": "https://studio.learningequality.org"
 32 |     }
 33 | 
 34 | These credentials will be used to make the necessary Studio API calls. Make sure
 35 | you have edit rights for this channel.
 36 | 
 37 | 
 38 | Corrections workflow
 39 | --------------------
 40 | The starting point is an existing channel available on Studio, which we will
 41 | identify through its Channel ID, denoted `<channel_id>` in code examples below.
 42 | 
 43 | ### Step 1: Export the channel metadata to CSV
 44 | Export the complete metadata of the source channel as a local `.csv` file using:
 45 | 
 46 |     corrections export <channel_id>
 47 | 
 48 | This will create the file `corrections-export.csv` which can be opened with a
 49 | spreadsheet program (e.g. LibreOffice). In order to allow for collaboration,
 50 | the content of the spreadsheet must be copied to a shared google sheet with
 51 | permissions set to allow external edits.
 52 | 
 53 | 
 54 | ### Step 2: Edit metadata
 55 | In this step the content expert (internal or external) edits the metadata for
 56 | each content node in the shared google sheet.
 57 | The possible actions (first column) to apply to each row are as follows:
 58 |   - `modify`: to apply metadata modifications to the topic or content node
 59 |   - `delete`: to remove the topic or content node from the channel
 60 |   - Leaving the Action column blank will leave the content node unchanged
 61 | 
 62 | All rows with the `modify` keyword in the Action column will undergo metadata
 63 | modifications according to the text specified in the `New *` columns of the sheet.
 64 | 
 65 | For example, to correct typos in the title and description of a content node you must:
 66 |   - Mark the row with Action=`modify` (first column)
 67 |   - Add the desired title text in the column `New Title`
 68 |   - Add the desired description text in the column `New Description`
 69 | 
 70 | Note that not all metadata columns need to be specified. The choice of fields
 71 | that will be edited during the `modify` operation will be selected in the next step.
 72 | 
 73 | 
 74 | ### Step 3: Apply the corrections from a google sheet
 75 | Once the google sheet has been edited to contain all desired changes in the
 76 | `New *` columns, the next step is to apply the corrections:
 77 | 
 78 |     corrections apply <channel_id> --gsheet_id='<gsheet_id>' --gid=<gsheet_gid>
 79 | 
 80 | where `<gsheet_id>` is the google sheets document identifier (taken from the URL)
 81 | and `<gsheet_gid>` is the identifier of the particular sheet within the spreadsheet
 82 | document that contains the corrections (usually `<gsheet_gid>=0`).
 83 | 
 84 | The attributes that will be edited during the `modify` operation are specified
 85 | using the `--modifyattrs` command line argument. For example to apply modifications
 86 | only to the `title` and `description` attributes use the following command:
 87 | 
 88 |     corrections apply <channel_id> --gsheet_id='<gsheet_id>' --gid=<gsheet_gid> --modifyattrs='title,description'
 89 | 
 90 | Using the above command will apply only the modifications from the
 91 | `New Title` and `New Description` columns and ignore modifications to copyright holder,
 92 | author, and tags attributes.
 93 | The default setting is `--modifyattrs=title,description,author,copyright_holder`.
 94 | 
 95 | 
 96 | Status
 97 | ------
 98 | Note the corrections workflow is considered "experimental" and to be used only
 99 | when no other options are viable (too many edits to do manually through the Studio
100 | web interface).
101 | 


--------------------------------------------------------------------------------
/docs/developer/index.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Developer docs
 3 | ==============
 4 | To learn about the inner workings of the ``ricecooker`` library, consult the following pages:
 5 | 
 6 | .. toctree::
 7 |    :maxdepth: 1
 8 | 
 9 |    ../chefops
10 |    sushops
11 |    kolibripreview
12 |    ids
13 |    uploadprocess
14 |    design_cli
15 |    corrections
16 | 


--------------------------------------------------------------------------------
/docs/developer/sushops.md:
--------------------------------------------------------------------------------
 1 | SushOps
 2 | =======
 3 | SushOps engineers (also called ETL engineers) are responsible for making sure
 4 | the overall content pipeline runs smoothly. Assuming the [chefops](../chefops)
 5 | is done right, running the chef script should be as simple as running a single command.
 6 | SushOps engineers need to make sure not only that chef is running correctly,
 7 | but also monitor content in Kolibri Studio, in downstream remixed channels,
 8 | and in Kolibri installations.
 9 | 
10 | SushOps is an internal role to Learning Equality but we'll document the responsibilities
11 | here for convenience, since this role is closely related to the `ricecooker` library.
12 | 
13 | 
14 | 
15 | Project management and support
16 | ------------------------------
17 | SushOps manage and support developers working on new chef scripts, by reviewing
18 | spec sheets, writing technical specs, creating necessary git repos, reviewing
19 | pull requests, chefops, and participating in QA.
20 | 
21 | 
22 | Cheffing servers
23 | ----------------
24 | Chef scripts run on various cheffing servers, equipped with appropriate storage
25 | space and processing power (if needed for video transcoding). Currently we have:
26 |   - CPU-intensive chefs running on `vader`
27 |   - various other chefs running on partner orgs infrastructure
28 | 
29 | ### Cheffing servers conventions
30 |   - Put all the chef repos in `/data` (usually a multi-terabyte volume), e.g.,
31 |     use the directory `/data/sushi-chef-{{nickname}}/` for the `nickname` chef.
32 |   - Use the name `sushichef.py` for the chef script
33 |   - Document all the instructions and options needed to run the chef script in
34 |     the chef's `README.md`
35 |   - Use the directory `/data/sushi-chef-{{nickname}}/chefdata/tmp/` to store tmp
36 |     files to avoid cluttering the global `/tmp` directory.
37 |   - For long running chefs, use the command `nohup  <chef cmd>  &` to run the chef
38 |     so you can close the ssh session (hangup) without the process being terminated.
39 | 
40 | 
41 | 
42 | SushOps tooling and automation
43 | ------------------------------
44 | Some of the more repetitive system administration tasks have been automated using `fab` commands:
45 | 
46 |     fab -R vader   setup_chef:nickname     # clones the nickname repo and installs requirements
47 |     fab -R vader   update:nickname         # git fetch and git reset --hard to get latest chef code
48 |     fab -R vader   run_chef:nickname       # runs the chef
49 | 
50 | See the [content-automation-scripts](https://github.com/learningequality/content-automation-scripts)
51 | project for more details.
52 | 


--------------------------------------------------------------------------------
/docs/examples/detokenify.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl -pi
 2 | # multi-line in place substitute
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | BEGIN {undef $/;}
 7 | 
 8 | # remove access tokens in case left by mistake
 9 | # ####################################################################
10 | s/a5c5fb[\da-f]{34}/YOURTOKENHERE9139139f3a23232/g;
11 | s/70aec3[\da-f]{34}/YOURTOKENHERE9139139f3a23232/g;
12 | s/563554[\da-f]{34}/YOURTOKENHERE9139139f3a23232/g;
13 | 


--------------------------------------------------------------------------------
/docs/examples/index.rst:
--------------------------------------------------------------------------------
 1 | Examples
 2 | ========
 3 | 
 4 | Below are some examples that demonstrate certain aspects of the content integration
 5 | process that require careful consideration and are best explained in code:
 6 | 
 7 | 
 8 | .. toctree::
 9 |    :titlesonly:
10 | 
11 |    Learn how to work with language codes <languages>
12 |    How to create exercises and questions <exercises>
13 |    Document conversions <document_conversion>
14 |    Step-by-step tutorial <https://github.com/learningequality/ricecooker/blob/master/examples/tutorial/sushichef.py>
15 |    Wikipedia scraping example <https://github.com/learningequality/ricecooker/blob/master/examples/wikipedia/sushichef.py>
16 |    Kitchen sink example that includes all content kinds <https://github.com/learningequality/sample-channels/blob/master/channels/ricecooker_channel/sushichef.py>
17 | 
18 | 
19 | Jupyter notebooks
20 | -----------------
21 | Jupyter notebooks are a very powerful tool for interactive programming.
22 | You type commands into an online shell, and you immediately see the results.
23 | 
24 | To install jupyter notebook on your machine, you run:
25 | 
26 | .. code::
27 | 
28 |     pip install jupyter
29 | 
30 | then to start the jupyter notebook server, run
31 | 
32 | .. code::
33 | 
34 |     jupyter notebook
35 | 
36 | If you then navigate to the directory ``docs/examples/`` in the ricecooker source
37 | code repo, you'll find the same examples described above in the form of runnable
38 | notebooks that will allow you to experiment and learn hands-on.
39 | 
40 | 
41 | You'll need to press CTRL+C in the terminal to stop the jupyter notebook server,
42 | or use the Shutdown button in the web interface.
43 | 
44 | Watch the beginning of this `Video tutorial <http://35.196.115.213/en/learn/#/topics/c/1ef68d0dcb52555f9b63f15f36f77b54>`__
45 | to learn how to use the Jupyter notebook environment for interactively coding parts of the chef logic.
46 | 
47 | .. raw:: html
48 | 
49 |    <a href="http://35.196.115.213/en/learn/#/topics/c/1ef68d0dcb52555f9b63f15f36f77b54" target="_blank">
50 |    <iframe width="560" height="315" src="https://www.youtube.com/embed/vnMCeHQYcBU" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
51 |    </a>
52 |    <div style="height:60px;">&nbsp;</div>
53 | 
54 | 
55 | Advanced examples
56 | -----------------
57 | The links below will take you to the GitHub repositories of content integration
58 | scripts we use to create some of the most popular Kolibri channels in the library:
59 | 
60 | * `Khan Academy chef <https://github.com/learningequality/sushi-chef-khan-academy>`__
61 | * `Open Stax chef <https://github.com/learningequality/sushi-chef-openstax>`__
62 | * `SHLS Toolkit chef <https://github.com/learningequality/sushi-chef-shls>`__
63 | 
64 | You can get a list of ALL the content integration scripts by searching for
65 | `sushi-chef <https://github.com/learningequality?q=sushi-chef&type=public&language=python>`__
66 | on GitHub.
67 | 


--------------------------------------------------------------------------------
/docs/examples/samplefiles/documents/doc_EN.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/examples/samplefiles/documents/doc_EN.pdf


--------------------------------------------------------------------------------
/docs/examples/samplefiles/documents/doc_ES.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/examples/samplefiles/documents/doc_ES.pdf


--------------------------------------------------------------------------------
/docs/examples/samplefiles/documents/doc_FR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/examples/samplefiles/documents/doc_FR.pdf


--------------------------------------------------------------------------------
/docs/exercises.md:
--------------------------------------------------------------------------------
 1 | Exercises
 2 | =========
 3 | 
 4 | Exercises (assessment activities) are an important part of every learning experience.
 5 | Kolibri exercises are graded automatically and provide immediate feedback to learners.
 6 | Student answers are logged to enable progress reports for teachers and coaches.
 7 | Exercises can also be used as part of lessons and quizzes.
 8 | 
 9 | An `ExerciseNode` is a special kind of content node that contains one or more questions.
10 | In order to set the criteria for completing exercises, you must set __exercise_data__
11 | to a dict containing a `mastery_model` field based on the mastery models provided
12 | in `le_utils.constants.exercises`.
13 | If no data is provided, `ricecooker` will default to mastery at 3 of 5 correct.
14 | For example:
15 | ```
16 | node = ExerciseNode(
17 |     exercise_data={
18 |         'mastery_model': exercises.M_OF_N,
19 |         'randomize': True,
20 |         'm': 3,
21 |         'n': 5,
22 |     },
23 |     ...
24 | )
25 | ```
26 | 
27 | 
28 | To add a question to an exercise node, you must first create a question model from
29 | `ricecooker.classes.questions`. Your sushi chef is responsible for determining
30 | which question type to create. Here are the available question types:
31 |   - __SingleSelectQuestion__: questions that only have one right answer (e.g. radio button questions)
32 |   - __MultipleSelectQuestion__: questions that have multiple correct answers (e.g. check all that apply)
33 |   - __InputQuestion__: questions that have text-based answers (e.g. fill in the blank)
34 |   - __PerseusQuestion__: special question type for pre-formatted perseus questions
35 | 
36 | 
37 | Each question class has the following attributes that can be set at initialization:
38 |   - __id__ (str): question's unique id
39 |   - __question__ (str): question body, in plaintext or Markdown format;
40 |     math expressions must be in Latex format, surrounded by `$`, e.g. `$f(x) = 2^3$`.
41 |   - __correct_answer__ (str) or __answers__ ([str]): the answer(s) to question as plaintext or Markdown
42 |   - __all_answers__ ([str]): list of choices for single select and multiple select questions as plaintext or Markdown
43 |   - __hints__ (str or [str]): optional hints on how to answer question, also in plaintext or Markdown
44 | 
45 | To set the correct answer(s) for MultipleSelectQuestions, you must provide a list
46 | of all of the possible choices as well as an array of the correct answers
47 | (`all_answers [str]` and `correct_answers [str]` respectively).
48 | ```
49 | question = MultipleSelectQuestion(
50 |     question = "Select all prime numbers.",
51 |     correct_answers = ["2", "3", "5"],
52 |     all_answers = ["1", "2", "3", "4", "5"],
53 |     ...
54 | )
55 | ```
56 | 
57 | To set the correct answer(s) for SingleSelectQuestions, you must provide a list
58 | of all possible choices as well as the correct answer (`all_answers [str]` and
59 | `correct_answer str` respectively).
60 | 
61 | ```
62 | question = SingleSelectQuestion(
63 |     question = "What is 2 x 3?",
64 |     correct_answer = "6",
65 |     all_answers = ["2", "3", "5", "6"],
66 |     ...
67 | )
68 | ```
69 | 
70 | To set the correct answer(s) for InputQuestions, you must provide an array of
71 | all of the accepted answers (`answers [str]`).
72 | ```
73 | question = InputQuestion(
74 |     question = "Name a factor of 10.",
75 |     answers = ["1", "2", "5", "10"],
76 | )
77 | ```
78 | 
79 | To add images to a question's question, answers, or hints, format the image path
80 | with `'![](path/to/some/file.png)'` and `ricecooker` will parse them automatically.
81 | 
82 | 
83 | Once you have created the appropriate question object, add it to an exercise object
84 | with `exercise_node.add_question(question)`.
85 | 
86 | 
87 | Further reading
88 | ---------------
89 | 
90 |   - See also the section [Exercise Nodes](nodes.html#exercise-nodes) on the nodes page.
91 | 


--------------------------------------------------------------------------------
/docs/figures/HandBrake/handbrake_steps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbrake_steps.png


--------------------------------------------------------------------------------
/docs/figures/HandBrake/handbreake_audio_settings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbreake_audio_settings.png


--------------------------------------------------------------------------------
/docs/figures/HandBrake/handbreake_resizing_settings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbreake_resizing_settings.png


--------------------------------------------------------------------------------
/docs/figures/HandBrake/handbreake_screenshot_video_settings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/HandBrake/handbreake_screenshot_video_settings.png


--------------------------------------------------------------------------------
/docs/figures/content_pipeline_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/content_pipeline_diagram.png


--------------------------------------------------------------------------------
/docs/figures/content_pipeline_diagram_with_highlight.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/content_pipeline_diagram_with_highlight.png


--------------------------------------------------------------------------------
/docs/figures/kolibri_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/kolibri_logo.png


--------------------------------------------------------------------------------
/docs/figures/logo.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/logo.ico


--------------------------------------------------------------------------------
/docs/figures/ricecooker_domain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/docs/figures/ricecooker_domain.png


--------------------------------------------------------------------------------
/docs/index_api_reference.rst:
--------------------------------------------------------------------------------
 1 | Ricecooker API reference
 2 | ========================
 3 | The detailed information for content developers (chef authors) is presented here:
 4 | 
 5 | .. toctree::
 6 |    :maxdepth: 2
 7 | 
 8 |    nodes
 9 |    files
10 |    htmlapps
11 |    exercises
12 |    languages
13 |    chefops
14 | 
15 | 
16 | Code examples
17 | -------------
18 | 
19 |   - See the `examples directory on GitHub <https://github.com/learningequality/ricecooker/tree/master/examples>`__
20 |     for full code examples.
21 |   - See the `examples page <examples/index.html>`__ for literate code examples
22 |     that explain how to do specific tasks (find language codes, download subtitles,
23 |     create exercise questions, etc.). These examples are available as runnable
24 |     Jupyter notebooks so you can try things out interactively and learn.
25 |   - See the `Cheffing techniques doc <https://docs.google.com/document/d/18Gwip2a1nzjeFT8PT6hQpVeu9DAhmolCRNbrohPSxPM/edit#>`__
26 |     which provides links to tips and code examples for handling various special cases and content sources.
27 | 


--------------------------------------------------------------------------------
/docs/index_utils.rst:
--------------------------------------------------------------------------------
 1 | Working with content
 2 | ====================
 3 | Ricecooker includes a number of utility functions to help chef authors
 4 | with common content extraction and transformation tasks.
 5 | 
 6 | .. toctree::
 7 |    :maxdepth: 1
 8 | 
 9 | 
10 |    downloader
11 |    parsing_html
12 |    developer/kolibripreview
13 |    pdfutils
14 |    video_compression
15 |    csv_metadata/index
16 | 


--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | You can install `ricecooker` by running the command `pip install ricecooker`,
 5 | which will install the Python package and all its Python dependencies.
 6 | You'll need version 3.9 or higher of Python to use the `ricecooker` framework,
 7 | as well as some software for media file conversions (`ffmpeg` and  `poppler`).
 8 | 
 9 | In the next fifteen minutes or so, we'll set up your computer with all these things
10 | so you can get started writing your first content integration scripts.
11 | 
12 | 
13 | System prerequisites
14 | --------------------
15 | The first step will be to make sure you have `python3` installed on your
16 | computer and two additional file conversion tools: `ffmpeg` for video compression,
17 | and the `poppler` library for manipulating PDFs.
18 | 
19 | Jump to the specific instructions for your operating system, and be sure to try
20 | the *Checklist* commands to know the installation was successful.
21 | 
22 | 
23 | ### Linux
24 | On a Debian or Ubuntu GNU/Linux, you can install the necessary packages using:
25 | 
26 |     sudo apt-get install  git python3 ffmpeg poppler-utils
27 | 
28 | You may need to adjust the package names for other Linux distributions (CentOS/Fedora/OpenSuSE).
29 | 
30 | *Checklist*: verify your python version is 3.9 or higher by running `python3 --version`.
31 | If no `python3` command exists, then try `python --version`.
32 | Run the commands `ffmpeg -h` and `pdftoppm -h` to make sure they are available.
33 | 
34 | 
35 | ### Mac
36 | Mac OS X users can install the necessary software using [Homebrew](https://brew.sh/):
37 | 
38 |     brew install  git python3 ffmpeg poppler
39 | 
40 | *Checklist*: verify your python version is 3.9 or higher by running `python3 --version`.
41 | Also run the commands `ffmpeg -h` and `pdftoppm -h` to make sure they are available.
42 | 
43 | 
44 | 
45 | ### Windows
46 | On Windows the process is a little longer since we'll have to download and install
47 | several programs and make sure their `bin`-directories are added to the `Path` variable:
48 | 
49 | 1. Download Python from [https://www.python.org/downloads/windows/](https://www.python.org/downloads/windows/).
50 |    Look under the **Python 3.9.x** heading and choose the "Windows x86-64 executable installer"
51 |    option to download the latest installer and follow usual installation steps.
52 |    During the installation, make sure to check the box **"Add Python 3.9 to path"**.
53 |      - *Checklist*: after installation, open a new command prompt (`cmd.exe`) and
54 |        type in `python --version` and `pip --version` to make sure the commands are available.
55 | 2. Download `ffmpeg` from [https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip](https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip).
56 |    Extract the zip file to a permanent location where you store your code,
57 |    like `C:\Users\User\Tools` for example. Next, you must add the `bin` folder
58 |    that contains `ffmpeg` (e.g. `C:\Users\User\Tools\ffmpeg-4.1.4-win64-static\bin`)
59 |    to your user Path variable following [these instructions](https://www.computerhope.com/issues/ch000549.htm).
60 |      - *Checklist*: Open a new command prompt and type in `ffmpeg -h` and `ffprobe -h`
61 |        to verify the commands `ffmpeg` and `ffprobe` are available on your Path.
62 | 3. Download the file linked under "Latest binary" from [poppler-windows](http://blog.alivate.com.au/poppler-windows/).
63 |    You will need to download and install [7-zip](https://www.7-zip.org/) to "unzip"
64 |    the `.7z` archive. Extract the file to some permanent location in your files.
65 |    Add the `bin` folder `poppler-0.xx.y\bin` to your Path variable.
66 |      - *Checklist*: after installation, open a command terminal and type in
67 |        `pdftoppm -h` to make sure the command `pdftoppm` is available.
68 | 
69 | We recommend you also download and install Git from [https://git-scm.com/downloads](https://git-scm.com/downloads).
70 | Using git is not a requirement for getting started, but it's a great tool to
71 | have for borrowing code from others and sharing back your own code on the web.
72 | 
73 | If you find the text descriptions to be confusing, you can watch this
74 | [video walkthrough](http://youtube.com/watch?v=LxK8_BOSy-8) that shows the
75 | installation steps and also explains the adding-to-Path process.
76 | 
77 | 
78 | <iframe width="560" height="315" src="https://www.youtube.com/embed/LxK8_BOSy-8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
79 | <div style="height:50px;">&nbsp;</div>
80 | 
81 | 
82 | 
83 | Installing Ricecooker
84 | ---------------------
85 | To install the `ricecooker` package, simply run this command in a command prompt:
86 | 
87 |     pip install ricecooker
88 | 
89 | You will see lots of lines scroll on the screen as `pip`, the package installer for Python,
90 | installs all the Python packages required to create content integration scripts.
91 | 
92 | **Reporting issues**: If you run into problems or encounter an error in any of the above steps,
93 | please let us know by [opening an issue on github](https://github.com/learningequality/ricecooker/issues).
94 | 
95 | ------
96 | 
97 | Okay so now we have all the system software and Python libraries installed.
98 | [Let's get started!](tutorial/gettingstarted.html)
99 | 


--------------------------------------------------------------------------------
/docs/languages.md:
--------------------------------------------------------------------------------
 1 | Kolibri Language Codes
 2 | ----------------------
 3 | 
 4 | The file [le_utils/constants/languages.py](https://github.com/learningequality/le-utils/blob/master/le_utils/constants/languages.py)
 5 | and the lookup table in [le_utils/resources/languagelookup.json](https://github.com/learningequality/le-utils/blob/master/le_utils/resources/languagelookup.json)
 6 | define the internal representation for language codes used by Ricecooker, Kolibri,
 7 | and Kolibri Studio to identify content items in different languages.
 8 | 
 9 | The internal representation uses a mixture of two-letter codes (e.g. `en`),
10 | two-letter-and-country code (e.g. `pt-BR` for Brazilian Portuguese),
11 | and three-letter codes (e.g., `zul` for Zulu).
12 | 
13 | In order to make sure you have the correct language code when interfacing with
14 | the Kolibri ecosystem (e.g. when uploading new content to Kolibri Studio), you
15 | must look up the language object using the helper method `getlang`:
16 | 
17 | ```
18 | >>> from le_utils.constants.languages import getlang
19 | >>> language_obj = getlang('en')       # lookup language using language code
20 | >>> language_obj
21 | Language(native_name='English', primary_code='en', subcode=None, name='English', ka_name=None)
22 | ```
23 | The function `getlang` will return `None` if the lookup fails. In such cases, you
24 | can try lookup by name or lookup by alpha2 code (ISO_639-1) methods defined below.
25 | 
26 | Once you've successfully looked up the language object, you can obtain the internal
27 | representation language code from the language object's `code` attribute:
28 | ```
29 | >>> language_obj.code
30 | 'en'
31 | ```
32 | The `ricecooker` API expects these internal representation language codes will be
33 | supplied for all `language` attributes (channel language, node language, and files language).
34 | 
35 | 
36 | 
37 | ### More lookup helper methods
38 | 
39 | The helper method `getlang_by_name` allows you to lookup a language by name:
40 | ```
41 | >>> from le_utils.constants.languages import getlang_by_name
42 | >>> language_obj = getlang_by_name('English')  # lookup language by name
43 | >>> language_obj
44 | Language(native_name='English', primary_code='en', subcode=None, name='English', ka_name=None)
45 | ```
46 | 
47 | The module `le_utils.constants.languages` defines two other language lookup methods:
48 |   - Use `getlang_by_native_name` for looking up languages by native language name,
49 |     e.g., you look for 'Français' to find French.
50 |   - Use `getlang_by_alpha2` to perform lookups using the standard two-letter codes
51 |     defined in [ISO_639-1](https://en.wikipedia.org/wiki/ISO_639-1) that are
52 |     supported by the `pycountry` library.
53 | 


--------------------------------------------------------------------------------
/docs/parsing_html.md:
--------------------------------------------------------------------------------
 1 | Parsing HTML using BeautifulSoup
 2 | ================================
 3 | BeautifulSoup is an HTML parsing library that allows you to "select" various DOM
 4 | elements, and extract their attributes and text contents.
 5 | 
 6 | 
 7 | 
 8 | Video tutorial
 9 | --------------
10 | To get started, you can watch this [cheffing video tutorial](http://35.196.115.213/en/learn/#/topics/c/73470ad1a3015769ace455fbfdf17d48)
11 | that will show the basic steps of using `requests` and `BeautifulSoup` for crawling a website.
12 | See the [sushi-chef-shls code repo](https://github.com/learningequality/sushi-chef-shls/blob/master/sushichef.py#L226-L340)
13 | for the final version of the web crawling code that was used for this content source.
14 | 
15 | <a href="http://35.196.115.213/en/learn/#/topics/c/73470ad1a3015769ace455fbfdf17d48" target='_blank'>
16 | <iframe width="560" height="315" src="https://www.youtube.com/embed/yo-O3A8Jj38" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
17 | </a>
18 | <div style="height:50px;">&nbsp;</div>
19 | 
20 | 
21 | Scraping 101
22 | ------------
23 | The basic code to GET the HTML source of a webpage and parse it:
24 | 
25 | ```python
26 | import requests
27 | from bs4 import BeautifulSoup
28 | 
29 | url = 'https://somesite.edu'
30 | html = requests.get(url).content
31 | doc = BeautifulSoup(html, "html5lib")
32 | ```
33 | 
34 | You can now call `doc.find` and `doc.find_all` methods to select various DOM elements:
35 | 
36 | ```python
37 | special_ul = doc.find('ul', class_='some-special-class')
38 | section_lis = special_ul.find_all('li', recursive=False)  # search only immediate children
39 | for section_li in section_lis:
40 |     print('processing a section <li> right now...')
41 |     print(section_li.prettify())  # useful for seeing the HTML when developing...
42 | ```
43 | 
44 | The most commonly used parts of the BeautifulSoup API are:
45 |   - `.find(tag_name,  <spec>)`: find the next occurrence of the tag `tag_name` that
46 |      has attributes specified in `<spec>` (given as a dictionary), or can use the
47 |      shortcut options `id` and `class_` (note extra underscore).
48 |   - `.find_all(tag_name, <spec>)`: same as above but returns a list of all matching
49 |      elements. Use the optional keyword argument `recursive=False` to select only
50 |      immediate child nodes (instead of including children of children, etc.).
51 |   - `.next_sibling`: find the next element (for badly formatted pages with no useful selectors)
52 |   - `.get_text()` extracts the text contents of the node. See also helper method
53 |     called `get_text` that performs additional cleanup of newlines and spaces.
54 |   - `.extract()`: to extract an element from the DOM tree
55 |   - `.decompose()`: useful to remove any unwanted DOM elements
56 |     (same as `.extract()` but throws away the extracted element)
57 | 
58 | 
59 | ### Example 1
60 | Here is some sample code for getting the text of the LE mission statement:
61 | 
62 | ```python
63 | from bs4 import BeautifulSoup
64 | from ricecooker.utils.downloader import read
65 | 
66 | url = 'https://learningequality.org/'
67 | html = read(url)
68 | doc = BeautifulSoup(html, 'html5lib')
69 | 
70 | main_div = doc.find('div', {'id': 'body-content'})
71 | mission_el = main_div.find('h3', class_='mission-state')
72 | mission = mission_el.get_text().strip()
73 | print(mission)
74 | ```
75 | 
76 | ### Example 2
77 | To print a list of all the links on the page, use the following code:
78 | ```python
79 | links = doc.find_all('a')
80 | for link in links:
81 |     print(link.get_text().strip(), '-->', link['href'])
82 | ```
83 | 
84 | 
85 | 
86 | Further reading
87 | ---------------
88 | For more info about BeautifulSoup, see [the docs](https://www.crummy.com/software/BeautifulSoup/bs4/doc/).
89 | 
90 | There are also some excellent tutorials online you can read:
91 |   - [http://akul.me/blog/2016/beautifulsoup-cheatsheet/](http://akul.me/blog/2016/beautifulsoup-cheatsheet/)
92 |   - [http://youkilljohnny.blogspot.com/2014/03/beautifulsoup-cheat-sheet-parse-html-by.html](http://youkilljohnny.blogspot.com/2014/03/beautifulsoup-cheat-sheet-parse-html-by.html)
93 |   - [http://www.compjour.org/warmups/govt-text-releases/intro-to-bs4-lxml-parsing-wh-press-briefings/](http://www.compjour.org/warmups/govt-text-releases/intro-to-bs4-lxml-parsing-wh-press-briefings/)
94 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==8.2.3
2 | sphinx_rtd_theme
3 | recommonmark
4 | nbsphinx
5 | ipython
6 | sphinx-autobuild
7 | sphinx-notfound-page
8 | 


--------------------------------------------------------------------------------
/docs/tutorial/index.rst:
--------------------------------------------------------------------------------
 1 | Getting started
 2 | ===============
 3 | 
 4 | The purpose of these pages is to help you learn how to use the ``ricecooker`` framework.
 5 | 
 6 | .. toctree::
 7 |    :maxdepth: 2
 8 | 
 9 |    gettingstarted
10 |    tutorial
11 |    explanations
12 | 


--------------------------------------------------------------------------------
/docs/tutorial/quickstart.rst:
--------------------------------------------------------------------------------
 1 | :orphan:
 2 | 
 3 | .. Note this page exists for backward compatibility (since we sent this link
 4 |    to partners), we don't want them to hit a 404. The materials from docs/tutorial
 5 |    now live in the better named docs/examples.
 6 | 
 7 | 
 8 | Ricecooker quickstart
 9 | =====================
10 | 
11 | The following links will get you started with the content integration process in no time!
12 | 
13 |  - `Install <../installation.html>`_  Python, the ``ricecooker`` package, and system prerequisites (5–20 mins)
14 |  - `Getting started <gettingstarted.html>`_: upload your first channel to Kolibri Studio and import it in Kolibri (10 mins)
15 |  - For more info see the `ricecooker docs main page <../index.html>`_ 📚.
16 | 
17 | Welcome to the team!
18 | 


--------------------------------------------------------------------------------
/docs/tutorial/tutorial.rst:
--------------------------------------------------------------------------------
  1 | Hands-on tutorial
  2 | =================
  3 | 
  4 | In this tutorial, you'll start with a basic content integration script (sushi chef)
  5 | and extend the code to construct a bigger channel based on your own content.
  6 | In the process you'll learn about all the features of the ``ricecooker`` framework.
  7 | 
  8 | 
  9 | Prerequisite steps
 10 | ------------------
 11 | The steps in this tutorial assume you have:
 12 | 
 13 | 1. Completed the `Installation <../installation.html>`__ steps
 14 | 2. Created an account on `Kolibri Studio <https://studio.learningequality.org/>`__
 15 |    and obtained your access token, which you'll need to use instead of the text
 16 |    ``<your-access-token>`` in the examples below
 17 | 3. Successfully managed to run the basic chef example in the `Getting started <gettingstarted.html>`__ tutorial
 18 | 
 19 | 
 20 | Step 1: Setup your environment
 21 | ------------------------------
 22 | Create a directory called ``tutorial`` where you will run this code.
 23 | In general it is recommended to have separate directories for each content
 24 | integration script you will be working on.
 25 | In order to prepare for the upcoming **Step 6**, find a ``.pdf`` document,
 26 | a small ``.mp4`` video file, and an ``.mp3`` audio file.
 27 | Save these files somewhere inside the ``tutorial`` directory.
 28 | 
 29 | 
 30 | Step 2: Copy the sample code
 31 | ----------------------------
 32 | To begin, download the sample code from `here <https://github.com/learningequality/ricecooker/blob/master/examples/tutorial/sushichef.py>`__
 33 | and save it as the file ``sushichef.py`` in the tutorial directory.
 34 | 
 35 | Note all the ``TODO`` items in the code. These are the places left for you to edit.
 36 | 
 37 | 
 38 | Step 3: Edit the channel metadata
 39 | ---------------------------------
 40 | 1. Open your terminal and ``cd`` into the folder where ``sushichef.py`` is located.
 41 | 2. Open ``sushichef.py`` in a text editor.
 42 | 3. Change ``<yourdomain.org>`` to any domain. The source domain specifies who is supplying the content.
 43 | 4. Change ``<yourid>`` to any id. The source_id will distinguish your channel from other channels.
 44 | 5. Change ``The Tutorial Channel`` to any channel name.
 45 | 
 46 | Try running the sushi chef by entering the following command in your terminal::
 47 | 
 48 |     python sushichef.py  --token=<your-access-token>
 49 | 
 50 | Click the link to `Kolibri Studio <https://studio.learningequality.org/>`__ that
 51 | shows up in the final step and make sure your channel looks OK.
 52 | 
 53 | 
 54 | 
 55 | Step 4: Create a Topic
 56 | ----------------------
 57 | 1. Locate the first **TODO** in the ``sushichef.py`` file.
 58 |    Here, you will create your first topic.
 59 | 2. Copy/paste the example code and change ``exampletopic`` to ``mytopic``.
 60 | 3. Set the ``source_id`` to be something other than ``topic-1``
 61 |    (the ``source_id`` will distinguish your node from other nodes in the tree)
 62 | 4. Set the title.
 63 | 5. Go to the next **TODO** and add ``mytopic`` to channel (use example code as guide)
 64 | 
 65 | ::
 66 | 
 67 |     Check: Run the sushi chef from your terminal. Your channel should look like this:
 68 |     Channel
 69 |     | Example Topic
 70 |     | Your Topic
 71 | 
 72 | 
 73 | 
 74 | 
 75 | Step 5: Create a Subtopic
 76 | -------------------------
 77 | 1. Go to the next **TODO** in the ``sushichef.py`` file. Here, you will create a subtopic
 78 | 2. Copy/paste the example code and change ``examplesubtopic`` to ``mysubtopic``
 79 | 3. Set the ``source_id`` and ``title``
 80 | 4. Go to the next **TODO** and add ``mysubtopic`` to ``mytopic`` (use example code as guide)
 81 | 
 82 | ::
 83 | 
 84 |     Check: Run the sushi chef from your terminal. Your channel should look like this:
 85 |     Channel
 86 |     | Example Topic
 87 |     |      | Example Subtopic
 88 |     | Your Topic
 89 |     |      | Your Subtopic
 90 | 
 91 | 
 92 | Step 6: Create Files
 93 | --------------------
 94 | 1. Go to the next **TODO** in the ``sushichef.py`` file. Here, you will create a PDF file
 95 | 2. Copy/paste the example code and change ``examplepdf`` to ``mypdf``.
 96 |    ``DocumentFile(...)`` will automatically download a pdf file from the given path.
 97 | 3. Set the ``source_id``, the ``title``, and the ``path`` (any url to a pdf file)
 98 | 4. Repeat steps 1-3 for video files and audio files.
 99 | 5. Finally, add your files to your channel (see last \*\* statements)
100 | 
101 | ::
102 | 
103 |     Check: Run the sushi chef from your terminal. Your channel should look like this:
104 |     Channel
105 |     | Example Topic
106 |     |      | Example Subtopic
107 |     |      |      | Example Audio
108 |     |      | Example Video
109 |     | Your Topic
110 |     |      | Your Subtopic
111 |     |      |      | Your Audio
112 |     |      | Your Video
113 |     | Example PDF
114 |     | Your PDF
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | Next steps
122 | ----------
123 | You're now ready to start writing your own content integration scripts.
124 | The following links will guide you to the next steps:
125 | 
126 | - `Ricecooker API reference <../index_api_reference.html>`_
127 | - `Code examples <../examples/index.html>`_
128 | - `Learn about the ricecooker utilities and helpers <../index_utils.html>`_
129 | 


--------------------------------------------------------------------------------
/docs/usage.md:
--------------------------------------------------------------------------------
 1 | Using the ricecooker framework
 2 | ==============================
 3 | 
 4 | <!-- Note: this page exists for backward compatibility (since we sent this link
 5 |     to partners, we don't want them to hit a 404). -->
 6 | 
 7 | The following links will get you started with the content integration process in no time!
 8 | 
 9 |  - [Installation](installation.html): info about installing the `ricecooker` package
10 |    and system prerequisites (5–20 mins)
11 |  - [Getting started](tutorial/gettingstarted.html): upload your first channel to
12 |    Kolibri Studio and import it in Kolibri (10 mins)
13 |  - For more info see the [ricecooker docs main page](index.html) 📚.
14 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
 1 | Complete script examples
 2 | ========================
 3 | 
 4 | This directory contains examples of `ricecooker` content integration scripts (sushi chefs).
 5 | 
 6 |   - [`gettingstarted`](./gettingstarted)/[`sushichef.py`](./gettingstarted/sushichef.py)
 7 |     is a basic "Hello, World!" example used in the [Getting started](https://ricecooker.readthedocs.io/en/latest/gettingstarted.html) guide.
 8 |   - `tutorial/sushichef.py` is the code that goes with the sushi chef tutorial doc
 9 |     https://docs.google.com/document/d/1iiwce8B_AyJ2d6K8dYBl66n9zjz0zQ3G4gTrubdk9ws/edit
10 |   - `kitchensink/sushichef.py` is a comprehensive example that creates audio, video, and exercise nodes.
11 |   - `wikipedia/sushichef.py` is an example that creates a channel from two Wikipedia categories
12 | 
13 | To run each of these, you'll need to edit the `SOURCE_DOMAIN` and `SOURCE_ID`
14 | in each chef script and then call them on the command line:
15 | 
16 |     git clone https://github.com/learningequality/ricecooker.git
17 |     cd ricecooker/examples/examplename
18 |     # Follow the instructions in the README.md file...
19 |     # ...then run the sushichef script by calling:
20 |     python sushichef.py --token=YOURSTUDIOTOKENHERE9139139f3a23232
21 | 
22 | 
23 | Further reading
24 | ---------------
25 |   - See the [examples](https://ricecooker.readthedocs.io/en/latest/examples/)
26 |     page in the ricecooker docs site for more code samples related to specific tasks.
27 |   - See also the [sample-channels](https://github.com/learningequality/sample-channels)
28 |     repository which contains even more examples that cover special cases and needs.
29 | 


--------------------------------------------------------------------------------
/examples/gettingstarted/sushichef.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from ricecooker.chefs import SushiChef
 3 | from ricecooker.classes.files import DocumentFile
 4 | from ricecooker.classes.licenses import get_license
 5 | from ricecooker.classes.nodes import DocumentNode
 6 | from ricecooker.classes.nodes import TopicNode
 7 | 
 8 | 
 9 | class SimpleChef(SushiChef):  # minimal example chef: one topic holding one PDF document node
10 |     channel_info = {
11 |         "CHANNEL_TITLE": "Potatoes info channel",
12 |         "CHANNEL_SOURCE_DOMAIN": "<yourdomain.org>",  # where content comes from
13 |         "CHANNEL_SOURCE_ID": "<unique id for the channel>",  # CHANGE ME!!!
14 |         "CHANNEL_LANGUAGE": "en",  # le_utils language code
15 |         "CHANNEL_THUMBNAIL": "https://upload.wikimedia.org/wikipedia/commons/b/b7/A_Grande_Batata.jpg",  # (optional)
16 |         "CHANNEL_DESCRIPTION": "What is this channel about?",  # (optional)
17 |     }
18 | 
19 |     def construct_channel(self, **kwargs):  # builds the channel tree and returns the channel
20 |         channel = self.get_channel(**kwargs)
21 |         potato_topic = TopicNode(title="Potatoes!", source_id="<potatoes_id>")  # placeholder source_id
22 |         channel.add_child(potato_topic)
23 |         document_node = DocumentNode(
24 |             title="Growing potatoes",
25 |             description="An article about growing potatoes on your rooftop.",
26 |             source_id="pubs/mafri-potatoe",
27 |             license=get_license("CC BY", copyright_holder="University of Alberta"),
28 |             language="en",
29 |             files=[
30 |                 DocumentFile(
31 |                     path="https://www.gov.mb.ca/inr/pdf/pubs/mafri-potatoe.pdf",
32 |                     language="en",
33 |                 )
34 |             ],
35 |         )
36 |         potato_topic.add_child(document_node)  # the document lives under the topic, not the channel root
37 |         return channel
38 | 
39 | 
40 | if __name__ == "__main__":
41 |     """
42 |     Run this script on the command line using:
43 |         python sushichef.py  --token=YOURTOKENHERE9139139f3a23232
44 |     """
45 |     simple_chef = SimpleChef()
46 |     simple_chef.main()  # main() is not defined in this file; it is inherited via SushiChef
47 | 


--------------------------------------------------------------------------------
/examples/oldexamples/README.md:
--------------------------------------------------------------------------------
 1 | Old examples
 2 | ============
 3 | 
 4 | The code and procedures in these examples are still accurate,
 5 | but we don't recommend them as a starting point for learning, since they are more
 6 | involved tests that use advanced features like parsing JSON, compression, etc.
 7 | 
 8 | 
 9 | Need to fix URLs:
10 | 
11 | ```
12 | WARNING  	Video 6cafe8: http://www.youtube.com/watch?v=kpCJyQ2usJ4
13 |      Subtitle with langauge en is not available for http://www.youtube.com/watch?v=kpCJyQ2usJ4
14 | WARNING  	Audio aaaa4d: https://upload.wikimedia.org/wikipedia/commons/b/ba/Rice_grains_(IRRI)
15 |      404 Client Error: Not Found for url: https://upload.wikimedia.org/wikipedia/commons/b/ba/Rice_grains_(IRRI)
16 | WARNING  	Audio aaaa4d: https://ia801407.us.archive.org/21/items/ah_Rice/Rice.mp3
17 |      403 Client Error: Forbidden for url: https://archive.org/download/ah_Rice/Rice.mp3
18 | WARNING  	Exercise 6cafe3: http://www.publicdomainpictures.net/pictures/110000/nahled/bowl-of-rice.jpg
19 |      503 Server Error: Service Temporarily Unavailable for url: http://www.publicdomainpictures.net/pictures/110000/nahled/bowl-of-rice.jpg
20 | WARNING  	Question ddddd: ka-perseus-graphie.s3.amazonaws.com/907dec1b45fb177f0937fa521b7af03fb837f0bd
21 |      [Errno 2] No such file or directory: 'ka-perseus-graphie.s3.amazonaws.com/907dec1b45fb177f0937fa521b7af03fb837f0bd.svg'
22 | ```
23 | 


--------------------------------------------------------------------------------
/examples/oldexamples/content/0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5-data.json:
--------------------------------------------------------------------------------
1 | svgData0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5({"range":[[-0.5,10.5],[-1,1]],"labels":[{"content":"\\small{0}","coordinates":[0,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{1}","coordinates":[1,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{2}","coordinates":[2,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{3}","coordinates":[3,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{4}","coordinates":[4,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{5}","coordinates":[5,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{6}","coordinates":[6,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{7}","coordinates":[7,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{8}","coordinates":[8,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{9}","coordinates":[9,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{10}","coordinates":[10,-0.2],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\scriptsize{2}","coordinates":[6,0.2],"alignment":"above","typesetAsMath":true,"style":{}},{"content":"\\scriptsize{3}","coordinates":[4,0.2],"alignment":"above","typesetAsMath":true,"style":{}}]});


--------------------------------------------------------------------------------
/examples/oldexamples/content/0a0c0f1a1a40226d8d227a07dd143f8c08a4b8a5.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="440" height="80" viewBox="0 0 440 80"><defs/><path fill="none" stroke="#666666" d="M 20,40 L 420,40" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 20,48 L 20,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 60,48 L 60,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 100,48 L 100,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 140,48 L 140,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 180,48 L 180,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 220,48 L 220,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 260,48 L 260,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 300,48 L 300,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 340,48 L 340,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 380,48 L 380,32" style="stroke-width: 2px" stroke-width="2"/><path fill="none" stroke="#666666" d="M 420,48 L 420,32" style="stroke-width: 2px" stroke-width="2"/><ellipse cx="420" cy="40" rx="5" ry="5" fill="#6495ed" stroke="none" style="stroke-width: 2px" stroke-width="2"/><ellipse cx="260" cy="40" rx="5" ry="5" fill="#6495ed" stroke="none" style="stroke-width: 2px" stroke-width="2"/><ellipse cx="180" cy="40" rx="5" ry="5" fill="#6495ed" stroke="none" style="stroke-width: 2px" stroke-width="2"/><ellipse cx="60" cy="40" rx="5" ry="5" fill="#6495ed" stroke="none" style="stroke-width: 2px" stroke-width="2"/></svg>
2 | 


--------------------------------------------------------------------------------
/examples/oldexamples/content/captions.vtt:
--------------------------------------------------------------------------------
 1 | WEBVTT
 2 | Kind: captions
 3 | Language: sw
 4 | 
 5 | 00:00:00.042 --> 00:00:03.067
 6 | Kabla ya kuingia katika nyama ya algebra,
 7 | 
 8 | 00:00:03.067 --> 00:00:06.060
 9 | i alitaka kuwapa quote kutoka kwa mmoja wa akili mkuu katika historia ya binadamu,
10 | 
11 | 00:00:06.060 --> 00:00:11.726
12 | Galileo Galilei, kwa sababu nadhani quote hii encapsulates hatua ya kweli ya algebra
13 | 
14 | 00:00:11.726 --> 00:00:14.234
15 | na kwa kweli hisabati kwa ujumla.
16 | 
17 | 00:00:14.234 --> 00:00:19.133
18 | Akasema: "Falsafa, imeandikwa katika kitabu ile kuu ambayo milele uongo mbele ya macho yetu
19 | 
20 | 00:00:19.133 --> 00:00:21.444
21 | - I mean ulimwengu - lakini sisi hawezi kuelewa kama hatuwezi kwanza kujifunza lugha
22 | 
23 | 00:00:25.313 --> 00:00:27.980
24 | na kufahamu alama ambayo imeandikwa.
25 | 
26 | 00:00:27.980 --> 00:00:30.800
27 | Kitabu hii imeandikwa kwa lugha ya hisabati ...
28 | 
29 | 00:00:30.800 --> 00:00:35.933
30 | bila ambayo moja wanders bure kwa njia ya labyrinth giza. "
31 | 
32 | 00:00:35.933 --> 00:00:41.106
33 | Sana lakini kwa kiasi kikubwa na kina kirefu sana na kwa kweli hii ni hatua ya hisabati
34 | 


--------------------------------------------------------------------------------
/examples/oldexamples/content/htmltest.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/examples/oldexamples/content/htmltest.zip


--------------------------------------------------------------------------------
/examples/oldexamples/content/sushirolls.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/examples/oldexamples/content/sushirolls.pdf


--------------------------------------------------------------------------------
/examples/oldexamples/content/video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/examples/oldexamples/content/video.mp4


--------------------------------------------------------------------------------
/examples/oldexamples/data/sample_perseus01.json:
--------------------------------------------------------------------------------
 1 | {
 2 |    "answerArea":{
 3 |       "chi2Table":false,
 4 |       "periodicTable":false,
 5 |       "tTable":false,
 6 |       "zTable":false,
 7 |       "calculator":false
 8 |    },
 9 |    "hints":[
10 |       {
11 |          "widgets":{
12 | 
13 |          },
14 |          "images":{
15 |          },
16 |          "content":"Hint #1",
17 |          "replace":false
18 |       },
19 |       {
20 |          "widgets":{
21 | 
22 |          },
23 |          "images":{
24 | 
25 |          },
26 |          "content":"Hint #2",
27 |          "replace":false
28 |       }
29 |    ],
30 |    "question":{
31 |       "widgets":{
32 |          "radio 1":{
33 |             "type":"radio",
34 |             "alignment":"default",
35 |             "graded":true,
36 |             "static":false,
37 |             "options":{
38 |                "deselectEnabled":false,
39 |                "multipleSelect":false,
40 |                "choices":[
41 |                   {
42 |                      "correct":true,
43 |                      "content":"Yes"
44 |                   },
45 |                   {
46 |                      "correct":false,
47 |                      "content":"No"
48 |                   },
49 |                   {
50 |                      "correct":false,
51 |                      "content":">"
52 |                   }
53 |                ],
54 |                "displayCount":null,
55 |                "hasNoneOfTheAbove":false,
56 |                "randomize":false,
57 |                "onePerLine":true
58 |             },
59 |             "version":{
60 |                "minor":0,
61 |                "major":1
62 |             }
63 |          }
64 |       },
65 |       "images":{
66 |       },
67 |       "content":"Do you like rice?\\\"\\n\\n![](web+graphie:ka-perseus-graphie.s3.amazonaws.com/907dec1b45fb177f0937fa521b7af03fb837f0bd)\\n\\n[[\\u2603 radio 1]]"
68 |    },
69 |    "itemDataVersion":{
70 |       "minor":1,
71 |       "major":0
72 |    }
73 | }


--------------------------------------------------------------------------------
/examples/oldexamples/data/sample_perseus02.json:
--------------------------------------------------------------------------------
 1 | {
 2 |    "hints":[
 3 |       {
 4 |          "replace":false,
 5 |          "content":"Numbers are equivalent when they are located at the same point on the number line.\n\nLet's see what fraction is at the same location as $\\\\tealD{\\\\dfrac48}$ on the number line.\n",
 6 |          "widgets":{
 7 | 
 8 |          },
 9 |          "images":{
10 |             "web+graphie:file:///C:/Users/Jordan/contentcuration-dump/ddb3feb4c8e3740ca4f10c2ebad70b5797f60ebd":{
11 |                "width":460,
12 |                "height":120
13 |             }
14 |          }
15 |       },
16 |       {
17 |          "replace":false,
18 |          "content":"![](web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/a61/a61ac6f4038cb3e2c3bd6e69f6e75da10632a3d4\\n)\\n\\n $\\\\purpleC{\\\\dfrac24}$ is at the same location on the number line as  $\\\\tealD{\\\\dfrac48}$.\\n",
19 |          "widgets":{
20 | 
21 |          },
22 |          "images":{
23 | 
24 |          }
25 |       },
26 |       {
27 |          "replace":false,
28 |          "content":" $\\\\purpleC{\\\\dfrac24}$ is equivalent to $\\\\tealD{\\\\dfrac48}$.\\n\\n![]( web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/e84/e84b6d5fa1410f002ef8f9446a999d4a09266edd)",
29 |          "widgets":{
30 | 
31 |          },
32 |          "images":{
33 |             "web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/6a1/6a1bf04c8df3d217c846362e8902008d84d10ff4":{
34 |                "width":460,
35 |                "height":120
36 |             }
37 |          }
38 |       }
39 |    ],
40 |    "question":{
41 |       "content":"![](web+graphie:file:///home/ralphie/Desktop/ka-sushi-chef-sw/build/749/749d2d16db0cfc94e8685f3eb7302394448d8c8c)\\n\\n**Move the dot to a fraction equivalent to $\\\\tealD{\\\\dfrac48}$ on the number line.**\\n\\n\\n[[\\u2603 number-line 1]]\\n",
42 |       "widgets":{
43 |          "number-line 1":{
44 |             "type":"number-line",
45 |             "static":false,
46 |             "options":{
47 |                "initialX":null,
48 |                "labelRange":[
49 |                   null,
50 |                   null
51 |                ],
52 |                "divisionRange":[
53 |                   null,
54 |                   null
55 |                ],
56 |                "correctX":0.5,
57 |                "labelStyle":"non-reduced",
58 |                "labelTicks":true,
59 |                "snapDivisions":2,
60 |                "correctRel":"eq",
61 |                "static":false,
62 |                "numDivisions":null,
63 |                "range":[
64 |                   null,
65 |                   null
66 |                ],
67 |                "tickStep":0.25
68 |             },
69 |             "graded":true,
70 |             "version":{
71 |                "minor":0,
72 |                "major":0
73 |             },
74 |             "alignment":"default"
75 |          }
76 |       },
77 |       "images":{
78 | 
79 |       }
80 |    },
81 |    "itemDataVersion":{
82 |       "minor":1,
83 |       "major":0
84 |    },
85 |    "answerArea":{
86 |       "periodicTable":false,
87 |       "zTable":false,
88 |       "chi2Table":false,
89 |       "calculator":false,
90 |       "tTable":false
91 |    }
92 | }


--------------------------------------------------------------------------------
/examples/studiocontent/sushichef.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | from ricecooker.chefs import SushiChef
  3 | from ricecooker.classes.files import AudioFile
  4 | from ricecooker.classes.files import DocumentFile
  5 | from ricecooker.classes.licenses import get_license
  6 | from ricecooker.classes.nodes import AudioNode
  7 | from ricecooker.classes.nodes import DocumentNode
  8 | from ricecooker.classes.nodes import StudioContentNode
  9 | from ricecooker.classes.nodes import TopicNode
 10 | 
 11 | """
 12 | This example shows how to use the StudioContentNode to create a channel that
 13 | curates content from another channel already on Studio into a new channel.
 14 | """
 15 | 
 16 | SOURCE_DOMAIN = "testdomain.org"  ## change me!
 17 | 
 18 | # global dict to retain state between the two chef runs: the ids start as None, are filled in under __main__ after the first run, and are read by CuratedChannelChef
 19 | original_channel_data = {
 20 |     "channel_id": None,
 21 |     "doc_node_id": None,
 22 |     "audio_node_id": None,
 23 | }
 24 | 
 25 | 
 26 | class OriginalChannelChef(SushiChef):  # first chef run: defines the channel whose nodes CuratedChannelChef references
 27 |     channel_info = {
 28 |         "CHANNEL_TITLE": "Original channel",
 29 |         "CHANNEL_SOURCE_DOMAIN": SOURCE_DOMAIN,
 30 |         "CHANNEL_SOURCE_ID": "originalchannel",
 31 |         "CHANNEL_LANGUAGE": "en",
 32 |     }
 33 | 
 34 |     def construct_channel(self, **kwargs):  # returns a channel with one document node and one audio node
 35 |         channel = self.get_channel(**kwargs)
 36 | 
 37 |         document_node = DocumentNode(
 38 |             title="Growing potatoes",
 39 |             description="An article about growing potatoes on your rooftop.",
 40 |             source_id="pubs/mafri-potatoe",
 41 |             license=get_license("CC BY", copyright_holder="University of Alberta"),
 42 |             files=[
 43 |                 DocumentFile(
 44 |                     path="https://www.gov.mb.ca/inr/pdf/pubs/mafri-potatoe.pdf",
 45 |                     language="en",
 46 |                 )
 47 |             ],
 48 |         )
 49 |         channel.add_child(document_node)  # added first: read back as children[0] under __main__
 50 | 
 51 |         audio_node = AudioNode(
 52 |             source_id="also-sprach",
 53 |             title="Also Sprach Zarathustra",
 54 |             author="Kevin MacLeod / Richard Strauss",
 55 |             description="Also Sprach Zarathustra, Op. 30, is a tone poem by Richard Strauss, composed in 1896.",
 56 |             license=get_license("CC BY", copyright_holder="Kevin MacLeod"),
 57 |             files=[
 58 |                 AudioFile(
 59 |                     "https://ia600702.us.archive.org/33/items/Classical_Sampler-9615/Kevin_MacLeod_-_Also_Sprach_Zarathustra.mp3"
 60 |                 )
 61 |             ],
 62 |         )
 63 |         channel.add_child(audio_node)  # added second: read back as children[1] under __main__
 64 | 
 65 |         return channel
 66 | 
 67 | 
 68 | class CuratedChannelChef(SushiChef):  # second chef run: references nodes of the original channel by id
 69 |     channel_info = {
 70 |         "CHANNEL_TITLE": "Curated channel",
 71 |         "CHANNEL_SOURCE_DOMAIN": SOURCE_DOMAIN,
 72 |         "CHANNEL_SOURCE_ID": "curatedchannel",
 73 |         "CHANNEL_LANGUAGE": "en",
 74 |     }
 75 | 
 76 |     def construct_channel(self, **kwargs):  # reads original_channel_data, so __main__ must fill it in first
 77 |         channel = self.get_channel(**kwargs)
 78 | 
 79 |         document_topic = TopicNode(
 80 |             title="Documents",
 81 |             source_id="documents",
 82 |         )
 83 |         channel.add_child(document_topic)
 84 |         remote_document = StudioContentNode(
 85 |             title="Glorious new title for the potato doc",
 86 |             source_channel_id=original_channel_data["channel_id"],
 87 |             source_node_id=original_channel_data["doc_node_id"],
 88 |         )
 89 |         document_topic.add_child(remote_document)
 90 | 
 91 |         audio_topic = TopicNode(
 92 |             title="Audio",
 93 |             source_id="audio",
 94 |         )
 95 |         channel.add_child(audio_topic)
 96 |         remote_audio = StudioContentNode(  # no title passed; presumably keeps the source node's title - TODO confirm
 97 |             source_channel_id=original_channel_data["channel_id"],
 98 |             source_node_id=original_channel_data["audio_node_id"],
 99 |         )
100 |         audio_topic.add_child(remote_audio)
101 | 
102 |         return channel
103 | 
104 | 
105 | if __name__ == "__main__":
106 |     """
107 |     Run this script on the command line using:
108 |         python sushichef.py --token=YOURTOKENHERE9139139f3a23232
109 |     """
110 |     original_chef = OriginalChannelChef()
111 |     original_chef.main()
112 |     original_channel = original_chef.construct_channel()  # build the tree again locally to read its node ids
113 | 
114 |     original_channel_data["channel_id"] = original_channel.get_node_id().hex
115 |     original_channel_data["doc_node_id"] = (
116 |         original_channel.children[0].get_node_id().hex  # document node was added first
117 |     )
118 |     original_channel_data["audio_node_id"] = (
119 |         original_channel.children[1].get_node_id().hex  # audio node was added second
120 |     )
121 | 
122 |     input(
123 |         "Please visit the URL above and deploy the channel, and wait for it to finish. Then press enter to continue..."
124 |     )
125 | 
126 |     curated_chef = CuratedChannelChef()  # reached only after the user presses enter at the prompt above
127 |     curated_chef.main()
128 | 


--------------------------------------------------------------------------------
/examples/wikipedia/README.md:
--------------------------------------------------------------------------------
 1 | Wikipedia example
 2 | =================
 3 | 
 4 | The content integration script `sushichef.py` scrapes several wikipedia pages,
 5 | packages their contents as standalone `HTMLZipFile`s and uploads them to Studio.
 6 | 
 7 | 
 8 | ## Running the script
 9 | 
10 |     ./sushichef.py   --token=YOURSTUDIOTOKENHERE9139139f3a23232
11 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | testpaths = tests/
3 | norecursedirs = docs examples resources
4 | env =
5 |     # cleaned up in conftest.py fixture
6 |     RICECOOKER_STORAGE=./.pytest_storage
7 |     RICECOOKER_FILECACHE=./.pytest_filecache
8 | 


--------------------------------------------------------------------------------
/resources/scripts/convertvideo.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | TITLE Video conversion and compression script
 3 | REM Video conversion and compression script               Learning Equality 2018
 4 | REM Usage:
 5 | REM    convertvideo.bat  inputfile.mpg  [outputfile.mp4]
 6 | REM
 7 | REM This script will perform the following conversion steps:
 8 | REM   - Apply CRF 32 compression (very aggressive; may need to adjust below)
 9 | REM   - Limit the audio track to 32k/sec
10 | REM   - Resize the video to max_height=480
11 | REM You can manually edit the command below to customize the operations performed.
12 | setlocal
13 | 
14 | 
15 | REM 1. Check we have ffmpeg
16 | REM ############################################################################
17 | WHERE ffmpeg >nul 2>nul
18 | IF %ERRORLEVEL% NEQ 0 (
19 |     echo Error: ffmpeg not installed.
20 |     echo Please download zip from https://web.archive.org/web/20200918193047/https://ffmpeg.zeranoe.com/builds/
21 |     echo Then copy the files ffmpeg.exe and ffprobe.exe from bin/ folder to this folder.
22 |     exit /b 1
23 | )
24 | 
25 | 
26 | REM 2. Parse input filename
27 | REM ############################################################################
28 | IF NOT "%~1" == "" (
29 |     set "INFILE=%~1"
30 | ) else (
31 |     echo ERROR: Missing argument inputfile.mp4
32 |     echo Usage:   convertvideo.bat  inputfile.mp4  [outputfile.mp4]
33 |     exit /b 2
34 | )
35 | 
36 | REM 3. Prepare output filename (default: input path minus its extension, plus -converted.mp4)
37 | REM ############################################################################
38 | IF NOT "%~2" == "" (
39 |     set "OUTFILE=%~2"
40 | ) else (
41 |     set "OUTFILE=%~dpn1-converted.mp4"
42 | )
43 | 
44 | 
45 | REM 4. Do conversion
46 | REM ############################################################################
47 | echo Calling ffmpeg to convert: %INFILE% --to--^> %OUTFILE%
48 | ffmpeg -i "%INFILE%" ^
49 |     -b:a 32k -ac 1 ^
50 |     -vf scale="'w=-2:h=trunc(min(ih,480)/2)*2'" ^
51 |     -crf 32 ^
52 |     -profile:v baseline -level 3.0 -preset slow -v error -strict -2 -stats ^
53 |     -y "%OUTFILE%"
54 | 
55 | 
56 | echo Conversion done.
57 | endlocal


--------------------------------------------------------------------------------
/resources/scripts/convertvideo.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Video conversion and compression script                 Learning Equality 2018
 3 | # Usage:
 4 | #   ./convertvideo.sh  inputfile.mp4  [outputfile.mp4]
 5 | #
 6 | # This script will perform the following conversion steps:
 7 | #   - Apply CRF 32 compression (very aggressive; may need to adjust below)
 8 | #   - Limit the audio track to 32k/sec
 9 | #   - Resize the video to max_height=480
10 | # You can manually edit the command below to customize the operations performed.
11 | set -e
12 | 
13 | 
14 | # 1. Check we have ffmpeg
15 | ################################################################################
16 | if [ ! -x "$(command -v ffmpeg)" ]
17 | then
18 |   echo "Error: ffmpeg not installed. Please download from https://www.ffmpeg.org/"
19 |   exit 1
20 | fi
21 | 
22 | # 2. Parse input filename
23 | ################################################################################
24 | if [ -n "$1" ]
25 | then
26 |     INFILE="$1"
27 | else
28 |     echo "ERROR: Missing argument <inputfile.mp4>"
29 |     echo "Usage:   ./convertvideo.sh  inputfile.mp4  [outputfile.mp4]"
30 |     exit 2
31 | fi
32 | 
33 | # 3. Prepare output filename
34 | ################################################################################
35 | DEFAULTPREFIX="converted-"
36 | if [ -n "$2" ]
37 | then
38 |     OUTFILE="$2"
39 | else
40 |     # Default: drop the directory and the last extension, e.g. /a/b/clip.mov -> converted-clip.mp4
41 |     filename=$(basename -- "$INFILE")
42 |     filename="${filename%.*}"
43 |     OUTFILE="${DEFAULTPREFIX}${filename}.mp4"
44 | fi
45 | 
46 | 
47 | # 4. Do conversion
48 | ################################################################################
49 | echo "Calling ffmpeg to convert: $INFILE --> $OUTFILE"
50 | ffmpeg -i "$INFILE" \
51 |     -b:a 32k -ac 1 \
52 |     -vf scale="'w=-2:h=trunc(min(ih,480)/2)*2'" \
53 |     -crf 32 \
54 |     -profile:v baseline -level 3.0 -preset slow -v error -strict -2 -stats \
55 |     -y "$OUTFILE"
56 | 
57 | 
58 | echo "Conversion done."
59 | 


--------------------------------------------------------------------------------
/resources/templates/csv_channel/Channel.csv:
--------------------------------------------------------------------------------
1 | Title,Description,Domain,Source ID,Language,Thumbnail
2 | 


--------------------------------------------------------------------------------
/resources/templates/csv_channel/Content.csv:
--------------------------------------------------------------------------------
1 | Path *,Title *,Source ID,Description,Author,Language,License ID *,License Description,Copyright Holder,Thumbnail
2 | 


--------------------------------------------------------------------------------
/resources/templates/csv_channel/ExerciseQuestions.csv:
--------------------------------------------------------------------------------
1 | Source ID *,Question ID *,Question type *,Question *,Option A,Option B,Option C,Option D,Option E,Options F...,Correct Answer *,Correct Answer 2,Correct Answer 3,Hint 1,Hint 2,Hint 3,Hint 4,Hint 5,Hint 6+
2 | 


--------------------------------------------------------------------------------
/resources/templates/csv_channel/Exercises.csv:
--------------------------------------------------------------------------------
1 | Path *,Title *,Source ID *,Description,Author,Language,License ID *,License Description,Copyright Holder,Number Correct,Out of Total,Randomize,Thumbnail
2 | 


--------------------------------------------------------------------------------
/resources/templates/csv_channel/csvchef.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from ricecooker.chefs import LineCook
 3 | 
 4 | 
class CsvChef(LineCook):
    """
    Sushi chef for creating Kolibri Studio channels from local files and metadata
    provided in Channel.csv and Content.csv.
    """

    # No custom methods needed: the `LineCook` base class will do the cheffing.
    # Run `python csvchef.py -h` to see all the supported command line options.
13 | 
14 | 
# Script entry point: instantiate the chef and hand control to its `main()`.
if __name__ == "__main__":
    chef = CsvChef()
    chef.main()
18 | 


--------------------------------------------------------------------------------
/ricecooker/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | __author__ = "Learning Equality"
 4 | __email__ = "info@learningequality.org"
 5 | __version__ = "0.8.0"
 6 | 
 7 | 
 8 | import sys
 9 | 
# Fail fast at import time when the interpreter is older than Python 3.9.
if sys.version_info < (3, 9, 0):
    raise RuntimeError("Ricecooker only supports Python 3.9+")
12 | 


--------------------------------------------------------------------------------
/ricecooker/classes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/ricecooker/classes/__init__.py


--------------------------------------------------------------------------------
/ricecooker/exceptions.py:
--------------------------------------------------------------------------------
 1 | # Exceptions that might be raised during tree uploading process
 2 | 
 3 | 
 4 | class InvalidCommandException(Exception):
 5 |     """InvalidCommandException: raised when unrecognized command is entered"""
 6 | 
 7 |     def __init__(self, *args, **kwargs):
 8 |         Exception.__init__(self, *args, **kwargs)
 9 | 
10 | 
class InvalidUsageException(Exception):
    """Raised when the command line syntax is invalid."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
16 | 
17 | 
class InvalidFormatException(Exception):
    """Raised when a file format is unrecognized."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
23 | 
24 | 
class FileNotFoundException(Exception):
    """Raised when a file path is not found."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
30 | 
31 | 
class UnknownContentKindError(Exception):
    """Raised when a content kind is unrecognized."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
37 | 
38 | 
class UnknownQuestionTypeError(Exception):
    """Raised when a question type is unrecognized."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
44 | 
45 | 
class UnknownFileTypeError(Exception):
    """Raised when a file type is unrecognized."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
51 | 
52 | 
class UnknownLicenseError(Exception):
    """Raised when a license is unrecognized."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
58 | 
59 | 
class InvalidNodeException(Exception):
    """Raised when a node is improperly formatted."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
65 | 
66 | 
class InvalidQuestionException(Exception):
    """Raised when a question is improperly formatted."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged to the base Exception initializer.
        super().__init__(*args, **kwargs)
72 | 
73 | 
def raise_for_invalid_channel(channel):
    """Placeholder hook: performs no checks on `channel` and returns None."""
    return None
76 | 


--------------------------------------------------------------------------------
/ricecooker/managers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/ricecooker/managers/__init__.py


--------------------------------------------------------------------------------
/ricecooker/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/ricecooker/utils/__init__.py


--------------------------------------------------------------------------------
/ricecooker/utils/audio.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import subprocess
 3 | from enum import Enum
 4 | 
# Logger named "AudioResource"; its level is set to DEBUG when this module is imported.
LOGGER = logging.getLogger("AudioResource")
LOGGER.setLevel(logging.DEBUG)
 7 | 
 8 | 
 9 | class AudioCompressionError(Exception):
10 |     """
11 |     Custom error returned when `ffmpeg` compression exits with a non-zero status.
12 |     """
13 | 
14 | 
# Encoding modes accepted by `compress_audio`: constant (CBR) or variable (VBR) bit rate.
AudioEncoding = Enum("AudioEncoding", ["CBR", "VBR"])

# Allowed Constant Bit Rate values for MP3 encoding.
CBR_VALUES = {8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320}
# Allowed Variable Bit Rate values for MP3 encoding.
VBR_VALUES = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
21 | 
22 | 
def compress_audio(
    source_file_path,
    target_file,
    overwrite=False,
    encoding=AudioEncoding.CBR,
    bit_rate=96,
    vbr=7,
):
    """
    Compress the audio at `source_file_path` to MP3 using `ffmpeg`/libmp3lame
    and save the result to `target_file`.

    Args:
        source_file_path: path of the input audio, passed to ffmpeg's `-i`
        target_file: path of the compressed output
        overwrite (bool): pass `-y` (overwrite) instead of `-n` to ffmpeg
        encoding (AudioEncoding): constant or variable bit rate (default CBR)
        bit_rate (int): CBR bit rate, must be one of CBR_VALUES
        vbr (int): lame VBR quality setting, must be one of VBR_VALUES

    Raises:
        TypeError: if `encoding`, `bit_rate` or `vbr` has the wrong type
        ValueError: if `bit_rate` or `vbr` is not an allowed value
        AudioCompressionError: if ffmpeg cannot be run or exits with a
            non-zero status; the underlying error is chained as `__cause__`

    Note: both `bit_rate` and `vbr` are validated regardless of which
    encoding is selected.
    """

    if not isinstance(encoding, AudioEncoding):
        raise TypeError("encoding value must be {} enum value".format(AudioEncoding))

    if not isinstance(bit_rate, int):
        raise TypeError("bit_rate must be an integer")

    if bit_rate not in CBR_VALUES:
        raise ValueError("bit_rate must be one of {}".format(CBR_VALUES))

    if not isinstance(vbr, int):
        raise TypeError("vbr must be an integer")

    if vbr not in VBR_VALUES:
        raise ValueError("vbr must be one of {}".format(VBR_VALUES))

    # Pick the ffmpeg option that matches the requested encoding mode.
    if encoding is AudioEncoding.CBR:
        option_name = "-b:a"
        value = bit_rate
    else:
        option_name = "-qscale:a"
        value = vbr

    # Argument list (no shell), so paths are passed to ffmpeg verbatim.
    command = [
        "ffmpeg",
        "-y" if overwrite else "-n",
        "-i",
        source_file_path,
        "-codec:a",
        "libmp3lame",
        option_name,
        str(value),
        target_file,
    ]
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # Chain the original error so the ffmpeg failure stays in the traceback.
        raise AudioCompressionError("{}: {}".format(e, e.output)) from e
    except (BrokenPipeError, IOError) as e:
        raise AudioCompressionError("{}".format(e)) from e
79 | 


--------------------------------------------------------------------------------
/ricecooker/utils/browser.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import posixpath
 3 | import urllib
 4 | import webbrowser
 5 | from http.server import HTTPServer
 6 | from http.server import SimpleHTTPRequestHandler
 7 | 
 8 | 
 9 | def preview_in_browser(directory, filename="index.html", port=8282):
10 |     class RequestHandler(SimpleHTTPRequestHandler):
11 |         def translate_path(self, path):
12 |             # abandon query parameters
13 |             path = path.split("?", 1)[0]
14 |             path = path.split("#", 1)[0]
15 |             path = posixpath.normpath(urllib.parse.unquote(path))
16 |             words = path.split("/")
17 |             words = filter(None, words)
18 |             path = directory
19 |             for word in words:
20 |                 drive, word = os.path.splitdrive(word)
21 |                 head, word = os.path.split(word)
22 |                 if word in (os.curdir, os.pardir):
23 |                     continue
24 |                 path = os.path.join(path, word)
25 |             return path
26 | 
27 |     httpd = HTTPServer(("127.0.0.1", port), RequestHandler)
28 | 
29 |     webbrowser.open("http://127.0.0.1:{}/{}".format(port, filename))
30 | 
31 |     httpd.serve_forever()
32 | 


--------------------------------------------------------------------------------
/ricecooker/utils/caching.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | from datetime import datetime
  4 | from datetime import timedelta
  5 | 
  6 | from cachecontrol import CacheControlAdapter
  7 | from cachecontrol.caches.file_cache import FileCache
  8 | from cachecontrol.heuristics import BaseHeuristic
  9 | from cachecontrol.heuristics import datetime_to_header
 10 | from cachecontrol.heuristics import expire_after
 11 | 
 12 | from ricecooker import config
 13 | from ricecooker.utils.utils import get_hash
 14 | from ricecooker.utils.utils import is_valid_url
 15 | 
 16 | 
# Cache for filenames, backed by files under config.FILECACHE_DIRECTORY.
# Values are written by `set_cache_data` and read by `get_cache_data`.
FILECACHE = FileCache(config.FILECACHE_DIRECTORY, forever=True)
 19 | 
 20 | 
 21 | class NeverCache(BaseHeuristic):
 22 |     """
 23 |     Don't cache the response at all.
 24 |     """
 25 | 
 26 |     def update_headers(self, response):
 27 |         return {"cache-control": "no-cache"}
 28 | 
 29 | 
 30 | class CacheForeverHeuristic(BaseHeuristic):
 31 |     """
 32 |     Cache the response effectively forever.
 33 |     """
 34 | 
 35 |     def update_headers(self, response):
 36 |         headers = {}
 37 |         expires = expire_after(timedelta(weeks=10 * 52), date=datetime.now())
 38 |         headers["expires"] = datetime_to_header(expires)
 39 |         headers["cache-control"] = "public"
 40 | 
 41 |         return headers
 42 | 
 43 | 
 44 | class InvalidatingCacheControlAdapter(CacheControlAdapter):
 45 |     """
 46 |     Cache control adapter that deletes items from the cache as they're requested.
 47 |     Default heuristic is also set to a non-caching heuristic.
 48 |     """
 49 | 
 50 |     def __init__(self, heuristic=None, *args, **kw):
 51 |         if not heuristic:
 52 |             heuristic = NeverCache()
 53 |         super(InvalidatingCacheControlAdapter, self).__init__(
 54 |             *args, heuristic=heuristic, **kw
 55 |         )
 56 | 
 57 |     def send(self, request, **kw):
 58 | 
 59 |         # delete any existing cached value from the cache
 60 |         try:
 61 |             cache_url = self.controller.cache_url(request.url)
 62 |             self.cache.delete(cache_url)
 63 |         except FileNotFoundError:
 64 |             pass
 65 | 
 66 |         resp = super(InvalidatingCacheControlAdapter, self).send(request, **kw)
 67 | 
 68 |         return resp
 69 | 
 70 | 
 71 | def generate_key(action, path_or_id, settings=None, default=" (default)"):
 72 |     """generate_key: generate key used for caching
 73 |     Args:
 74 |         action (str): how video is being processed (e.g. COMPRESSED or DOWNLOADED)
 75 |         path_or_id (str): path to video or youtube_id
 76 |         settings (dict): settings for compression or downloading passed in by user
 77 |         default (str): if settings are None, default to this extension (avoid overwriting keys)
 78 |     Returns: filename
 79 |     """
 80 |     if settings and "postprocessors" in settings:
 81 |         # get determinisic dict serialization for nested dicts under Python 3.5
 82 |         settings_str = json.dumps(settings, sort_keys=True)
 83 |     else:
 84 |         # keep using old strategy to avoid invalidating all chef caches
 85 |         settings_str = (
 86 |             "{}".format(str(sorted(settings.items()))) if settings else default
 87 |         )
 88 |     return "{}: {} {}".format(action.upper(), path_or_id, settings_str)
 89 | 
 90 | 
 91 | def set_cache_data(key, file_metadata):
 92 |     if not key:
 93 |         return None
 94 |     FILECACHE.set(key, bytes(json.dumps(file_metadata), "utf-8"))
 95 | 
 96 | 
 97 | def get_cache_data(key):
 98 |     if not key:
 99 |         return None
100 |     file_metadata = FILECACHE.get(key)
101 | 
102 |     if not file_metadata:
103 |         return None
104 |     file_metadata = file_metadata.decode("utf-8")
105 | 
106 |     try:
107 |         file_metadata = json.loads(file_metadata)
108 |     except json.JSONDecodeError:
109 |         file_metadata = {
110 |             "filename": file_metadata,
111 |         }
112 |     if not os.path.exists(config.get_storage_path(file_metadata["filename"])):
113 |         return None
114 |     return file_metadata
115 | 
116 | 
def get_cache_filename(key):
    """Return the "filename" entry of the data cached under `key`, or None if there is none."""
    cached = get_cache_data(key)
    return cached["filename"] if cached else None
122 | 
123 | 
def cache_is_outdated(path, cache_file):
    """
    Tell whether the cached file name `cache_file` is stale for `path`.

    A missing cache entry is always outdated. URLs are never considered
    outdated (downloading is expensive, so the cache is used unless an update
    is explicitly requested). For local files the cache is current only when
    `cache_file` starts with the hash of the file on disk.
    """
    if not cache_file:
        return True

    if is_valid_url(path):
        return False

    # check if the on disk file has changed
    current_hash = get_hash(path)
    return not (current_hash and cache_file.startswith(current_hash))
138 | 


--------------------------------------------------------------------------------
/ricecooker/utils/encodings.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import re
 3 | 
# Matches a base64 image data URI ("data:image/<type>;base64,<payload>"), case-insensitively.
# Group 1 captures the letters after "image/"; group 2 captures the base64 payload.
BASE64_REGEX_STR = r"data:image\/([A-Za-z]*);base64,((?:[A-Za-z0-9+\/]{4})*(?:[A-Za-z0-9+\/]{2}==|[A-Za-z0-9+\/]{3}=)*)"
BASE64_REGEX = re.compile(BASE64_REGEX_STR, flags=re.IGNORECASE)
 6 | 
 7 | 
 8 | def get_base64_encoding(text):
 9 |     """get_base64_encoding: Get the first base64 match or None
10 |     Args:
11 |         text (str): text to check for base64 encoding
12 |     Returns: First match in text
13 |     """
14 |     return BASE64_REGEX.search(text)
15 | 
16 | 
def write_base64_to_file(encoding, fpath_out):
    """write_base64_to_file: Decode a base64 image data URI and write it to a file
    Args:
        encoding (str): text containing a base64 image data URI
        fpath_out (str): path to file to write
    Returns: None
    Raises:
        AssertionError: if no base64 image data URI is found in `encoding`
    """

    encoding_match = get_base64_encoding(encoding)

    # Raised explicitly rather than via `assert` so the check also runs under
    # `python -O`; the exception type is unchanged for existing callers.
    if not encoding_match:
        raise AssertionError("Error writing to file: Invalid base64 encoding")

    # Decode before opening the output so a decoding failure cannot leave a
    # truncated file behind.
    decoded = base64.decodebytes(encoding_match.group(2).encode("utf-8"))

    with open(fpath_out, "wb") as target_file:
        target_file.write(decoded)
31 | 
32 | 
def encode_file_to_base64(fpath_in, prefix):
    """encode_file_to_base64: base64-encode the contents of a file
    Args:
        fpath_in (str): path to the file to encode
        prefix (str): text placed before the encoded data (e.g. 'data:image/png;base64,')
    Returns: prefix followed by the base64 text of the file contents
    """
    with open(fpath_in, "rb") as source:
        raw_bytes = source.read()
    encoded_text = base64.b64encode(raw_bytes).decode("utf-8")
    return prefix + encoded_text
42 | 


--------------------------------------------------------------------------------
/ricecooker/utils/html_writer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import zipfile
  3 | 
  4 | from ricecooker.utils.downloader import read
  5 | 
  6 | 
  7 | class HTMLWriter:
  8 |     """
  9 |     Class for writing zipfiles
 10 |     """
 11 | 
 12 |     zf = None  # Zip file to write to
 13 |     write_to_path = None  # Where to write zip file
 14 | 
 15 |     def __init__(self, write_to_path, mode="w"):
 16 |         """Args: write_to_path: (str) where to write zip file"""
 17 |         self.map = {}  # Keeps track of content to write to csv
 18 |         self.write_to_path = write_to_path  # Where to write zip file
 19 |         self.mode = mode  # What mode to open zipfile in
 20 | 
 21 |     def __enter__(self):
 22 |         """Called when opening context (e.g. with HTMLWriter() as writer: )"""
 23 |         self.open()
 24 |         return self
 25 | 
 26 |     def __exit__(self, type, value, traceback):
 27 |         """Called when closing context"""
 28 |         self.close()
 29 | 
 30 |     def _write_to_zipfile(self, filename, content):
 31 |         if not self.contains(filename):
 32 |             info = zipfile.ZipInfo(filename, date_time=(2013, 3, 14, 1, 59, 26))
 33 |             info.comment = "HTML FILE".encode()
 34 |             info.compress_type = zipfile.ZIP_STORED
 35 |             info.create_system = 0
 36 |             self.zf.writestr(info, content)
 37 | 
 38 |     def _copy_to_zipfile(self, filepath, arcname=None):
 39 |         filename = arcname or filepath
 40 |         if not self.contains(filename):
 41 |             self.zf.write(filepath, arcname=arcname)
 42 | 
 43 |     """ USER-FACING METHODS """
 44 | 
 45 |     def open(self):
 46 |         """open: Opens zipfile to write to
 47 |         Args: None
 48 |         Returns: None
 49 |         """
 50 |         self.zf = zipfile.ZipFile(self.write_to_path, self.mode)
 51 | 
 52 |     def close(self):
 53 |         """close: Close zipfile when done
 54 |         Args: None
 55 |         Returns: None
 56 |         """
 57 |         index_present = self.contains("index.html")
 58 |         self.zf.close()  # Make sure zipfile closes no matter what
 59 |         if not index_present:
 60 |             raise ReferenceError(
 61 |                 "Invalid Zip at {}: missing index.html file (use write_index_contents method)".format(
 62 |                     self.write_to_path
 63 |                 )
 64 |             )
 65 | 
 66 |     def contains(self, filename):
 67 |         """contains: Checks if filename is in the zipfile
 68 |         Args: filename: (str) name of file to check
 69 |         Returns: boolean indicating whether or not filename is in the zip
 70 |         """
 71 |         return filename in self.zf.namelist()
 72 | 
 73 |     def write_contents(self, filename, contents, directory=None):
 74 |         """write_contents: Write contents to filename in zip
 75 |         Args:
 76 |             contents: (str) contents of file
 77 |             filename: (str) name of file in zip
 78 |             directory: (str) directory in zipfile to write file to (optional)
 79 |         Returns: path to file in zip
 80 |         """
 81 |         filepath = (
 82 |             "{}/{}".format(directory.rstrip("/"), filename) if directory else filename
 83 |         )
 84 |         self._write_to_zipfile(filepath, contents)
 85 |         return filepath
 86 | 
 87 |     def write_file(self, filepath, filename=None, directory=None):
 88 |         """write_file: Write local file to zip
 89 |         Args:
 90 |             filepath: (str) location to local file
 91 |             directory: (str) directory in zipfile to write file to (optional)
 92 |         Returns: path to file in zip
 93 | 
 94 |         Note: filepath must be a relative path
 95 |         """
 96 |         arcname = None
 97 |         if filename or directory:
 98 |             directory = directory.rstrip("/") + "/" if directory else ""
 99 |             filename = filename or os.path.basename(filepath)
100 |             arcname = "{}{}".format(directory, filename)
101 |         self._copy_to_zipfile(filepath, arcname=arcname)
102 |         return arcname or filepath
103 | 
104 |     def write_url(self, url, filename, directory=None):
105 |         """write_url: Write contents from url to filename in zip
106 |         Args:
107 |             url: (str) url to file to download
108 |             filename: (str) name of file in zip
109 |             directory: (str) directory in zipfile to write file to (optional)
110 |         Returns: path to file in zip
111 |         """
112 |         filepath = (
113 |             "{}/{}".format(directory.rstrip("/"), filename) if directory else filename
114 |         )
115 |         if not self.contains(filepath):
116 |             self._write_to_zipfile(filepath, read(url))
117 |         return filepath
118 | 
119 |     def write_index_contents(self, contents):
120 |         """write_index_contents: Write main index file to zip
121 |         Args:
122 |             contents: (str) contents of file
123 |         Returns: path to file in zip
124 |         """
125 |         self._write_to_zipfile("index.html", contents)
126 | 


--------------------------------------------------------------------------------
/ricecooker/utils/kolibripreview.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import argparse
 3 | import os
 4 | import shutil
 5 | import sys
 6 | 
 7 | 
 8 | def validate(srcdir):
 9 |     """
10 |     Check if `srcdir` has an index.html in it.
11 |     """
12 |     indexpath = os.path.join(srcdir, "index.html")
13 |     if not os.path.exists(indexpath):
14 |         print("Missing index.html file in", srcdir)
15 |         return False
16 |     return True
17 | 
18 | 
def main(args):
    """
    Command line utility for previewing HTML5App content in Kolibri.
    """
    # NOTE: the docstring above doubles as the argparse description, so it is
    # kept short; details live in these comments instead.
    # `args` must provide `srcdir` (HTML5 webroot) and `destzip` (zip to write).
    # Exits with status 1 when srcdir is not a directory and with status 2
    # when it has no index.html.
    if not os.path.isdir(args.srcdir):  # isdir() is already False for missing paths
        print("Error:", args.srcdir, "is not a directory.")
        sys.exit(1)
    if not validate(args.srcdir):
        print("Validation failed; exiting.")
        sys.exit(2)
    # Write the contents of `srcdir` to `destzip`; make_archive appends the
    # ".zip" suffix itself, so the extension is stripped first.
    destzipbase, _ = os.path.splitext(args.destzip)
    shutil.make_archive(destzipbase, "zip", args.srcdir)
32 | 
33 | 
# Script entry point: parse the command line options and run `main`.
if __name__ == "__main__":
    # The description shown by `-h` is taken from main's docstring.
    parser = argparse.ArgumentParser(description=main.__doc__)
    parser.add_argument(
        "--srcdir", help="HTML5 webroot (source directory)", default="."
    )
    parser.add_argument(
        "--destzip",
        help="Path to a HTML5 zip file in local Kolibri installation",
        required=True,
    )
    args = parser.parse_args()
    main(args)
46 | 


--------------------------------------------------------------------------------
/ricecooker/utils/paths.py:
--------------------------------------------------------------------------------
 1 | import ntpath
 2 | import os
 3 | from pathlib import Path
 4 | 
 5 | 
 6 | def dir_exists(filepath):
 7 |     file_ = Path(filepath)
 8 |     return file_.is_dir()
 9 | 
10 | 
def file_exists(filepath):
    """Return True when `filepath` is an existing regular file."""
    return Path(filepath).is_file()
14 | 
15 | 
def get_name_from_url(url):
    """
    Get the filename from a url, without any trailing parameters.
    url = http://abc.com/xyz.txt
    get_name_from_url(url) -> xyz.txt
    """
    # ntpath splits on both "/" and "\", so the last path segment is found
    # for URLs as well as for Windows-style paths.
    last_segment = ntpath.split(url)[1]

    # Cut at the first "&", then at the first "?".
    before_ampersand = last_segment.partition("&")[0]
    before_query = before_ampersand.partition("?")[0]

    # When removing the "?" part leaves nothing, fall back to the "&"-trimmed segment.
    return before_query or before_ampersand
36 | 
37 | 
def get_name_from_url_no_ext(url):
    """
    Get the filename from a url with its extension removed.
    url = http://abc.com/xyz.txt
    get_name_from_url_no_ext(url) -> xyz
    """
    stem, _extension = os.path.splitext(get_name_from_url(url))
    return stem
46 | 
47 | 
def build_path(levels):
    """
    Make a linear directory structure from a list of path level names.
    levels = ["chefdir", "trees", "test"]
    builds ./chefdir/trees/test/

    Returns the joined path. Directories that already exist are left alone.
    """
    path = os.path.join(*levels)
    # exist_ok avoids the check-then-create race when another process creates
    # the directory between the existence test and the makedirs() call.
    os.makedirs(path, exist_ok=True)
    return path
58 | 


--------------------------------------------------------------------------------
/ricecooker/utils/pipeline/__init__.py:
--------------------------------------------------------------------------------
 1 | import mimetypes
 2 | import os
 3 | from copy import deepcopy
 4 | from typing import Dict
 5 | from typing import Optional
 6 | 
 7 | from .convert import ConversionStageHandler
 8 | from .extract_metadata import ExtractMetadataStageHandler
 9 | from .file_handler import CompositeHandler
10 | from .transfer import DownloadStageHandler
11 | from ricecooker.utils.pipeline.context import FileMetadata
12 | 
13 | 
# Initialize mimetypes from the "mime.types" file that sits next to this module.
# Do this to prevent import of broken Windows filetype registry that makes guesstype not work.
# https://www.thecodingforums.com/threads/mimetypes-guess_type-broken-in-windows-on-py2-7-and-python-3-x.952693/
mimetypes.init([os.path.abspath(os.path.join(os.path.dirname(__file__), "mime.types"))])
17 | 
18 | 
class FilePipeline(CompositeHandler):
    """
    Runs a file through an ordered sequence of handlers.
    Each handler should be a subclass of Handler.
    Context passed to `execute` is shared with every handler, but values
    produced while a file is being processed take precedence over it.

    The stages can be customized through the `children` constructor argument.

    For example to add a custom stage to the pipeline, you can do:
    ```python
    from ricecooker.utils.pipeline import FilePipeline
    from ricecooker.utils.pipeline.custom_stage import CustomStageHandler
    pipeline = FilePipeline(children=[CustomStageHandler()])
    ```

    To just modify one of the existing stages, you can do:
    ```python
    from ricecooker.utils.pipeline import FilePipeline
    from ricecooker.utils.pipeline.convert import ConversionStageHandler
    from ricecooker.utils.pipeline.extract_metadata import ExtractMetadataStageHandler
    from ricecooker.utils.pipeline.transfer import DownloadStageHandler
    from ricecooker.utils.pipeline.transfer import DiskResourceHandler

    download_stage = DownloadStageHandler(children=[DiskResourceHandler()])
    pipeline = FilePipeline(children=[download_stage, ConversionStageHandler(), ExtractMetadataStageHandler()])
    ```
    This replaces the default `DownloadStageHandler` with one whose only child is a `DiskResourceHandler`.
    """

    DEFAULT_CHILDREN = [
        DownloadStageHandler,
        ConversionStageHandler,
        ExtractMetadataStageHandler,
    ]

    def execute(
        self,
        path: str,
        context: Optional[Dict] = None,
        skip_cache: Optional[bool] = False,
    ) -> list[FileMetadata]:
        """
        Run every stage for `path` and return the resulting metadata list.
        """
        base_context = context or {}
        current = [FileMetadata(path=path)]
        for stage in self._children:
            produced = []
            for metadata in current:
                if not stage.should_handle(metadata.path):
                    # This stage does not apply: carry the metadata over untouched
                    produced.append(metadata)
                    continue
                # Start from the shared context and overlay what earlier
                # stages recorded for this particular file
                stage_context = deepcopy(base_context)
                stage_context.update(metadata.to_dict())
                stage_results = stage.execute(
                    metadata.path,
                    context=stage_context,
                    skip_cache=skip_cache,
                )
                # Each result is merged into its own copy of the existing
                # metadata, so one input file may fan out into several outputs
                produced.extend(metadata.merge(result) for result in stage_results)
            current = produced
        return current
91 | 


--------------------------------------------------------------------------------
/ricecooker/utils/pipeline/context.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import asdict
 2 | from dataclasses import dataclass
 3 | from typing import Optional
 4 | from typing import Type
 5 | 
 6 | 
 7 | class AutoDataClassMetaClass(type):
 8 |     def __new__(mcs, name: str, bases: tuple, namespace: dict) -> Type:
 9 |         cls = super().__new__(mcs, name, bases, namespace)
10 |         return dataclass(frozen=True)(cls)
11 | 
12 | 
@dataclass
class ContentNodeMetadata:
    """
    A dataclass for storing metadata about a content node.

    Every field is optional and defaults to None.
    """

    # Descriptive and attribution fields
    title: Optional[str] = None
    description: Optional[str] = None
    thumbnail: Optional[str] = None
    license: Optional[str] = None
    license_description: Optional[str] = None
    author: Optional[str] = None
    aggregator: Optional[str] = None
    copyright_holder: Optional[str] = None
    provider: Optional[str] = None
    # List-valued label fields
    grade_levels: Optional[list[str]] = None
    categories: Optional[list[str]] = None
    resource_types: Optional[list[str]] = None
    learning_activities: Optional[list[str]] = None
    accessibility_labels: Optional[list[str]] = None
    learner_needs: Optional[list[str]] = None
    # Remaining scalar fields and a free-form dict
    role: Optional[str] = None
    source_id: Optional[str] = None
    kind: Optional[str] = None
    extra_fields: Optional[dict] = None
38 | 
39 | 
40 | def _recursive_update(target, source):
41 |     for k, v in source.items():
42 |         if k in target and isinstance(v, dict):
43 |             target[k] = _recursive_update(target[k], v)
44 |         else:
45 |             target[k] = v
46 |     return target
47 | 
48 | 
@dataclass
class FileMetadata:
    """Metadata about a single file; every field is optional and defaults to None."""

    filename: Optional[str] = None
    path: Optional[str] = None
    original_filename: Optional[str] = None
    language: Optional[str] = None
    duration: Optional[int] = None
    license: Optional[str] = None
    license_description: Optional[str] = None
    preset: Optional[str] = None
    content_node_metadata: Optional[ContentNodeMetadata] = None

    def to_dict(self):
        """Return the fields as a dict, leaving out every field that is None."""

        def _without_unset(pairs):
            return {key: value for key, value in pairs if value is not None}

        return asdict(self, dict_factory=_without_unset)

    def merge(self, other):
        """
        Return a new object of the same class in which the fields defined on
        `other` overwrite the corresponding fields of self.
        """
        combined = _recursive_update(self.to_dict(), other.to_dict())
        return self.__class__(**combined)
73 | 
74 | 
class ContextMetadata(metaclass=AutoDataClassMetaClass):
    """
    Base class for context metadata.

    The metaclass converts this class, and any subclass, into a frozen
    dataclass.
    """

    def to_dict(self):
        """
        Return the dataclass fields of this instance as a plain dict.
        """
        as_plain_dict = asdict(self)
        return as_plain_dict
78 | 


--------------------------------------------------------------------------------
/ricecooker/utils/pipeline/exceptions.py:
--------------------------------------------------------------------------------
 1 | class NoOperationRequiredException(Exception):
 2 |     pass
 3 | 
 4 | 
 5 | class InvalidFileException(Exception):
 6 |     pass
 7 | 
 8 | 
 9 | class ExpectedFileException(Exception):
10 |     pass
11 | 


--------------------------------------------------------------------------------
/ricecooker/utils/pipeline/extract_metadata.py:
--------------------------------------------------------------------------------
 1 | from le_utils.constants import file_formats
 2 | from le_utils.constants import format_presets
 3 | 
 4 | from .file_handler import ExtensionMatchingHandler
 5 | from .file_handler import StageHandler
 6 | from ricecooker.utils.pipeline.context import ContentNodeMetadata
 7 | from ricecooker.utils.pipeline.context import FileMetadata
 8 | from ricecooker.utils.utils import extract_path_ext
 9 | from ricecooker.utils.videos import extract_duration_of_media
10 | from ricecooker.utils.videos import guess_video_preset_by_resolution
11 | 
12 | 
# Lookup table from a file extension to the format preset used for files with
# that extension. MP4 and WEBM have no entry here.
PRESETS_FROM_EXTENSIONS = {
    file_formats.MP3: format_presets.AUDIO,
    file_formats.EPUB: format_presets.EPUB,
    file_formats.PDF: format_presets.DOCUMENT,
    file_formats.H5P: format_presets.H5P_ZIP,
    file_formats.BLOOMPUB: format_presets.BLOOMPUB,
    file_formats.BLOOMD: format_presets.BLOOMPUB,
    file_formats.HTML5: format_presets.HTML5_ZIP,
}

# Lookup table from a preset id to that preset's kind, built once at import
# time from every entry in ``format_presets.PRESETLIST``.
KIND_FROM_PRESET = {p.id: p.kind for p in format_presets.PRESETLIST}
24 | 
25 | 
class MetadataExtractor(ExtensionMatchingHandler):
    """
    Base handler that builds a ``FileMetadata`` for a file.

    Subclasses customise the result by overriding ``infer_metadata`` and/or
    ``infer_preset``.
    """

    def infer_metadata(self, path):
        """
        Return a dict of ``FileMetadata`` keyword arguments for ``path``.

        The base implementation contributes nothing.
        """
        return {}

    def infer_preset(self, path):
        """
        Return the preset mapped to the extension of ``path``, or None when
        the extension has no entry in ``PRESETS_FROM_EXTENSIONS``.
        """
        return PRESETS_FROM_EXTENSIONS.get(extract_path_ext(path))

    def handle_file(self, path):
        """
        Combine the inferred metadata and preset into a ``FileMetadata``.

        When a preset is found it is recorded, and if that preset maps to a
        kind, the kind is stored on the node-level metadata (created on
        demand).
        """
        fields = self.infer_metadata(path)
        preset = self.infer_preset(path)
        kind = KIND_FROM_PRESET.get(preset) if preset else None
        if preset:
            fields["preset"] = preset
        if kind:
            node_metadata = fields.setdefault(
                "content_node_metadata", ContentNodeMetadata()
            )
            node_metadata.kind = kind
        return FileMetadata(**fields)
46 | 
47 | 
class MediaMetadataExtractorMixin:
    """
    Mixin that supplies a ``duration`` entry for media files.
    """

    def infer_metadata(self, path):
        """
        Return ``{"duration": ...}`` using ``extract_duration_of_media`` on
        ``path`` and its extension.
        """
        extension = extract_path_ext(path)
        duration = extract_duration_of_media(path, extension)
        return {"duration": duration}
53 | 
54 | 
class AudioMetadataExtractor(MediaMetadataExtractorMixin, MetadataExtractor):
    """Metadata extractor declared for MP3 files; the mixin adds the duration."""

    # NOTE(review): presumably read by ExtensionMatchingHandler to decide
    # which files this handler accepts - confirm in file_handler.
    EXTENSIONS = {file_formats.MP3}
57 | 
58 | 
class EPUBMetadataExtractor(MetadataExtractor):
    """Metadata extractor declared for EPUB files."""

    EXTENSIONS = {file_formats.EPUB}
61 | 
62 | 
class PDFMetadataExtractor(MetadataExtractor):
    """Metadata extractor declared for PDF files."""

    EXTENSIONS = {file_formats.PDF}
65 | 
66 | 
class HTML5MetadataExtractor(MetadataExtractor):
    """Metadata extractor declared for the HTML5 file format."""

    EXTENSIONS = {file_formats.HTML5}
69 | 
70 | 
class H5PMetadataExtractor(MetadataExtractor):
    """Metadata extractor declared for H5P files."""

    EXTENSIONS = {file_formats.H5P}
73 | 
74 | 
class BloomPubMetadataExtractor(MetadataExtractor):
    """Metadata extractor declared for the BLOOMPUB and BLOOMD file formats."""

    EXTENSIONS = {file_formats.BLOOMPUB, file_formats.BLOOMD}
77 | 
78 | 
class VideoMetadataExtractor(MediaMetadataExtractorMixin, MetadataExtractor):
    """
    Metadata extractor declared for MP4 and WEBM files; the mixin adds the
    duration.
    """

    EXTENSIONS = {file_formats.MP4, file_formats.WEBM}

    def infer_preset(self, path):
        """
        Return the preset chosen by ``guess_video_preset_by_resolution``
        instead of looking the extension up in a table.
        """
        return guess_video_preset_by_resolution(path)
84 | 
85 | 
class ExtractMetadataStageHandler(StageHandler):
    """
    Stage handler for the ``EXTRACT_METADATA`` stage.

    NOTE(review): how ``StageHandler`` uses ``STAGE`` and ``DEFAULT_CHILDREN``
    is defined in ``file_handler`` - confirm there.
    """

    STAGE = "EXTRACT_METADATA"
    # Extractor classes listed as the default children of this stage.
    DEFAULT_CHILDREN = [
        AudioMetadataExtractor,
        EPUBMetadataExtractor,
        PDFMetadataExtractor,
        H5PMetadataExtractor,
        HTML5MetadataExtractor,
        BloomPubMetadataExtractor,
        VideoMetadataExtractor,
    ]
97 | 


--------------------------------------------------------------------------------
/ricecooker/utils/tokens.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
# Compatibility shim: where ``raw_input`` exists, alias ``input`` to it.
try:  # to support Python 2.x.
    input = raw_input
except NameError:
    # ``raw_input`` is not defined here, so the builtin ``input`` is kept.
    pass
 8 | 
 9 | 
10 | from ricecooker import config
11 | 
12 | 
def get_env(envvar):
    """
    Return the value of the environment variable `envvar`, or None when it is
    not set.
    """
    return os.environ.get(envvar)
21 | 
22 | 
def get_content_curation_token(args_token):
    """
    Resolve the authentication token from `args_token`, which can be
    1. a path to an existing file: the stripped file contents are the token
    2. the token itself (str): returned unchanged
    3. `#` (default value when no --token is given on command line)
    3a. the STUDIO_TOKEN environment variable is used if set, falling back to
        CONTENT_CURATION_TOKEN
    3b. otherwise the user is prompted interactively
    """
    if args_token == "#":  # retrieval strategies 3
        env_token = get_env("STUDIO_TOKEN") or get_env("CONTENT_CURATION_TOKEN")
        if env_token is None:
            return prompt_token(config.DOMAIN)  # 3b
        return env_token  # 3a

    # retrieval methods 1, 2
    if not os.path.isfile(args_token):
        return args_token
    with open(args_token, "r") as token_file:
        return token_file.read().strip()
44 | 
45 | 
def prompt_token(domain):
    """
    Prompt user to enter content curation server authentication token.

    The token is returned exactly as typed apart from surrounding whitespace;
    only the quit check is case-insensitive, so tokens containing uppercase
    characters are not altered.

    Args: domain (str): domain to authenticate user (currently unused)
    Returns: token
    Raises: SystemExit if the user enters 'q' (or 'Q')
    """
    entered = input("\nEnter content curation server token ('q' to quit): ")
    token = entered.strip()
    # Lowercase only for the comparison; lowercasing the token itself would
    # corrupt any token that is case-sensitive.
    if token.lower() == "q":
        sys.exit()
    return token
57 | 
58 | 
59 | # SUSHI_BAR_TOKEN = get_env('SUSHI_BAR_TOKEN')  # TODO in near future
60 | 


--------------------------------------------------------------------------------
/ricecooker/utils/utils.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | import os
 3 | import re
 4 | import shutil
 5 | from urllib.parse import urlparse
 6 | 
 7 | from ricecooker import config
 8 | 
 9 | 
VALID_UUID_REGEX = re.compile(r"^([a-f0-9]{32})$")


def is_valid_uuid_string(uuid_str):
    """
    Check if a string is a valid UUID.

    A valid value is a ``str`` of exactly 32 lowercase hexadecimal characters
    (no dashes).

    :param uuid_str: The value to check; non-string values are never valid.
    :return: True if valid, else False (always a real ``bool``).
    """
    if not isinstance(uuid_str, str):
        return False
    # fullmatch is required because ``$`` alone also matches just before a
    # trailing newline, which would let "<32 hex chars>\n" through.
    return VALID_UUID_REGEX.fullmatch(uuid_str) is not None
18 | 
19 | 
def make_dir_if_needed(path):
    """
    Check if the dir exists, and if not, create it. If the directory exists, just return it
    rather than throwing an error.

    :param path: A string representing a directory on disk.
    :return: A path to the directory that is guaranteed to exist.
    :raises FileExistsError: If `path` exists but is not a directory.
    """

    # exist_ok avoids the check-then-create race: another process creating the
    # directory between an existence check and makedirs no longer fails.
    os.makedirs(path, exist_ok=True)
    return path
32 | 
33 | 
class VideoURLFormatError(Exception):
    """
    Raised when a video URL does not have the expected format.

    The human-readable description is available both as ``self.message`` and
    through ``str(exc)``.
    """

    def __init__(self, url, expected_format):
        """
        :param url: The offending video URL.
        :param expected_format: Name of the format the URL was expected to match.
        """
        self.message = (
            "The video at {} does not appear to be a proper {} video URL.".format(
                url, expected_format
            )
        )
        # Pass the message to Exception so that str(exc), logging and
        # tracebacks show it instead of the raw argument tuple.
        super().__init__(self.message)
41 | 
42 | 
def extract_path_ext(path, default_ext=None):
    """
    Return the lowercased file extension (without the dot) of `path`, which
    may be a local path or a URL. When the path has no extension,
    `default_ext` is used instead; if that is not given either, a ValueError
    is raised.
    """
    # Work on the path component only, so URL query strings and fragments do
    # not leak into the extension.
    url_path = urlparse(path).path
    # splitext keeps the leading "." on the extension; slice it off.
    suffix = os.path.splitext(url_path)[1][1:]
    chosen = suffix or default_ext
    if not chosen:
        raise ValueError(
            "No extension in path {} and default_ext is None".format(url_path)
        )
    return chosen.lower()
57 | 
58 | 
def get_hash(filepath):
    """
    Return the MD5 hex digest of the file at `filepath`.

    The file is read in 2 MiB chunks so it is never held in memory whole.
    """
    digest = hashlib.md5()
    chunk_size = 2 * 1024 * 1024
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
65 | 
66 | 
def is_valid_url(path):
    """
    Tell URLs apart from local paths: return `True` when `path` parses with
    both a scheme and a network location, otherwise `False`.
    """
    parsed = urlparse(path)
    scheme, netloc = parsed.scheme, parsed.netloc
    return not (scheme == "" or netloc == "")
73 | 
74 | 
def copy_file_to_storage(srcfilename, ext=None):
    """
    Copy `srcfilename` (filepath) into storage under a content-derived name.

    The stored name is ``<hash of the file>.<ext>``; when `ext` is not given
    it is taken from `srcfilename`. Copying a file onto itself is ignored.

    :return: The storage filename (not the full path).
    :rtype: str
    """
    extension = extract_path_ext(srcfilename) if ext is None else ext
    digest = get_hash(srcfilename)
    filename = "{}.{}".format(digest, extension)
    try:
        shutil.copy(srcfilename, config.get_storage_path(filename))
    except shutil.SameFileError:
        # Source already is the storage copy; nothing to do.
        pass

    return filename
91 | 


--------------------------------------------------------------------------------
/ricecooker/utils/web.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module contains tools for parsing and handling HTML and other web content.
 3 | Note that we could not use html for the module name as recent versions of Python
 4 | include their own html module.
 5 | """
 6 | import os
 7 | 
 8 | from bs4 import BeautifulSoup
 9 | 
10 | 
class HTMLParser:
    """
    HTMLParser contains a set of functions for parsing, scraping, and updating an HTML page.

    The page comes either from `filename` (read lazily on first use) or from
    an `html` string passed directly.
    """

    def __init__(self, filename=None, html=None):
        """
        :param filename: Path of the HTML file to parse (optional if `html` is given).
        :param html: HTML source as a string (optional if `filename` is given).
        """
        self.filename = filename
        self.html = html
        # Tag name -> attribute of that tag which holds a link.
        self.link_tags = {
            "a": "href",
            "audio": "src",
            "img": "src",
            "link": "href",
            "script": "src",
        }

    def _load_soup(self):
        """
        Parse the page, reading it from `self.filename` first if no HTML has
        been loaded yet. The text is cached on `self.html`.
        """
        if self.html is None:
            # Context manager so the file handle is closed right away.
            with open(self.filename) as page:
                self.html = page.read()
        return BeautifulSoup(self.html, "html.parser")

    def get_links(self):
        """
        Retrieves all links contained within the page.

        Links back to the page itself and pure `#` anchors are skipped, and
        any query string or fragment is stripped from the returned links.

        :return: A list of local and remote URLs in the page.
        """
        soup = self._load_soup()
        # Derive the basename from the filename whenever one is known, not
        # only on the call that first reads the file. Otherwise a parser
        # built from an `html` string, or a second call on the same parser,
        # would filter out every link.
        basename = os.path.basename(self.filename) if self.filename else None

        extracted_links = []
        for tag_name, attribute in self.link_tags.items():
            for tag in soup.find_all(tag_name):
                link = tag.get(attribute)
                # don't include links to ourselves or # links
                # TODO: Should this part be moved to get_local_files instead?
                if not link or link.strip().startswith("#"):
                    continue
                if basename and link.startswith(basename):
                    continue
                # maxsplit=1 keeps links with several "?" or "#" characters
                # from raising on unpacking.
                link = link.split("?", 1)[0]
                link = link.split("#", 1)[0]
                extracted_links.append(link)

        return extracted_links

    def get_local_files(self):
        """
        Returns a list of files that are contained in the same directory as the HTML page or in its subdirectories.

        :return: A list of local files
        """
        # NOTE: This technically fails to handle file:// URLs, but we're highly unlikely to see
        # file:// URLs in any distributed package, so this is simpler than parsing out the protocol.
        return [link for link in self.get_links() if "://" not in link]

    def replace_links(self, links_to_replace):
        """
        Updates page links using the passed in replacement dictionary.

        :param links_to_replace: A dictionary of OriginalURL -> ReplacementURL key value pairs.
        :return: An HTML string of the page with all links replaced.
        """
        soup = self._load_soup()

        for tag_name, attribute in self.link_tags.items():
            for tag in soup.find_all(tag_name):
                link = tag.get(attribute)
                if link in links_to_replace:
                    tag[attribute] = links_to_replace[link]

        return soup.prettify()
94 | 


--------------------------------------------------------------------------------
/ricecooker/utils/zip.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | import zipfile
 4 | 
 5 | 
 6 | def _read_file(path):
 7 |     with open(path, "rb") as f:
 8 |         return f.read()
 9 | 
10 | 
def create_predictable_zip(path, entrypoint=None, file_converter=None):
    """
    Create a zip file with predictable sort order and metadata so that MD5 will
    stay consistent if zipping the same content twice.
    Args:
        path (str): absolute path either to a directory to zip up, or an existing zip file to convert.
        entrypoint (str or None): if specified, a relative file path in the zip to serve as the first page to load
            (accepted for interface compatibility; not used by this function)
        file_converter (callable or None): if given, called as
            `file_converter(filepath, reader)` for every entry and must return
            the content to store; `reader(filepath)` returns the original bytes.
    Returns: path (str) to the output zip file
    Raises: FileNotFoundError if `path` is neither a directory nor a file
    """
    extension = "zip"
    inputzip = None
    # if path is a directory, recursively enumerate all the files under the directory
    if os.path.isdir(path):
        # relpath (rather than slicing off len(path) + 1 characters) stays
        # correct when `path` ends with a path separator.
        paths = [
            os.path.relpath(os.path.join(root, filename), path)
            for root, _directories, filenames in os.walk(path)
            for filename in filenames
        ]

        def reader(x):
            return _read_file(os.path.join(path, x))

    # otherwise, if it's a zip file, open it up and pull out the list of names
    elif os.path.isfile(path):
        # splitext keeps the leading dot; drop it so the temp file suffix is
        # ".ext" rather than "..ext". Keep "zip" when there is no extension.
        extension = os.path.splitext(path)[1].lstrip(".") or extension
        inputzip = zipfile.ZipFile(path)
        paths = inputzip.namelist()

        def reader(x):
            return inputzip.read(x)

    else:
        raise FileNotFoundError(
            "Path {} is neither a directory nor a file".format(path)
        )

    try:
        # create a temporary zip file path to write the output into
        zippathfd, zippath = tempfile.mkstemp(suffix=".{}".format(extension))
        # Only the path is needed; release the descriptor immediately so it
        # cannot leak if writing the archive fails.
        os.close(zippathfd)

        with zipfile.ZipFile(
            zippath, "w", compression=zipfile.ZIP_DEFLATED
        ) as outputzip:
            # loop over the file paths in sorted order, to ensure a predictable zip
            for filepath in sorted(paths):
                if file_converter:
                    content = file_converter(filepath, reader)
                else:
                    content = reader(filepath)
                write_file_to_zip_with_neutral_metadata(outputzip, filepath, content)
    finally:
        # The source archive (if any) is no longer needed once copying ends.
        if inputzip is not None:
            inputzip.close()
    return zippath
57 | 
58 | 
def write_file_to_zip_with_neutral_metadata(zfile, filepath, content):
    """
    Store `content` at `filepath` inside the open ZipFile `zfile`, using fixed
    entry metadata (timestamp, comment, creating system) so that the output
    does not depend on when or where it was produced.
    Args:
        zfile (ZipFile): open ZipFile to write the content into
        filepath (str): the file path within the zip file to write into
        content (str): the content to write into the zip
    Returns: None
    """
    # Windows separators become forward slashes so entry names are the same
    # on every platform.
    normalized = "/".join(filepath.split("\\"))
    entry = zipfile.ZipInfo(normalized, date_time=(2015, 10, 21, 7, 28, 0))
    entry.create_system = 0
    entry.comment = b""
    entry.compress_type = zipfile.ZIP_DEFLATED
    zfile.writestr(entry, content)
76 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [bumpversion]
 2 | current_version = 0.1.0
 3 | commit = True
 4 | tag = True
 5 | 
 6 | [bumpversion:file:setup.py]
 7 | search = version='{current_version}'
 8 | replace = version='{new_version}'
 9 | 
10 | [bumpversion:file:ricecooker/__init__.py]
11 | search = __version__ = '{current_version}'
12 | replace = __version__ = '{new_version}'
13 | 
14 | [bdist_wheel]
15 | universal = 1
16 | 
17 | [flake8]
18 | exclude = docs, **/site-packages/**, examples
19 | ignore = E226,E203,E41,W503,E741
20 | max-line-length = 160
21 | max-complexity = 10
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | from setuptools import find_packages
 4 | from setuptools import setup
 5 | 
 6 | import ricecooker
 7 | 
 8 | 
 9 | readme = open("README.md").read()
10 | 
11 | with open("docs/history.rst") as history_file:
12 |     history = history_file.read()
13 | 
14 | setup(
15 |     name="ricecooker",
16 |     version=ricecooker.__version__,
17 |     description="API for adding content to the Kolibri content curation server",
18 |     long_description=readme + "\n\n" + history,
19 |     long_description_content_type="text/markdown",
20 |     author="Learning Equality",
21 |     author_email="dev@learningequality.org",
22 |     url="https://github.com/learningequality/ricecooker",
23 |     packages=find_packages(),
24 |     package_dir={"ricecooker": "ricecooker"},
25 |     entry_points={
26 |         "console_scripts": [
27 |             "corrections = ricecooker.utils.corrections:correctionsmain",
28 |         ]
29 |     },
30 |     include_package_data=True,
31 |     install_requires=[
32 |         "requests>=2.11.1",
33 |         "le_utils>=0.2.10",
34 |         "requests_file",
35 |         "beautifulsoup4>=4.6.3,<4.9.0",  # pinned to match versions in le-pycaption
36 |         "selenium==4.31.0",
37 |         "yt-dlp>=2024.12.23",
38 |         "html5lib",
39 |         "cachecontrol==0.14.3",
40 |         "filelock==3.18.0",  # This is needed, but not specified as a dependency by cachecontrol
41 |         "css-html-js-minify==2.5.5",
42 |         "pypdf2==1.26.0",
43 |         "dictdiffer>=0.8.0",
44 |         "Pillow==11.2.1",
45 |         "colorlog>=4.1.0,<6.9",
46 |         "chardet==5.2.0",
47 |         "ffmpy>=0.2.2",
48 |         "pdf2image==1.17.0",
49 |         "le-pycaption>=2.2.0a1",
50 |         "EbookLib>=0.17.1",
51 |         "filetype>=1.1.0",
52 |         "urllib3==2.4.0",
53 |         "langcodes[data]==3.5.0",
54 |     ],
55 |     extras_require={
56 |         "test": [
57 |             "requests-cache==1.2.1",
58 |             "pytest==8.3.5",
59 |             "pytest-env==1.1.5",
60 |             "vcrpy==7.0.0; python_version >='3.10'",
61 |             "mock==5.2.0",
62 |         ],
63 |         "dev": [
64 |             "pre-commit>=4.1.0",
65 |         ],
66 |         "google_drive": ["google-api-python-client", "google-auth"],
67 |     },
68 |     python_requires=">=3.9, <3.13",
69 |     license="MIT license",
70 |     zip_safe=False,
71 |     keywords="ricecooker",
72 |     classifiers=[
73 |         "Intended Audience :: Developers",
74 |         "Development Status :: 5 - Production/Stable",
75 |         "License :: OSI Approved :: MIT License",
76 |         "Programming Language :: Python :: 3.9",
77 |         "Programming Language :: Python :: 3.10",
78 |         "Programming Language :: Python :: 3.11",
79 |         "Programming Language :: Python :: 3.12",
80 |         "Natural Language :: English",
81 |         "Topic :: Education",
82 |     ],
83 |     test_suite="tests",
84 | )
85 | 


--------------------------------------------------------------------------------
/tests/chefs/fake_chef.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/chefs/fake_chef.py


--------------------------------------------------------------------------------
/tests/media_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/__init__.py


--------------------------------------------------------------------------------
/tests/media_utils/files/Wilhelm_Scream.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/Wilhelm_Scream.mp3


--------------------------------------------------------------------------------
/tests/media_utils/files/assets/css/empty.css:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/assets/css/empty.css


--------------------------------------------------------------------------------
/tests/media_utils/files/assets/css/empty2.css:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/assets/css/empty2.css


--------------------------------------------------------------------------------
/tests/media_utils/files/assets/images/4933759886_098e9acf93_m.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/assets/images/4933759886_098e9acf93_m.jpg


--------------------------------------------------------------------------------
/tests/media_utils/files/assets/images/copyright.txt:
--------------------------------------------------------------------------------
1 | File: 4933759886_098e9acf93_m.jpg
2 | Source: https://flic.kr/p/8vYNVC
3 | License: CC BY 2.0
4 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/assets/js/empty.js:
--------------------------------------------------------------------------------
1 | // regex to match folder called examples and any subfolders
2 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/audio/file_example_MP3_700KB.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/audio/file_example_MP3_700KB.mp3


--------------------------------------------------------------------------------
/tests/media_utils/files/file_metadata.txt:
--------------------------------------------------------------------------------
1 | Wilhelm_Scream.mp3: public domain, retrieved from https://en.wikipedia.org/wiki/File:Wilhelm_Scream.ogg
2 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/generate_thumbnail/sample.epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/generate_thumbnail/sample.epub


--------------------------------------------------------------------------------
/tests/media_utils/files/generate_thumbnail/sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/generate_thumbnail/sample.pdf


--------------------------------------------------------------------------------
/tests/media_utils/files/generate_thumbnail/sample.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/generate_thumbnail/sample.zip


--------------------------------------------------------------------------------
/tests/media_utils/files/kepub.epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/kepub.epub


--------------------------------------------------------------------------------
/tests/media_utils/files/page_with_links.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <link rel="stylesheet" type="text/css" href="assets/css/empty.css" />
 6 |     <link rel="stylesheet" type="text/css" href=assets/css/empty2.css />
 7 |     <script src="assets/js/empty.js?v=2"></script>
 8 |     <title>A Link Between Webs</title>
 9 | </head>
10 | <body>
11 |     <header>
12 |         <a href="page_with_links.html">Home</a> | <a href="#jquery">jQuery Tutorial</a>
13 |     </header>
14 |     <img src="assets/images/4933759886_098e9acf93_m.jpg" />
15 |     <p><a href="the_spanish_inquisition.html#nooneexpects">And now for something completely different...</a></p>
16 |     <p><a href="http://www.learningequality.org">Learning not equal? We can help!</a></p>
17 |     <p>This is not a/link/to/index.html</p>
18 |     <p>http://shouldntbeextracted.com</p>
19 |     <audio src="Wilhelm_Scream.mp3"></audio>
20 |     <h1>jQuery for ubernerds, chapter 1.</h1>
21 |     <p>Importing jQuery via script tag:</p>
22 |         <blockquote>
23 |         <pre>
24 |             &lt;script src="jquery.js"&gt;&lt;/script&gt;
25 |         </pre>
26 | 
27 |         </blockquote>
28 | </body>
29 | </html>
30 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/subtitles/empty.ttml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <tt xmlns="http://www.w3.org/2006/04/ttaf1" xmlns:tts="http://www.w3.org/2006/04/ttaf1#styling"
 3 |       xml:lang="en">
 4 |   <head>
 5 |     <styling>
 6 |       <style id="defaultSpeaker" tts:fontSize="12px" tts:fontFamily="SansSerif" tts:fontWeight="normal" tts:fontStyle="normal" tts:textDecoration="none" tts:color="white" tts:backgroundColor="black" tts:textAlign="left" />
 7 |       <style id="defaultCaption" tts:fontSize="12px" tts:fontFamily="SansSerif" tts:fontWeight="normal" tts:fontStyle="normal" tts:textDecoration="none" tts:color="white" tts:backgroundColor="black" tts:textAlign="left" />
 8 |     </styling>
 9 |   </head>
10 |   <body id="thebody" style="defaultCaption">
11 |     <div xml:lang="en"/>
12 |   </body>
13 | </tt>
14 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/subtitles/encapsulated.sami:
--------------------------------------------------------------------------------
 1 | <SAMI><HEAD><TITLE>NOVA3213</TITLE><STYLE TYPE="text/css">
 2 | <!--
 3 | P { margin-left:  1pt;
 4 |     margin-right: 1pt;
 5 |     margin-bottom: 2pt;
 6 |     margin-top: 2pt;
 7 |     text-align: center;
 8 |     font-size: 10pt;
 9 |     font-family: Arial;
10 |     font-weight: normal;
11 |     font-style: normal;
12 |     color: #ffeedd; }
13 | .ENCC {Name: English; lang: en; SAMI_Type: CC;}
14 | --></STYLE></HEAD><BODY>
15 | <SYNC start="9209"><P class="ENCC">
16 |        ( clock ticking )
17 | </P></SYNC>
18 | <SYNC start="12312"><P class="ENCC">&nbsp;</P></SYNC>
19 | <SYNC start="14848"><P class="ENCC">
20 |     MAN:<br/>
21 |     When we think<br/>
22 |     \u266a ...say bow, wow, \u266a
23 | </P></SYNC>
24 | <SYNC start="17000"><P class="ENCC">
25 |   <SPAN Style="text-align:right;">we have this vision of Einstein</SPAN>
26 | </P></SYNC>
27 | <SYNC start="18752"><P class="ENCC">
28 |     <br/>
29 |     as an old, wrinkly man<br/>
30 |     with white hair.
31 | </P></SYNC>
32 | <SYNC start="20887"><P class="ENCC">
33 |     MAN 2:<br/>
34 |     E equals m c-squared is<br/>
35 |     not about an old Einstein.
36 | </P></SYNC>
37 | <SYNC start="26760"><P class="ENCC">
38 |     MAN 2:<br/>
39 |     It's all about an eternal Einstein.
40 | </P></SYNC>
41 | <SYNC start="32200"><P class="ENCC">
42 |     &lt;LAUGHING &amp; WHOOPS!&gt;
43 | </P></SYNC>
44 | </BODY></SAMI>
45 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/subtitles/encapsulated.vtt:
--------------------------------------------------------------------------------
 1 | WEBVTT
 2 | 
 3 | 00:09.209 --> 00:12.312
 4 | ( clock ticking )
 5 | 
 6 | 00:14.848 --> 00:17.000
 7 | MAN:
 8 | When we think
 9 | \u266a ...say bow, wow, \u266a
10 | 
11 | 00:17.000 --> 00:18.752 align:right
12 | we have this vision of Einstein
13 | 
14 | 00:18.752 --> 00:20.887
15 | &nbsp;
16 | as an old, wrinkly man
17 | with white hair.
18 | 
19 | 00:20.887 --> 00:26.760
20 | MAN 2:
21 | E equals m c-squared is
22 | not about an old Einstein.
23 | 
24 | 00:26.760 --> 00:32.200
25 | MAN 2:
26 | It's all about an eternal Einstein.
27 | 
28 | 00:32.200 --> 00:36.200
29 | &lt;LAUGHING &amp; WHOOPS!>
30 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/subtitles/not.txt:
--------------------------------------------------------------------------------
1 | This file doesn't contain subtitles nor isn't it a subtitle format.
2 | 


--------------------------------------------------------------------------------
/tests/media_utils/files/thumbnails/BRAlogo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/BRAlogo1.png


--------------------------------------------------------------------------------
/tests/media_utils/files/thumbnails/toosquare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/toosquare.png


--------------------------------------------------------------------------------
/tests/media_utils/files/thumbnails/tootall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/tootall.png


--------------------------------------------------------------------------------
/tests/media_utils/files/thumbnails/toowide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/media_utils/files/thumbnails/toowide.png


--------------------------------------------------------------------------------
/tests/media_utils/test_audio.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import atexit
  4 | import os
  5 | import tempfile
  6 | 
  7 | import pytest
  8 | import requests_cache
  9 | from conftest import download_fixture_file
 10 | 
 11 | from ricecooker.utils import audio
 12 | from ricecooker.utils import videos
 13 | 
 14 | 
 15 | # cache, so we don't keep requesting the full audio
 16 | requests_cache.install_cache("audio_cache")
 17 | 
 18 | 
 19 | # FIXTURES
 20 | ################################################################################
 21 | 
 22 | 
 23 | @pytest.fixture
 24 | def audio_file():
 25 |     source_url = "https://archive.org/download/sound247/sound247.mp3"
 26 |     local_path = os.path.abspath(
 27 |         os.path.join(
 28 |             os.path.dirname(__file__),
 29 |             "..",
 30 |             "testcontent",
 31 |             "downloaded",
 32 |             "audio_media_test.mp3",
 33 |         )
 34 |     )
 35 |     download_fixture_file(source_url, local_path)
 36 |     assert os.path.exists(local_path)
 37 |     f = open(local_path, "rb")
 38 |     f.close()
 39 |     return f  # returns a closed file descriptor which we use for name attribute
 40 | 
 41 | 
 42 | @pytest.fixture
 43 | def bad_audio():
 44 |     with TempFile(suffix=".mp3") as f:
 45 |         f.write(b"noaudiohere. ffmpeg soshould error")
 46 |         f.flush()
 47 |     return f  # returns a temporary file with a closed file descriptor
 48 | 
 49 | 
 50 | # TESTS
 51 | ################################################################################
 52 | 
 53 | 
 54 | class Test_compress_video:
 55 |     def test_compression_works(self, audio_file):
 56 |         duration = videos.extract_duration_of_media(audio_file.name, "mp3")
 57 |         with TempFile(suffix=".mp3") as vout:
 58 |             audio.compress_audio(audio_file.name, vout.name, overwrite=True)
 59 |             compressed_duration = videos.extract_duration_of_media(vout.name, "mp3")
 60 |             assert duration == compressed_duration
 61 | 
 62 |     def test_raises_for_bad_file(self, bad_audio):
 63 |         with TempFile(suffix=".mp4") as vout:
 64 |             with pytest.raises(audio.AudioCompressionError):
 65 |                 audio.compress_audio(bad_audio.name, vout.name, overwrite=True)
 66 | 
 67 | 
 68 | # Helper class for cross-platform temporary files
 69 | ################################################################################
 70 | 
 71 | 
 72 | def remove_temp_file(*args, **kwargs):
 73 |     filename = args[0]
 74 |     try:
 75 |         os.remove(filename)
 76 |     except FileNotFoundError:
 77 |         pass
 78 |     assert not os.path.exists(filename)
 79 | 
 80 | 
 81 | class TempFile(object):
 82 |     """
 83 |     tempfile.NamedTemporaryFile deletes the file as soon as the filehandle is closed.
 84 |     This is OK on unix but on Windows the file can't be used by other commands
 85 |     (i.e. ffmpeg) unti the file is closed.
 86 |     Temporary files are instead deleted when we quit.
 87 |     """
 88 | 
 89 |     def __init__(self, *args, **kwargs):
 90 |         # all parameters will be passed to NamedTemporaryFile
 91 |         self.args = args
 92 |         self.kwargs = kwargs
 93 | 
 94 |     def __enter__(self):
 95 |         # create a temporary file as per usual, but set it up to be deleted once we're done
 96 |         self.f = tempfile.NamedTemporaryFile(*self.args, delete=False, **self.kwargs)
 97 |         atexit.register(remove_temp_file, self.f.name)
 98 |         return self.f
 99 | 
100 |     def __exit__(self, _type, value, traceback):
101 |         self.f.close()
102 | 


--------------------------------------------------------------------------------
/tests/media_utils/test_proxy.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pytest
 4 | 
 5 | from ricecooker.utils import proxy
 6 | from ricecooker.utils.youtube import YouTubeResource
 7 | 
 8 | 
 9 | YOUTUBE_TEST_VIDEO = "https://www.youtube.com/watch?v=C0DPdy98e4c"
10 | YOUTUBE_TEST_PLAYLIST = "https://www.youtube.com/playlist?list=PL472BC6F4F2C3ABEF"
11 | 
12 | 
13 | # This test takes a few minutes, but is very useful for checking that the proxy is not being ignored,
14 | # so mark it to run when the PYTEST_RUN_SLOW env var is set.
@pytest.mark.skipif(
    "PYTEST_RUN_SLOW" not in os.environ,
    reason="This test takes several minutes to complete.",
)
def test_bad_proxies_get_banned(tmp_path):
    """After a download with only fake proxies, all of them must be in BROKEN_PROXIES."""
    # create some fake proxies...
    fake_proxies = [
        "122.123.123.123:1234",
        "142.123.1.234:123345",
        "156.245.233.211:12323",
        "11.22.33.44:123",
    ]
    # Seed PROXY_LIST with a copy so the module cannot mutate our reference list.
    proxy.PROXY_LIST = list(fake_proxies)

    YouTubeResource(YOUTUBE_TEST_VIDEO).download(tmp_path)

    # Fake proxies should get added to BROKEN_PROXIES
    assert set(fake_proxies) <= set(proxy.BROKEN_PROXIES)
35 | 
36 | 
@pytest.mark.skipif(
    "PYTEST_RUN_SLOW" not in os.environ,
    reason="This test can take several minutes to complete.",
)
def test_proxy_download(tmp_path):
    """Refresh the proxy list, download the test video, and expect an .mp4 under Watch/."""
    proxy.get_proxies(refresh=True)
    assert len(proxy.PROXY_LIST) > 1

    YouTubeResource(YOUTUBE_TEST_VIDEO).download(tmp_path)

    downloaded = os.listdir(os.path.join(tmp_path, "Watch"))
    has_video = any(name.endswith(".mp4") for name in downloaded)

    assert has_video, "Video file not found"
55 | 
56 | 
@pytest.mark.skipif(
    "PYTEST_RUN_SLOW" not in os.environ,
    reason="This test can take several minutes to complete.",
)
def test_proxy_playlist_download(tmp_path):
    """Download the test playlist and expect exactly its videos and thumbnails in Playlist/."""
    YouTubeResource(YOUTUBE_TEST_PLAYLIST).download(tmp_path)

    expected = {
        "zbkizy-Y3qw.jpg",
        "oXnzstpBEOg.mp4",
        "oXnzstpBEOg.jpg",
        "zbkizy-Y3qw.mp4",
    }
    downloaded = os.listdir(os.path.join(tmp_path, "Playlist"))

    assert set(downloaded) == expected
74 | 


--------------------------------------------------------------------------------
/tests/media_utils/test_web.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from ricecooker.utils import web
 4 | 
 5 | test_dir = os.path.dirname(__file__)
 6 | 
 7 | 
 8 | def test_get_links():
 9 |     filename = os.path.abspath(os.path.join(test_dir, "files", "page_with_links.html"))
10 |     parser = web.HTMLParser(filename)
11 |     links = parser.get_links()
12 | 
13 |     expected_links = [
14 |         "assets/css/empty.css",
15 |         "assets/css/empty2.css",
16 |         "assets/js/empty.js",
17 |         "assets/images/4933759886_098e9acf93_m.jpg",
18 |         "the_spanish_inquisition.html",
19 |         "http://www.learningequality.org",
20 |         "Wilhelm_Scream.mp3",
21 |     ]
22 | 
23 |     # make sure the link order is the same to do an equality test
24 |     links.sort()
25 |     expected_links.sort()
26 | 
27 |     assert links == expected_links
28 | 
29 | 
def test_get_local_files():
    """get_local_files() on the sample page must return exactly the expected local links."""
    page_path = os.path.abspath(os.path.join(test_dir, "files", "page_with_links.html"))
    found = web.HTMLParser(page_path).get_local_files()

    wanted = [
        "assets/css/empty.css",
        "assets/css/empty2.css",
        "assets/js/empty.js",
        "assets/images/4933759886_098e9acf93_m.jpg",
        "the_spanish_inquisition.html",
        "Wilhelm_Scream.mp3",
    ]

    # Sort both sides so the comparison does not depend on extraction order.
    assert sorted(found) == sorted(wanted)
49 | 
50 | 
def test_replace_links():
    """Rewrite the sample page's local links and re-parse the resulting HTML."""
    page_path = os.path.abspath(os.path.join(test_dir, "files", "page_with_links.html"))
    parser = web.HTMLParser(page_path)

    original_links = [
        "assets/css/empty.css",
        "assets/css/empty2.css",
        "assets/js/empty.js",
        "assets/images/4933759886_098e9acf93_m.jpg",
        "the_spanish_inquisition.html",
        "Wilhelm_Scream.mp3",
    ]

    # Map each original link to its location under the zipcontent prefix.
    replacement_links = {
        link: "/zipcontent/012343545454645454/{}".format(link)
        for link in original_links
    }

    new_html = parser.replace_links(replacement_links)
    reparsed_links = web.HTMLParser(html=new_html).get_links()

    # NOTE(review): replacement_links is keyed by the ORIGINAL links, while the
    # links read back here come from the rewritten HTML. Confirm that this
    # lookup checks what was intended and that the loop is not vacuous.
    for link in reparsed_links:
        assert link == replacement_links[link]
75 | 


--------------------------------------------------------------------------------
/tests/pipeline/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/pipeline/__init__.py


--------------------------------------------------------------------------------
/tests/test_argparse.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | import pytest
  4 | from mock import patch
  5 | 
  6 | from ricecooker.chefs import SushiChef
  7 | from ricecooker.exceptions import InvalidUsageException
  8 | 
  9 | 
 10 | @pytest.fixture
 11 | def cli_args_and_expected():
 12 |     defaults = {
 13 |         "command": "uploadchannel",
 14 |         "update": False,
 15 |         "verbose": True,
 16 |         "debug": False,
 17 |         "warn": False,
 18 |         "quiet": False,
 19 |         "compress": False,
 20 |         "thumbnails": False,
 21 |         "download_attempts": 3,
 22 |         "resume": False,
 23 |         "step": "LAST",
 24 |         "prompt": False,
 25 |         "reset_deprecated": False,
 26 |         "stage": True,
 27 |         "stage_deprecated": False,
 28 |         "publish": False,
 29 |         "sample": None,
 30 |     }
 31 |     return [
 32 |         {  # this used to be the old recommended CLI args to run chefs
 33 |             "cli_input": "./sushichef.py -v --reset --token=letoken",
 34 |             "expected_args": dict(defaults, token="letoken", reset_deprecated=True),
 35 |             "expected_options": {},
 36 |         },
 37 |         {  # nowadays we've changed the CLI defaults so don't need to specify these
 38 |             "cli_input": "./sushichef.py --token=letoken",
 39 |             "expected_args": dict(defaults, token="letoken"),
 40 |             "expected_options": {},
 41 |         },
 42 |         {
 43 |             "cli_input": "./sushichef.py --token=letoken --resume --step=START_UPLOAD",
 44 |             "expected_args": dict(
 45 |                 defaults, token="letoken", resume=True, step="START_UPLOAD"
 46 |             ),
 47 |             "expected_options": {},
 48 |         },
 49 |         {
 50 |             "cli_input": "./sushichef.py --token=letoken lang=fr",
 51 |             "expected_args": dict(defaults, token="letoken"),
 52 |             "expected_options": dict(lang="fr"),
 53 |         },
 54 |         {
 55 |             "cli_input": "./sushichef.py --token=letoken somethin=else extrakey=extraval",
 56 |             "expected_args": dict(defaults, token="letoken"),
 57 |             "expected_options": dict(somethin="else", extrakey="extraval"),
 58 |         },
 59 |         {
 60 |             "cli_input": (
 61 |                 "./sushichef.py -uv --warn --compress --download-attempts=4 "
 62 |                 "--token=besttokenever --resume --step=PUBLISH_CHANNEL --prompt --deploy --publish"
 63 |             ),
 64 |             "expected_args": dict(
 65 |                 defaults,
 66 |                 update=True,
 67 |                 warn=True,
 68 |                 compress=True,
 69 |                 download_attempts=4,
 70 |                 token="besttokenever",
 71 |                 resume=True,
 72 |                 step="PUBLISH_CHANNEL",
 73 |                 prompt=True,
 74 |                 stage=False,
 75 |                 publish=True,
 76 |             ),
 77 |             "expected_options": {},
 78 |         },
 79 |     ]
 80 | 
 81 | 
 82 | def chef_arg_parser(cli_input):
 83 |     """
 84 |     Takes a string `cli_input` and parses it using the SushiChef arg parser.
 85 |     Returns tuple of args and options.
 86 |     """
 87 |     test_argv = cli_input.split(" ")
 88 |     with patch.object(sys, "argv", test_argv):
 89 |         chef = SushiChef()
 90 |         args, options = chef.parse_args_and_options()
 91 |     assert args is not None, "argparse parsing failed"
 92 |     return args, options
 93 | 
 94 | 
 95 | """ *********** CLI ARGUMENTS TESTS *********** """
 96 | 
 97 | 
 98 | def test_basic_command_line_args_and_options(cli_args_and_expected):
 99 |     for case in cli_args_and_expected:
100 |         cli_input = case["cli_input"]
101 |         expected_args = case["expected_args"]
102 |         expected_options = case["expected_options"]
103 | 
104 |         args, options = chef_arg_parser(cli_input)
105 | 
106 |         # print('observed', args, options)
107 |         # print('expected', expected_args, expected_options)
108 | 
109 |         for arg, val in expected_args.items():
110 |             assert args[arg] == val
111 |         for opt, val in expected_options.items():
112 |             assert options[opt] == val
113 | 
114 | 
def test_cannot_publish_without_deploy():
    """``--publish`` alone is invalid usage; together with ``--deploy`` it is accepted."""
    with pytest.raises(InvalidUsageException):
        chef_arg_parser("./sushichef.py --token=letoken --publish")

    args, _options = chef_arg_parser(
        "./sushichef.py --token=letoken --deploy --publish"
    )
    assert not args["stage"]
    assert args["publish"]
124 | 


--------------------------------------------------------------------------------
/tests/test_csv_metadata.py:
--------------------------------------------------------------------------------
 1 | """ Tests for CSV exercises channel logic """
 2 | import os
 3 | import tempfile
 4 | 
 5 | import pytest
 6 | 
 7 | from ricecooker.chefs import LineCook
 8 | from ricecooker.utils.jsontrees import read_tree_from_json
 9 | from ricecooker.utils.metadata_provider import CsvMetadataProvider
10 | 
11 | 
@pytest.fixture
def channeldir():
    """Relative path of the sample CSV channel directory used by these tests."""
    parts = ("tests", "testchannels", "csv_channel_with_exercises", "channeldir")
    return os.path.join(*parts)
17 | 
18 | 
def test_exercises_metadata_provider(channeldir):
    """Load the sample channel's CSV metadata and check the exercise counts."""
    channeldirname = os.path.basename(channeldir)
    mp = CsvMetadataProvider(channeldir)
    assert mp is not None, "CsvMetadataProvider does not exist"
    mp.validate_headers()
    assert mp.has_exercises(), "has exercises"

    source_id = mp.get_channel_info()["source_id"]
    assert source_id == "csv_channel_with_exercises", "check source id"

    assert len(mp.contentcache.keys()) == 8, "Found too many items"

    root_exercises = mp.get_exercises_for_dir((channeldirname,))
    assert len(root_exercises) == 1, "one exercise in root"

    nested_exercises = mp.get_exercises_for_dir((channeldirname, "exercises"))
    assert len(nested_exercises) == 3, "3 exercise in exercises/"
34 | 
35 | 
def test_exercises_linecook(channeldir):
    """
    Run LineCook.pre_run on the sample channel and check the JSON tree it writes.

    The tree is written into a temporary directory that is removed when the
    ``with`` block exits, including when an assertion below fails.
    """
    with tempfile.TemporaryDirectory() as tmpdir_path:
        linecook = LineCook()
        linecook.TREES_DATA_DIR = tmpdir_path
        linecook.RICECOOKER_JSON_TREE = "test_ricecooker_json_tree.json"

        args = dict(
            channeldir=channeldir,
            channelinfo="Channel.csv",
            contentinfo="Content.csv",
            exercisesinfo="Exercises.csv",
            questionsinfo="ExerciseQuestions.csv",
            token="???",
        )
        options = {}
        linecook.pre_run(args, options)

        jsontree_path = os.path.join(tmpdir_path, linecook.RICECOOKER_JSON_TREE)
        assert os.path.exists(jsontree_path), "output json exists"
        test_tree = read_tree_from_json(jsontree_path)
        assert len(test_tree["children"]) == 3, "exercise node + two dirs"
62 | 


--------------------------------------------------------------------------------
/tests/test_downloader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import unittest
 3 | 
 4 | from ricecooker.utils import downloader
 5 | 
 6 | 
 7 | class TestArchiver(unittest.TestCase):
 8 |     def test_get_archive_filename_absolute(self):
 9 |         link = "https://learningequality.org/kolibri.png"
10 | 
11 |         urls_to_replace = {}
12 |         result = downloader.get_archive_filename(
13 |             link, download_root="./", resource_urls=urls_to_replace
14 |         )
15 | 
16 |         expected = os.path.join("learningequality.org", "kolibri.png")
17 | 
18 |         assert result == expected
19 |         assert urls_to_replace[link] == expected
20 | 
21 |     def test_get_archive_filename_relative(self):
22 |         link = "../kolibri.png"
23 |         page_link = "https://learningequality.org/team/index.html"
24 | 
25 |         urls_to_replace = {}
26 |         result = downloader.get_archive_filename(
27 |             link, page_url=page_link, download_root="./", resource_urls=urls_to_replace
28 |         )
29 | 
30 |         expected = os.path.join("learningequality.org", "kolibri.png")
31 | 
32 |         assert result == expected
33 |         assert urls_to_replace[link] == expected
34 | 
35 |     def test_get_archive_filename_with_query(self):
36 |         link = "../kolibri.png?1.2.3"
37 |         page_link = "https://learningequality.org/team/index.html"
38 | 
39 |         urls_to_replace = {}
40 |         result = downloader.get_archive_filename(
41 |             link, page_url=page_link, download_root="./", resource_urls=urls_to_replace
42 |         )
43 | 
44 |         expected = os.path.join("learningequality.org", "kolibri_1.2.3.png")
45 | 
46 |         assert result == expected
47 |         assert urls_to_replace[link] == expected
48 | 
49 |         link = "../kolibri.png?v=1.2.3&i=u"
50 |         page_link = "https://learningequality.org/team/index.html"
51 | 
52 |         urls_to_replace = {}
53 |         result = downloader.get_archive_filename(
54 |             link, page_url=page_link, download_root="./", resource_urls=urls_to_replace
55 |         )
56 | 
57 |         expected = os.path.join("learningequality.org", "kolibri_v_1.2.3_i_u.png")
58 | 
59 |         assert result == expected
60 |         assert urls_to_replace[link] == expected
61 | 
62 |     def test_archive_path_as_relative_url(self):
63 |         link = "../kolibri.png?1.2.3"
64 |         page_link = "https://learningequality.org/team/index.html"
65 |         page_filename = downloader.get_archive_filename(page_link, download_root="./")
66 |         link_filename = downloader.get_archive_filename(
67 |             link, page_url=page_link, download_root="./"
68 |         )
69 |         rel_path = downloader.get_relative_url_for_archive_filename(
70 |             link_filename, page_filename
71 |         )
72 |         assert rel_path == "../kolibri_1.2.3.png"
73 | 


--------------------------------------------------------------------------------
/tests/test_licenses.py:
--------------------------------------------------------------------------------
  1 | """ Tests for license getting and serialization """
  2 | import json
  3 | 
  4 | import pytest
  5 | from le_utils.constants.licenses import ALL_RIGHTS_RESERVED
  6 | from le_utils.constants.licenses import CC_BY
  7 | from le_utils.constants.licenses import CC_BY_NC
  8 | from le_utils.constants.licenses import CC_BY_NC_ND
  9 | from le_utils.constants.licenses import CC_BY_NC_SA
 10 | from le_utils.constants.licenses import CC_BY_ND
 11 | from le_utils.constants.licenses import CC_BY_SA
 12 | from le_utils.constants.licenses import PUBLIC_DOMAIN
 13 | from le_utils.constants.licenses import SPECIAL_PERMISSIONS
 14 | 
 15 | from ricecooker.classes.licenses import get_license
 16 | 
 17 | 
 18 | """ *********** LICENSE FIXTURES *********** """
 19 | 
 20 | 
 21 | @pytest.fixture
 22 | def license_objects():
 23 |     regular_ids = [
 24 |         CC_BY,
 25 |         CC_BY_SA,
 26 |         CC_BY_ND,
 27 |         CC_BY_NC,
 28 |         CC_BY_NC_SA,
 29 |         CC_BY_NC_ND,
 30 |         ALL_RIGHTS_RESERVED,
 31 |         PUBLIC_DOMAIN,
 32 |     ]
 33 |     license_objects = []
 34 |     for regular_id in regular_ids:
 35 |         # with desciption and copyright_holder
 36 |         licence_obj = get_license(
 37 |             regular_id, copyright_holder="Some name", description="Le description"
 38 |         )
 39 |         assert licence_obj, "licence_obj should exist"
 40 |         license_objects.append(licence_obj)
 41 | 
 42 |         # with desciption only
 43 |         licence_obj = get_license(regular_id, description="Le description solo2")
 44 |         assert licence_obj, "licence_obj should exist"
 45 |         license_objects.append(licence_obj)
 46 | 
 47 |         # with copyright_holder only
 48 |         licence_obj = get_license(regular_id, copyright_holder="Some name3")
 49 |         assert licence_obj, "licence_obj should exist"
 50 |         license_objects.append(licence_obj)
 51 | 
 52 |         # bare
 53 |         licence_obj = get_license(regular_id)
 54 |         assert licence_obj, "licence_obj should exist"
 55 |         license_objects.append(licence_obj)
 56 | 
 57 |     return license_objects
 58 | 
 59 | 
 60 | @pytest.fixture
 61 | def special_license():
 62 |     return get_license(
 63 |         SPECIAL_PERMISSIONS,
 64 |         copyright_holder="Authorov",
 65 |         description="Only for use offline",
 66 |     )
 67 | 
 68 | 
 69 | """ *********** LICENSE TESTS *********** """
 70 | 
 71 | 
 72 | def test_the_license_fixtures(license_objects, special_license):
 73 |     assert len(license_objects) > 4
 74 |     assert special_license.license_id == SPECIAL_PERMISSIONS
 75 |     assert special_license.description
 76 | 
 77 | 
 78 | def test_bad_special_license():
 79 |     try:
 80 |         get_license(SPECIAL_PERMISSIONS, description=None)
 81 |         assert False, "Should not come here because of missing description"
 82 |     except AssertionError:
 83 |         assert True, "SPECIAL_PERMISSIONS without description should raise an exception"
 84 | 
 85 | 
 86 | def _compare_licence_objects(obj1, obj2):
 87 |     same = True
 88 |     if not obj1.license_id == obj2.license_id:
 89 |         same = False
 90 |     if not obj1.description == obj2.description:
 91 |         same = False
 92 |     if not obj1.copyright_holder == obj2.copyright_holder:
 93 |         same = False
 94 |     return same
 95 | 
 96 | 
 97 | def test_license_serilizibility(license_objects, special_license):
 98 |     orig_licenses = license_objects
 99 |     orig_licenses.append(special_license)
100 |     for licence_orig in orig_licenses:
101 |         # serizlize
102 |         license_dict = licence_orig.as_dict()
103 |         license_json = json.dumps(license_dict)
104 |         # deserizlize
105 |         license_copy_dict = json.loads(license_json)
106 |         license_copy = get_license(**license_copy_dict)
107 | 
108 |         same_attributes = _compare_licence_objects(licence_orig, license_copy)
109 |         assert same_attributes, "License attributes not the same after serizlize"
110 | 


--------------------------------------------------------------------------------
/tests/test_links.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from ricecooker.utils.html import replace_links
 4 | 
 5 | 
 6 | def test_replace_absolute_links():
 7 |     a_content = '<a href="http://replace.me/link/to/page.html">'
 8 |     noscheme_a_content = '<a href="//replace.me/link/to/page.html">'
 9 |     root_a_content = '<a href="/link/to/page.html">'
10 | 
11 |     img_content = '<img src="http://replace.me/img/hello.jpg">'
12 | 
13 |     img_srcset_content = '<img srcset="http://replace.me/img/hello.jpg 1x, http://replace.me/img/hello.jpg 2x">'
14 | 
15 |     urls_to_replace = {
16 |         "http://replace.me/img/hello.jpg": "img/hello.jpg",
17 |         "http://replace.me/link/to/page.html": "link/to/page.html",
18 |     }
19 | 
20 |     output = replace_links(img_content, urls_to_replace)
21 |     assert output == '<img src="img/hello.jpg">'
22 | 
23 |     output = replace_links(a_content, urls_to_replace)
24 |     assert output == '<a href="link/to/page.html">'
25 | 
26 |     output = replace_links(noscheme_a_content, urls_to_replace)
27 |     assert output == '<a href="link/to/page.html">'
28 | 
29 |     output = replace_links(root_a_content, urls_to_replace)
30 |     assert output == '<a href="link/to/page.html">'
31 | 
32 |     output = replace_links(img_srcset_content, urls_to_replace)
33 |     assert output == '<img srcset="img/hello.jpg 1x, img/hello.jpg 2x">'
34 | 
35 | 
def test_replace_relative_links():
    """With relative_links=True, replacements are expressed relative to content_dir."""
    urls_to_replace = {
        "http://replace.me/img/hello.jpg": "replace.me/img/hello.jpg",
        "http://replace.me/link/to/page.html": "replace.me/link/to/page.html",
    }
    content_dir = os.path.join("replace.me", "link", "from")
    download_root = "."

    # (input markup, expected output) pairs, checked in this order.
    cases = [
        (
            '<img src="http://replace.me/img/hello.jpg">',
            '<img src="../../img/hello.jpg">',
        ),
        ('<a href="http://replace.me/link/to/page.html">', '<a href="../to/page.html">'),
        # scheme-less form of the same URL
        ('<a href="//replace.me/link/to/page.html">', '<a href="../to/page.html">'),
        # root-relative form of the same URL
        ('<a href="/link/to/page.html">', '<a href="../to/page.html">'),
        (
            '<img srcset="http://replace.me/img/hello.jpg 1x, http://replace.me/img/hello.jpg 2x">',
            '<img srcset="../../img/hello.jpg 1x, ../../img/hello.jpg 2x">',
        ),
    ]

    for markup, expected in cases:
        output = replace_links(
            markup,
            urls_to_replace,
            download_root=download_root,
            content_dir=content_dir,
            relative_links=True,
        )
        assert output == expected
96 | 


--------------------------------------------------------------------------------
/tests/test_requests.py:
--------------------------------------------------------------------------------
 1 | """ Tests for handling requests to Kolibri Studio """
 2 | import copy
 3 | import uuid
 4 | 
 5 | import pytest
 6 | from le_utils.constants import licenses
 7 | 
 8 | from ricecooker.classes.nodes import DocumentNode
 9 | from ricecooker.classes.nodes import TopicNode
10 | from ricecooker.exceptions import InvalidNodeException
11 | from ricecooker.managers.tree import ChannelManager
12 | 
13 | 
14 | """ *********** TOPIC FIXTURES *********** """
15 | 
16 | 
@pytest.fixture
def topic_id():
    """Source id shared by the topic fixtures in this module."""
    source_id = "topic-id"
    return source_id
20 | 
21 | 
@pytest.fixture
def topic_content_id(channel_domain_namespace, topic_id):
    """UUID5 of the topic's source id within the channel domain namespace."""
    content_id = uuid.uuid5(channel_domain_namespace, topic_id)
    return content_id
25 | 
26 | 
@pytest.fixture
def topic_node_id(channel_node_id, topic_content_id):
    """UUID5 of the topic content id's hex form within the channel node id namespace."""
    content_hex = topic_content_id.hex
    return uuid.uuid5(channel_node_id, content_hex)
30 | 
31 | 
@pytest.fixture
def topic(topic_id):
    """A TopicNode titled "Topic" built from the shared topic id."""
    node = TopicNode(topic_id, "Topic")
    return node
35 | 
36 | 
@pytest.fixture
def invalid_topic(topic_id):
    """A TopicNode whose title is cleared to None after construction."""
    broken = TopicNode(topic_id, "Topic")
    # Clear the title afterwards so construction itself still succeeds.
    broken.title = None
    return broken
42 | 
43 | 
44 | """ *********** LOCAL DOCUMENT FIXTURES *********** """
45 | 
46 | 
@pytest.fixture
def invalid_document(document_file):
    """A DocumentNode whose license is cleared to None after construction."""
    broken = DocumentNode(
        "invalid", "Document", licenses.CC_BY, files=[document_file]
    )
    # Built with a real license, then stripped of it.
    broken.license = None
    return broken
52 | 
53 | 
54 | """ *********** TREE FIXTURES *********** """
55 | 
56 | 
@pytest.fixture
def tree(channel, topic, document):
    """ChannelManager over channel -> topic -> document."""
    # Assemble bottom-up: document under topic, then topic under channel.
    topic.add_child(document)
    channel.add_child(topic)
    manager = ChannelManager(channel)
    return manager
62 | 
63 | 
@pytest.fixture
def invalid_tree(invalid_channel, invalid_topic, invalid_document):
    """ChannelManager over a tree assembled entirely from the invalid fixtures."""
    # Assemble bottom-up: document under topic, then topic under channel.
    invalid_topic.add_child(invalid_document)
    invalid_channel.add_child(invalid_topic)
    manager = ChannelManager(invalid_channel)
    return manager
69 | 
70 | 
@pytest.fixture
def invalid_tree_2(channel, topic, invalid_document):
    """ChannelManager over copies of channel and topic holding the invalid document."""
    # Deep copies keep the shared ``channel`` and ``topic`` fixtures unmodified.
    channel_copy, topic_copy = copy.deepcopy(channel), copy.deepcopy(topic)
    topic_copy.add_child(invalid_document)
    channel_copy.add_child(topic_copy)
    return ChannelManager(channel_copy)
78 | 
79 | 
80 | """ TESTS """
81 | 
82 | 
def test_validate(tree, invalid_tree, invalid_tree_2):
    """The valid tree validates; both invalid trees raise InvalidNodeException."""
    assert tree.validate(), "Tree should pass validation"
    for broken in (invalid_tree, invalid_tree_2):
        with pytest.raises(InvalidNodeException):
            broken.validate()
87 | 


--------------------------------------------------------------------------------
/tests/test_settings.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from mock import patch
 4 | 
 5 | from ricecooker import chefs
 6 | 
 7 | 
 8 | settings = {"thumbnails": True, "compress": True}
 9 | 
10 | 
def test_settings_unset_default():
    """On a fresh chef, unset settings read as None or as the supplied default."""
    chef = chefs.SushiChef()

    for name in settings:
        assert chef.get_setting(name) is None
        assert chef.get_setting(name, default=False) is False
17 | 
18 | 
def test_settings():
    """A value stored in chef.SETTINGS is returned by get_setting, even with a default given."""
    chef = chefs.SushiChef()

    for name, value in settings.items():
        chef.SETTINGS[name] = value
        assert chef.get_setting(name) == value
        assert chef.get_setting(name, default=None) == value
27 | 
28 | 
def test_cli_args_override_settings():
    """
    For settings that can be controlled via the command line, ensure that the command line setting
    takes precedence over the default setting.
    """
    flag_names = ("thumbnails", "compress")
    argv_with_flags = ["sushichef.py", "--compress", "--thumbnails", "--token", "12345"]
    argv_without_flags = ["sushichef.py", "--token", "12345"]

    # Case 1: flags on the command line win over SETTINGS entries set to False.
    with patch.object(sys, "argv", list(argv_with_flags)):
        chef = chefs.SushiChef()
        for flag in flag_names:
            chef.SETTINGS[flag] = False

        for flag in flag_names:
            assert chef.get_setting(flag) is False

        chef.parse_args_and_options()
        for flag in flag_names:
            assert chef.get_setting(flag) is True

    # Case 2: flags on the command line populate settings that were never set.
    with patch.object(sys, "argv", list(argv_with_flags)):
        chef = chefs.SushiChef()

        assert len(chef.SETTINGS) == 0

        for flag in flag_names:
            assert chef.get_setting(flag) is None

        chef.parse_args_and_options()
        for flag in flag_names:
            assert chef.get_setting(flag) is True

    # Case 3: without the flags, the SETTINGS values are left untouched.
    with patch.object(sys, "argv", list(argv_without_flags)):
        chef = chefs.SushiChef()
        for flag in flag_names:
            chef.SETTINGS[flag] = False

        for flag in flag_names:
            assert chef.get_setting(flag) is False

        chef.parse_args_and_options()
        for flag in flag_names:
            assert chef.get_setting(flag) is False
77 | 


--------------------------------------------------------------------------------
/tests/test_zip.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import os
  3 | import tempfile
  4 | 
  5 | import pytest
  6 | 
  7 | from ricecooker.utils.zip import create_predictable_zip
  8 | 
  9 | 
 10 | # The MD5s in this object are generated by running this file as a script
 11 | # they should not be updated as they are now our baseline for what our predictable zip should produce
 12 | # so any changes to the implementation should not change these values, if they do, it's a bug.
 13 | TEST_CASES = {
 14 |     "nested_text": {
 15 |         "files": {"folder/nested.txt": "Nested content", "test.txt": "Hello World"},
 16 |         "expected_md5": "220f0d36a5150d3912a0eebee2738d80",  # Generated by running this file as a script
 17 |     },
 18 |     "reversed": {
 19 |         "files": {"b.txt": "content b", "a.txt": "content a"},
 20 |         "expected_md5": "5f3c72e2f32c5b7919cd6c31e5f169cd",  # Generated by running this file as a script
 21 |     },
 22 |     "binaryFiles": {
 23 |         "files": {
 24 |             "image.png": b"PNG\x89\x50\x4E\x47\x0D\x0A\x1A\x0A",
 25 |             "data.bin": bytes([0xFF, 0xD8, 0xFF, 0xE0]),
 26 |             "text.txt": "Mixed content",
 27 |         },
 28 |         "expected_md5": "18ba9ca5ba2ed25ada40111fcc055a82",  # Generated by running this file as a script
 29 |     },
 30 |     "nested_binary": {
 31 |         "files": {
 32 |             "folder/image.png": b"PNG\x89\x50\x4E\x47\x0D\x0A\x1A\x0A",
 33 |             "test.txt": "Hello World",
 34 |         },
 35 |         "expected_md5": "0fdfc3bd5b661ae3cde677d542426386",  # Generated by running this file as a script
 36 |     },
 37 |     "simple_binary": {
 38 |         "files": {"test.bin": bytes([0x00, 0x01, 0x02, 0x03])},
 39 |         "expected_md5": "461a08dc38d2b7dae48c2bc2e641b958",  # We'll generate this
 40 |     },
 41 | }
 42 | 
 43 | 
 44 | def create_test_files(files):
 45 |     temp_dir = tempfile.mkdtemp()
 46 |     for path, content in files.items():
 47 |         full_path = os.path.join(temp_dir, path.replace("/", os.sep))
 48 |         os.makedirs(os.path.dirname(full_path), exist_ok=True)
 49 |         mode = "wb" if isinstance(content, bytes) else "w"
 50 |         with open(full_path, mode) as f:
 51 |             f.write(content)
 52 |     return temp_dir
 53 | 
 54 | 
 55 | def generate_md5(temp_dir, entrypoint=None):
 56 |     zip_path = create_predictable_zip(temp_dir, entrypoint=entrypoint)
 57 |     with open(zip_path, "rb") as f:
 58 |         md5 = hashlib.md5(f.read()).hexdigest()
 59 |     os.remove(zip_path)
 60 |     return md5
 61 | 
 62 | 
 63 | def cleanup(temp_dir):
 64 |     for root, dirs, files in os.walk(temp_dir, topdown=False):
 65 |         for name in files:
 66 |             os.remove(os.path.join(root, name))
 67 |         for name in dirs:
 68 |             os.rmdir(os.path.join(root, name))
 69 |     os.rmdir(temp_dir)
 70 | 
 71 | 
 72 | @pytest.mark.parametrize("case_name,case", TEST_CASES.items())
 73 | def test_predictable_zip(case_name, case):
 74 |     temp_dir = create_test_files(case["files"])
 75 |     try:
 76 |         md5 = generate_md5(temp_dir, case.get("entrypoint"))
 77 |         assert md5 == case["expected_md5"], f"MD5 mismatch for {case_name}"
 78 |     finally:
 79 |         cleanup(temp_dir)
 80 | 
 81 | 
 82 | def test_order_independence():
 83 |     reversed_files = dict(reversed(list(TEST_CASES["reversed"]["files"].items())))
 84 |     temp_dir1 = create_test_files(TEST_CASES["reversed"]["files"])
 85 |     temp_dir2 = create_test_files(reversed_files)
 86 |     try:
 87 |         md5_1 = generate_md5(temp_dir1)
 88 |         md5_2 = generate_md5(temp_dir2)
 89 |         assert md5_1 == md5_2
 90 |     finally:
 91 |         cleanup(temp_dir1)
 92 |         cleanup(temp_dir2)
 93 | 
 94 | 
 95 | if __name__ == "__main__":
 96 |     for name, case in TEST_CASES.items():
 97 |         temp_dir = create_test_files(case["files"])
 98 |         md5 = generate_md5(temp_dir, case.get("entrypoint"))
 99 |         print(f"MD5 for {name}: {md5}")
100 |         cleanup(temp_dir)
101 | 


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/Channel.csv:
--------------------------------------------------------------------------------
1 | Title,Description,Domain,Source ID,Language,Thumbnail
2 | Test CSV channel with Exercises,This channel was created from the files in the channeldir/ directory and the metadata stored in CSV files,source.org,csv_channel_with_exercises,en,channeldir/channel_thumbnail.jpg
3 | 


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/Content.csv:
--------------------------------------------------------------------------------
1 | Path *,Title *,Source ID,Description,Author,Language,License ID *,License Description,Copyright Holder,Thumbnail
2 | channeldir/contentnodes,Content Nodes,3be352f9,Put folder description here,,en,,,,
3 | channeldir/contentnodes/audio,Audio Files,09219f2e,Put folder description here,,en,,,,
4 | channeldir/exercises,Exercises,fafafa007,"This doesn’t contain any files, but will be populated with some of the exercises from Exercises.csv",First Last (author's name),en,CC BY,,Copyright holder name,
5 | 


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/ExerciseQuestions.csv:
--------------------------------------------------------------------------------
 1 | Source ID *,Question ID *,Question type *,Question *,Option A,Option B,Option C,Option D,Option E,Options F...,Correct Answer *,Correct Answer 2,Correct Answer 3,Hint 1,Hint 2,Hint 3,Hint 4,Hint 5,Hint 6+
 2 | exrc1,1,single_selection,What is your 2+2?,1,2,3,4,5,,4,,,Add the two numbers together.,,,,,
 3 | exrc1,2,multiple_selection,Select all the solutions to x^2=4?,-2,-1,0,1,2,,-2,2,,Quadratic equations have multiple solutions.,Which number times itself gives 4?,Is there another number that also works?,,,
 4 | exrc1,3,input_question,What is the next integer after 2?,,,,,,,3,,,"Imagine the number line, what comes to the right of the number two?",,,,,
 5 | exrc2,1,single_selection,What is your 2+2?,1,2,3,4,5,,4,,,Add the two numbers together.,,,,,
 6 | exrc2,2,multiple_selection,Select all the solutions to x^2=4?,-2,-1,0,1,2,,-2,2,,Quadratic equations have multiple solutions.,Which number times itself gives 4?,Is there another number that also works?,,,
 7 | exrc2,3,input_question,What is the next integer after 2?,,,,,,,3,,,"Imagine the number line, what comes to the right of the number two?",,,,,
 8 | exrc3,1,single_selection,What is your 2+2?,1,2,3,4,5,,4,,,Add the two numbers together.,,,,,
 9 | exrc4,2,single_selection,"What is the area of the circle shown below
10 | 
11 | ![](figures/exrc4/circle-of-radius-2.png)",$\pi$,$2\pi$,$3\pi$,$4\pi$,$5\pi$,,$4\pi$,,,The area of a circle is proportional to the square of its radius.,The formula is $A=\pi r^2$.,In this case the circle has radius $r=2$ so the area of the circle is $A=4\pi$.,,,
12 | exrc5,3,multiple_selection,Select all the triangles.,![](figures/exrc5/triangle1.png),![](figures/exrc5/hexagon.png),![](figures/exrc5/triangle2.png),![](figures/exrc5/triangle3.png),![](figures/exrc5/octagon.png),![](figures/exrc5/square.png),![](figures/exrc5/triangle1.png),![](figures/exrc5/triangle2.png),![](figures/exrc5/triangle3.png),A triangle is a geometrical shape with three sides and three vertices.,,,,,
13 | 


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/Exercises.csv:
--------------------------------------------------------------------------------
1 | Path *,Title *,Source ID *,Description,Author,Language,License ID *,License Description,Copyright Holder,Number Correct,Out of Total,Randomize,Thumbnail
2 | channeldir/exercise1,First Exercise,exrc1,This is a really math exercise that will appear in the channel root.,Ivan Savov,en,CC BY,,Learning Equality,1,2,TRUE,channeldir/algebra_exercise_thumb.png
3 | channeldir/contentnodes/audio/Wzexercise,Second Exercise,exrc2,An exercise ,Ivan Savov,fr,CC BY,,Learning Equality,,,FALSE,channeldir/contentnodes/audio/WZ_exercise_thumbnail.png
4 | channeldir/exercises/exercise3,Third Exercise,exrc3,An exercise in the subfolder exercises/,Ivan Savov,en,CC BY,,Learning Equality,,,FALSE,
5 | channeldir/exercises/exercise4,Fourth Exercise,exrc4,An exercise that shows figures in question and use of LaTeX markup,Ivan Savov,en,CC BY,,Learning Equality,,,FALSE,
6 | channeldir/exercises/exercise5,Fifth Exercise,exrc5,An exercise which shows figures as answers,Ivan Savov,en,CC BY,,Learning Equality,,,FALSE,
7 | 


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/channeldir/algebra_exercise_thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/algebra_exercise_thumb.png


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/channeldir/channel_thumbnail.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/channel_thumbnail.jpg


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/channeldir/contentnodes/audio/WZ_exercise_thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/contentnodes/audio/WZ_exercise_thumbnail.png


--------------------------------------------------------------------------------
/tests/testchannels/csv_channel_with_exercises/channeldir/exercises/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testchannels/csv_channel_with_exercises/channeldir/exercises/.gitkeep


--------------------------------------------------------------------------------
/tests/testcontent/downloaded/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/downloaded/.gitkeep


--------------------------------------------------------------------------------
/tests/testcontent/exercises/eb3f3bf7c317408ee90995b5bcf4f3a59606aedd-data.json:
--------------------------------------------------------------------------------
1 | svgDataeb3f3bf7c317408ee90995b5bcf4f3a59606aedd({"range":[[-5,5.625],[-40,130]],"labels":[{"content":"\\small{1}","coordinates":[1,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{2}","coordinates":[2,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{3}","coordinates":[3,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{4}","coordinates":[4,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}2}","coordinates":[-2,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}3}","coordinates":[-3,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}4}","coordinates":[-4,0],"alignment":"below","typesetAsMath":true,"style":{}},{"content":"\\small{16}","coordinates":[0,16],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{32}","coordinates":[0,32],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{48}","coordinates":[0,48],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{64}","coordinates":[0,64],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{80}","coordinates":[0,80],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{96}","coordinates":[0,96],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{112}","coordinates":[0,112],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"\\small{\\llap{-}32}","coordinates":[0,-32],"alignment":"left","typesetAsMath":true,"style":{}},{"content":"y","coordinates":[0,120],"alignment":"above","typesetAsMath":true,"style":{}},{"content":"x","coordinates":[5,0],"alignment":"right","typesetAsMath":true,"style":{}},{"content":"\\blueD{y=f(x)}","coordinates":[2,100],"typesetAsMath":true,"style":{}}]});


--------------------------------------------------------------------------------
/tests/testcontent/exercises/no-wifi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/exercises/no-wifi.png


--------------------------------------------------------------------------------
/tests/testcontent/exercises/perseus_question_new_bar_graphs.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "answerArea": {
  3 |     "calculator": false,
  4 |     "chi2Table": false,
  5 |     "periodicTable": false,
  6 |     "tTable": false,
  7 |     "zTable": false
  8 |   },
  9 |   "hints": [
 10 |     {
 11 |       "content": "The bottom bar lines up to $\\purpleD{6}$.   \n\n![](web+graphie://cdn.kastatic.org/ka-perseus-graphie/d855aefe9a722f9a794b0883ebcdb8c37b4ba0c7)\n\nWhich type of fruit has $\\purpleD{6}$ in Luigi's home?",
 12 |       "images": {
 13 |         "web+graphie://cdn.kastatic.org/ka-perseus-graphie/d855aefe9a722f9a794b0883ebcdb8c37b4ba0c7": {
 14 |           "height": 330,
 15 |           "width": 404
 16 |         }
 17 |       },
 18 |       "replace": false,
 19 |       "widgets": {}
 20 |     },
 21 |     {
 22 |       "content": "Kind of fruit | Number\n:- | :-: \nOranges | $\\purpleD{6}$ \n\nLuigi has $\\purpleD{6}$ oranges.  So, the bottom bar should be labeled $\\purpleD{\\text{Oranges}}$.",
 23 |       "images": {},
 24 |       "replace": false,
 25 |       "widgets": {}
 26 |     },
 27 |     {
 28 |       "content": "Now let's label the other bars to match the table.",
 29 |       "images": {},
 30 |       "replace": false,
 31 |       "widgets": {}
 32 |     },
 33 |     {
 34 |       "content": "Here is the completed graph:\n\n![](web+graphie://cdn.kastatic.org/ka-perseus-graphie/95262ebaf42bdd1929e5d6d1e2853d3eb0a5cc74)",
 35 |       "images": {
 36 |         "web+graphie://cdn.kastatic.org/ka-perseus-graphie/95262ebaf42bdd1929e5d6d1e2853d3eb0a5cc74": {
 37 |           "height": 330,
 38 |           "width": 404
 39 |         }
 40 |       },
 41 |       "replace": false,
 42 |       "widgets": {}
 43 |     }
 44 |   ],
 45 |   "itemDataVersion": {
 46 |     "major": 0,
 47 |     "minor": 1
 48 |   },
 49 |   "question": {
 50 |     "content": "Luigi created a chart and a bar graph to show how many of each type of fruit were in his home.\n\nKind of fruit | Number \n:- | :-: \nApple | $7$ \nStrawberries | $3$ \nOranges | $6$ \nBananas| $2$ \n\n**Label each bar on the bar graph.**\n\n[[☃ label-image 1]]\n",
 51 |     "images": {},
 52 |     "widgets": {
 53 |       "label-image 1": {
 54 |         "alignment": "default",
 55 |         "graded": true,
 56 |         "options": {
 57 |           "choices": [
 58 |             "Apple",
 59 |             "Strawberries",
 60 |             "Oranges",
 61 |             "Bananas"
 62 |           ],
 63 |           "hideChoicesFromInstructions": true,
 64 |           "imageAlt": "",
 65 |           "imageHeight": 330,
 66 |           "imageUrl": "web+graphie://cdn.kastatic.org/ka-perseus-graphie/ab207c6f38c887130b68c078e6158a87aab60c45",
 67 |           "imageWidth": 404,
 68 |           "markers": [
 69 |             {
 70 |               "answers": [
 71 |                 "Strawberries"
 72 |               ],
 73 |               "label": "",
 74 |               "x": 24.1,
 75 |               "y": 17.7
 76 |             },
 77 |             {
 78 |               "answers": [
 79 |                 "Bananas"
 80 |               ],
 81 |               "label": "",
 82 |               "x": 24.4,
 83 |               "y": 35.7
 84 |             },
 85 |             {
 86 |               "answers": [
 87 |                 "Apple"
 88 |               ],
 89 |               "label": "",
 90 |               "x": 23.8,
 91 |               "y": 52.9
 92 |             },
 93 |             {
 94 |               "answers": [
 95 |                 "Oranges"
 96 |               ],
 97 |               "label": "",
 98 |               "x": 24.1,
 99 |               "y": 70.9
100 |             }
101 |           ],
102 |           "multipleAnswers": false,
103 |           "static": false
104 |         },
105 |         "static": false,
106 |         "type": "label-image",
107 |         "version": {
108 |           "major": 0,
109 |           "minor": 0
110 |         }
111 |       }
112 |     }
113 |   }
114 | }
115 | 


--------------------------------------------------------------------------------
/tests/testcontent/exercises/perseus_question_x43bbec76d5f14f88_bg.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "itemDataVersion": {
 3 |     "major": 0,
 4 |     "minor": 1
 5 |   },
 6 |   "hints": [
 7 |     {
 8 |       "content": "Функцията $f$ е положителна $($т.е. $f(x)>0)$, когато графиката ѝ е над оста $x$.\n\nТова е така, защото положителните стойности на $y$ са над оста $x$, а $y=f(x)$.\n\n![graph](web+graphie://ka-perseus-graphie.s3.amazonaws.com/d8daa074ec7d09ce3819d6259b3e4670701d2540)",
 9 |       "images": {},
10 |       "widgets": {},
11 |       "replace": false
12 |     },
13 |     {
14 |       "content": "От всички изброени варианти единственият верен интервал е $-2<x<-1$.",
15 |       "images": {},
16 |       "widgets": {},
17 |       "replace": false
18 |     }
19 |   ],
20 |   "question": {
21 |     "content": "![graph](web+graphie://ka-perseus-graphie.s3.amazonaws.com/eb3f3bf7c317408ee90995b5bcf4f3a59606aedd)\n\n**Избери интервала, в който $f(x)>0$.**\n\n[[☃ radio 1]]",
22 |     "images": {},
23 |     "widgets": {
24 |       "radio 1": {
25 |         "graded": true,
26 |         "version": {
27 |           "major": 1,
28 |           "minor": 0
29 |         },
30 |         "static": false,
31 |         "type": "radio",
32 |         "options": {
33 |           "onePerLine": true,
34 |           "displayCount": null,
35 |           "choices": [
36 |             {
37 |               "content": "$-2<x<-1$",
38 |               "correct": true
39 |             },
40 |             {
41 |               "content": "$-1<x<0$"
42 |             },
43 |             {
44 |               "content": "$0<x<1$",
45 |               "isNoneOfTheAbove": false,
46 |               "correct": false
47 |             }
48 |           ],
49 |           "hasNoneOfTheAbove": false,
50 |           "multipleSelect": false,
51 |           "randomize": false,
52 |           "deselectEnabled": false
53 |         },
54 |         "alignment": "default"
55 |       }
56 |     }
57 |   },
58 |   "answerArea": {
59 |     "zTable": false,
60 |     "chi2Table": false,
61 |     "calculator": false,
62 |     "periodicTable": false,
63 |     "tTable": false
64 |   }
65 | }


--------------------------------------------------------------------------------
/tests/testcontent/exercises/perseus_question_x43bbec76d5f14f88_en.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "question": {
 3 |     "content": "![graph](web+graphie://ka-perseus-graphie.s3.amazonaws.com/eb3f3bf7c317408ee90995b5bcf4f3a59606aedd)\n\n**Select the interval where $f(x)>0$.**\n\n[[☃ radio 1]]",
 4 |     "images": {
 5 |       "web+graphie://ka-perseus-graphie.s3.amazonaws.com/eb3f3bf7c317408ee90995b5bcf4f3a59606aedd": {
 6 |         "width": 425,
 7 |         "height": 425
 8 |       }
 9 |     },
10 |     "widgets": {
11 |       "radio 1": {
12 |         "type": "radio",
13 |         "alignment": "default",
14 |         "static": false,
15 |         "graded": true,
16 |         "options": {
17 |           "choices": [
18 |             {
19 |               "content": "$-2<x<-1$",
20 |               "correct": true
21 |             },
22 |             {
23 |               "content": "$-1<x<0$"
24 |             },
25 |             {
26 |               "isNoneOfTheAbove": false,
27 |               "correct": false,
28 |               "content": "$0<x<1$"
29 |             }
30 |           ],
31 |           "randomize": false,
32 |           "multipleSelect": false,
33 |           "displayCount": null,
34 |           "hasNoneOfTheAbove": false,
35 |           "onePerLine": true,
36 |           "deselectEnabled": false
37 |         },
38 |         "version": {
39 |           "major": 1,
40 |           "minor": 0
41 |         }
42 |       }
43 |     }
44 |   },
45 |   "answerArea": {
46 |     "calculator": false,
47 |     "chi2Table": false,
48 |     "periodicTable": false,
49 |     "tTable": false,
50 |     "zTable": false
51 |   },
52 |   "itemDataVersion": {
53 |     "major": 0,
54 |     "minor": 1
55 |   },
56 |   "hints": [
57 |     {
58 |       "replace": false,
59 |       "content": "The function $f$ is positive $($i.e. $f(x)>0)$ whenever its graph is above the $x$-axis.\n\nThis is because the positive $y$-values are above the $x$-axis, and $y=f(x)$.\n\n![graph](web+graphie://ka-perseus-graphie.s3.amazonaws.com/d8daa074ec7d09ce3819d6259b3e4670701d2540)",
60 |       "images": {
61 |         "web+graphie://ka-perseus-graphie.s3.amazonaws.com/d8daa074ec7d09ce3819d6259b3e4670701d2540": {
62 |           "width": 425,
63 |           "height": 425
64 |         }
65 |       },
66 |       "widgets": {}
67 |     },
68 |     {
69 |       "replace": false,
70 |       "content": "Out of our options, the only correct interval is $-2<x<-1$.",
71 |       "images": {},
72 |       "widgets": {}
73 |     }
74 |   ]
75 | }


--------------------------------------------------------------------------------
/tests/testcontent/generated/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/generated/.gitkeep


--------------------------------------------------------------------------------
/tests/testcontent/samples/sample_doc_with_toc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/samples/sample_doc_with_toc.pdf


--------------------------------------------------------------------------------
/tests/testcontent/samples/testdocument.epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/samples/testdocument.epub


--------------------------------------------------------------------------------
/tests/testcontent/samples/testsubtitles_ar.srt:
--------------------------------------------------------------------------------
 1 | 1
 2 | 00:00:12,464 --> 00:00:14,979
 3 | أمضيت ما يقرب من العقدين
 4 | 
 5 | 2
 6 | 00:00:14,979 --> 00:00:18,532
 7 | ألاحظ ما يجعل البعض أكثر حظًا من غيرهم
 8 | 
 9 | 3
10 | 00:00:18,536 --> 00:00:22,119
11 | وأحاول مساعدة الناس على زيادة حظهم.
12 | 


--------------------------------------------------------------------------------
/tests/testcontent/samples/thumbnail.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/samples/thumbnail.jpg


--------------------------------------------------------------------------------
/tests/testcontent/samples/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/samples/thumbnail.png


--------------------------------------------------------------------------------
/tests/testcontent/youtubecache/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learningequality/ricecooker/e70d42e252eeed900ea4cd491e0058e146c364df/tests/testcontent/youtubecache/.gitkeep


--------------------------------------------------------------------------------
/tests/utils/test_extensions.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ricecooker.utils.utils import extract_path_ext
 4 | 
 5 | # Tests generated by Claude Sonnet 3.7
 6 | 
 7 | 
 8 | def test_extract_path_ext_basic_file():
 9 |     """Test basic file path extension extraction."""
10 |     assert extract_path_ext("file.txt") == "txt"
11 |     assert extract_path_ext("path/to/file.jpg") == "jpg"
12 |     assert extract_path_ext("/absolute/path/to/file.pdf") == "pdf"
13 |     assert extract_path_ext("file.PNG") == "png"  # Tests lowercase conversion
14 | 
15 | 
def test_extract_path_ext_windows_paths():
    """Backslash-separated Windows paths, including UNC shares, are handled."""
    expectations = [
        ("C:\\Users\\name\\file.docx", "docx"),
        ("D:\\path\\to\\file.xlsx", "xlsx"),
        ("\\\\network\\share\\file.csv", "csv"),
    ]
    for path, expected in expectations:
        assert extract_path_ext(path) == expected
21 | 
22 | 
def test_extract_path_ext_urls():
    """The extension is taken from the path of http, https and ftp URLs."""
    expectations = [
        ("http://example.com/file.json", "json"),
        ("https://domain.org/path/to/file.xml", "xml"),
        ("ftp://files.net/downloads/file.zip", "zip"),
    ]
    for url, expected in expectations:
        assert extract_path_ext(url) == expected
28 | 
29 | 
def test_extract_path_ext_urls_with_query_params():
    """A query string after the file name does not affect the extracted extension."""
    expectations = [
        ("http://example.com/file.html?param=value", "html"),
        ("https://api.domain.org/data.json?id=123&token=abc", "json"),
        ("http://site.com/download.tar.gz?download=true", "gz"),
    ]
    for url, expected in expectations:
        assert extract_path_ext(url) == expected
37 | 
38 | 
def test_extract_path_ext_no_extension():
    """Paths without any extension raise ValueError when no default is given."""
    extensionless = (
        "file_without_extension",
        "/path/to/file",
        "http://example.com/api/v1/resource",
    )
    for path in extensionless:
        with pytest.raises(ValueError):
            extract_path_ext(path)
49 | 
50 | 
def test_extract_path_ext_empty_extension():
    """A trailing dot with nothing after it raises ValueError."""
    trailing_dot_paths = ("file.", "http://example.com/document.")
    for path in trailing_dot_paths:
        with pytest.raises(ValueError):
            extract_path_ext(path)
58 | 
59 | 
def test_extract_path_ext_default_ext():
    """``default_ext`` is returned when the path has no usable extension."""
    expectations = [
        ("file_without_extension", "txt"),
        ("file.", "dat"),
        ("http://example.com/api/v1/resource", "json"),
    ]
    for path, fallback in expectations:
        # In every case here the result is exactly the supplied default.
        assert extract_path_ext(path, default_ext=fallback) == fallback
68 | 
69 | 
def test_extract_path_ext_complex_paths():
    """With several dotted suffixes, only the last one counts as the extension."""
    expectations = [
        ("file.tar.gz", "gz"),
        ("path/to/archive.tar.bz2", "bz2"),
        ("http://example.com/path/to/file.min.js", "js"),
    ]
    for path, expected in expectations:
        assert extract_path_ext(path) == expected
75 | 
76 | 
def test_extract_path_ext_edge_cases():
    """Hash fragments, query-plus-fragment URLs and multi-dot names."""
    expectations = [
        # URL with hash fragment
        ("http://example.com/file.html#section", "html"),
        # URL with query and hash fragment
        ("http://example.com/file.php?id=1#top", "php"),
        # Path with multiple dots
        ("file.name.with.dots.txt", "txt"),
    ]
    for path, expected in expectations:
        assert extract_path_ext(path) == expected
87 | 


--------------------------------------------------------------------------------
/tests/vcr_config.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | try:
 4 |     import vcr
 5 | 
 6 |     my_vcr = vcr.VCR(
 7 |         cassette_library_dir="tests/cassettes",
 8 |         record_mode="new_episodes",
 9 |         path_transformer=vcr.VCR.ensure_suffix(".yaml"),
10 |         filter_headers=["authorization"],
11 |     )
12 | except ImportError:
13 | 
14 |     class VCR:
15 |         def use_cassette(self, *args, **kwargs):
16 |             return pytest.mark.skip("vcrpy is not available on this Python version")
17 | 
18 |     my_vcr = VCR()
19 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py3.{9,10,11,12}
 3 | 
 4 | [testenv]
 5 | basepython =
 6 |     py3.9: python3.9
 7 |     py3.10: python3.10
 8 |     py3.11: python3.11
 9 |     py3.12: python3.12
10 | extras = test,google_drive
11 | setenv =
12 |     PYTHONPATH = {toxinidir}
13 | commands =
14 |     pytest --basetemp={envtmpdir}
15 | 


--------------------------------------------------------------------------------