├── .nvmrc
├── .npmrc
├── .gitignore
├── .github
│   ├── dependabot.yml
│   ├── CODEOWNERS
│   ├── workflows
│   │   ├── rust.yml
│   │   ├── release.yml
│   │   ├── prod.yml
│   │   └── stage.yml
│   └── labels.json
├── CODE_OF_CONDUCT.md
├── src
│   ├── hash.rs
│   ├── update.rs
│   ├── package.rs
│   ├── diff.rs
│   ├── compress.rs
│   └── main.rs
├── Cargo.toml
├── LICENSE
├── SECURITY.md
├── scripts
│   └── run.sh
├── README.md
└── Cargo.lock

--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
v24

--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
engine-strict=true

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/target
/workbench

--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
version: 2

updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: weekly
    commit-message:
      prefix: "ci(deps): "

--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# MDN Differy CODEOWNERS
# ----------------------------------------------------------------------------
# Order is important. The last matching pattern takes precedence.
# For more detailed information, see:
# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
# ----------------------------------------------------------------------------

* @mdn/engineering

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Community Participation Guidelines

This repository is governed by Mozilla's code of conduct and etiquette guidelines.
For more details, please read the
[Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).

## How to Report

For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page.

--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
name: Rust

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

env:
  CARGO_TERM_COLOR: always

# No GITHUB_TOKEN permissions, as we don't use it.
permissions: {}

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          persist-credentials: false
      - name: Run fmt
        run: cargo fmt -- --check
      - name: Build
        run: cargo build --verbose
      - name: Run tests
        run: cargo test --verbose --all-features
      - name: Run clippy
        run: cargo clippy --all-features -- -D warnings

--------------------------------------------------------------------------------
/src/hash.rs:
--------------------------------------------------------------------------------
use async_std::{fs, path::Path};
use sha2::Digest;
use walkdir::WalkDir;

pub(crate) async fn hash_all(
    dir: &Path,
    out: &mut Vec<(String, String)>,
    base: &Path,
) -> std::io::Result<()> {
    for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
        let path = entry.path();
        if !path.is_dir() {
            let hash = sha2::Sha256::digest(fs::read(entry.path()).await?);
            out.push((
                format!("{hash:x}"),
                entry
                    .path()
                    .strip_prefix(base)
                    .unwrap()
                    .to_string_lossy()
                    .to_string(),
            ));
        }
    }
    Ok(())
}

--------------------------------------------------------------------------------
/src/update.rs:
--------------------------------------------------------------------------------
use std::{fs::File, io::BufReader, path::Path};

use chrono::NaiveDateTime;
use serde_derive::{Deserialize, Serialize};

#[derive(Deserialize, Serialize, Clone, Debug, Default)]
pub(crate) struct Update {
    pub date: Option<NaiveDateTime>,
    pub latest: Option<String>,
    pub updates: Vec<String>,
}

impl Update {
    pub fn from_file(path: &Path) -> std::io::Result<Self> {
        let file = File::open(path)?;
        let reader = BufReader::new(file);

        let u = serde_json::from_reader(reader)?;

        Ok(u)
    }

    pub fn save(&self, path: &Path) -> std::io::Result<()> {
        let file = File::create(path)?;
        serde_json::to_writer_pretty(file, self)?;
        Ok(())
    }
}

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "differy"
version = "0.0.16"
authors = ["Florian Dieminger "]
description = "diff directory trees via sha256sum"
edition = "2021"
license = "MIT"
repository = "https://github.com/fiji-flo/differy"
documentation = "https://docs.rs/crate/differy"
keywords = ["diff", "sha256sum"]
categories = ["command-line-utilities"]
readme = "README.md"
include = ["Cargo.toml", "src/**/*.rs", "README.md", "LICENSE"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
sha2 = "0.10"
walkdir = "2"
zip = "2"
serde = "1"
serde_derive = "1"
serde_json = "1"

[dependencies.clap]
version = "4"
features = ["cargo", "wrap_help"]

[dependencies.chrono]
version = "0.4"
features = ["serde"]

[dependencies.async-std]
version = "1"
features = ["attributes"]

--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: Release

on:
  push:
    tags:
      - v[0-9]+.*

# See: https://github.com/taiki-e/upload-rust-binary-action/#example-workflow-basic-usage
permissions:
  contents: write

jobs:
  create-release:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          persist-credentials: false
      - uses: taiki-e/create-gh-release-action@26b80501670402f1999aff4b934e1574ef2d3705 # v1.9.1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  upload-assets:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          persist-credentials: false
      - uses: taiki-e/upload-rust-binary-action@3962470d6e7f1993108411bc3f75a135ec67fc8c # v1.27.0
        with:
          bin: differy
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Florian Dieminger

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
# Security Policy

## Overview

This policy applies to MDN's website (`developer.mozilla.org`), backend services, and GitHub repositories in the [`mdn`](https://github.com/mdn) organization. Issues affecting other Mozilla products or services should be reported through the [Mozilla Security Bug Bounty Program](https://www.mozilla.org/en-US/security/bug-bounty/).

For non-security issues, please file a [content bug](https://github.com/mdn/content/issues/new/choose), a [website bug](https://github.com/mdn/fred/issues/new/choose), or a [content/feature suggestion](https://github.com/mdn/mdn/issues/new/choose).

## Reporting a Vulnerability

If you discover a potential security issue, please report it privately via HackerOne.

If you prefer not to use HackerOne, you can report it via Mozilla's security reporting channel.

## Bounty Program

Vulnerabilities in MDN may qualify for Mozilla's Bug Bounty Program. Eligibility and reward amounts are described on Mozilla's bug bounty pages.

Please use the above channels even if you are not interested in a bounty reward.
## Responsible Disclosure

Please do not publicly disclose details until Mozilla's security team and the MDN engineering team have verified and fixed the issue.

We appreciate your efforts to keep MDN and its users safe.

--------------------------------------------------------------------------------
/.github/workflows/prod.yml:
--------------------------------------------------------------------------------
name: Package for prod

on:
  workflow_dispatch:
    inputs:
      notes:
        description: "Notes"
        type: string
      yari_branch:
        description: "Yari branch to build against (default: main)"
        type: string
  schedule:
    # * is a special character in YAML so you have to quote this string
    - cron: "0 */24 * * *"
  workflow_call:
    secrets:
      GCP_PROJECT_NAME:
        required: true
      PING_URL_ISSUES:
        required: false
      WIP_PROJECT_ID:
        required: true

env:
  YARI_BRANCH: ${{ inputs.yari_branch || 'main' }}

jobs:
  everything:
    environment: prod
    permissions:
      contents: read
      id-token: write
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository code
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          persist-credentials: false

      - name: Set up node
        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
        with:
          node-version-file: ".nvmrc"
          package-manager-cache: false

      - name: Authenticate with GCP
        uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3.0.0
        with:
          token_format: access_token
          service_account: deploy-prod-updates@${{ secrets.GCP_PROJECT_NAME }}.iam.gserviceaccount.com
          workload_identity_provider: projects/${{ secrets.WIP_PROJECT_ID }}/locations/global/workloadIdentityPools/github-actions/providers/github-actions

      - name: Setup gcloud
        uses: google-github-actions/setup-gcloud@aa5489c8933f4cc7a4f7d45035b3b1440c9c10db # v3.0.1

      - name: Do it!
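        # Builds the content and packages/uploads the update bundles via scripts/run.sh below.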
        env:
          UPDATE_URL: https://updates.developer.mozilla.org
          BUCKET: updates-prod-developer-mozilla-e2b95ed55d379b1a
          GCS_BUCKET: updates-prod-mdn
        run: |
          ${{ github.workspace }}/scripts/run.sh

      - name: Ping
        env:
          ping_url: ${{ secrets.PING_URL }}
        if: ${{ env.ping_url != '' }}
        run: curl ${{ env.ping_url }}

--------------------------------------------------------------------------------
/.github/workflows/stage.yml:
--------------------------------------------------------------------------------
name: Package for stage

on:
  workflow_dispatch:
    inputs:
      notes:
        description: "Notes"
        type: string
      yari_branch:
        description: "Yari branch to build against (default: main)"
        type: string
  schedule:
    # * is a special character in YAML so you have to quote this string
    - cron: "0 */24 * * *"
  workflow_call:
    secrets:
      GCP_PROJECT_NAME:
        required: true
      PING_URL_ISSUES:
        required: false
      WIP_PROJECT_ID:
        required: true

env:
  YARI_BRANCH: ${{ inputs.yari_branch || 'main' }}

jobs:
  everything:
    environment: stage
    permissions:
      contents: read
      id-token: write
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository code
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          persist-credentials: false

      - name: Set up node
        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
        with:
          node-version-file: ".nvmrc"
          package-manager-cache: false

      - name: Authenticate with GCP
        uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3.0.0
        with:
          token_format: access_token
          service_account: deploy-stage-updates@${{ secrets.GCP_PROJECT_NAME }}.iam.gserviceaccount.com
          workload_identity_provider: projects/${{ secrets.WIP_PROJECT_ID }}/locations/global/workloadIdentityPools/github-actions/providers/github-actions

      - name: Setup gcloud
        uses: google-github-actions/setup-gcloud@aa5489c8933f4cc7a4f7d45035b3b1440c9c10db # v3.0.1

      - name: Do it!
        env:
          UPDATE_URL: https://updates.developer.allizom.org
          BUCKET: updates-stage-developer-allizom-6d533edfe2c2c683
          GCS_BUCKET: updates-stage-mdn
        run: |
          ${{ github.workspace }}/scripts/run.sh

      - name: Ping
        env:
          ping_url: ${{ secrets.PING_URL }}
        if: ${{ env.ping_url != '' }}
        run: curl ${{ env.ping_url }}

--------------------------------------------------------------------------------
/scripts/run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -o errexit
set -o nounset
set -o pipefail
set -x

mkdir -p workbench
export WORKBENCH=$(realpath workbench)
if [ ! -d "$WORKBENCH/.bin" ]; then mkdir "$WORKBENCH/.bin"; fi
export PATH=$WORKBENCH/.bin:$PATH

curl -L https://github.com/mdn/differy/releases/latest/download/differy-x86_64-unknown-linux-gnu.tar.gz | tar -xz -C $WORKBENCH/.bin/

cd $WORKBENCH

git clone -b $YARI_BRANCH https://github.com/mdn/yari.git
git clone https://github.com/mdn/content.git
git clone https://github.com/mdn/bcd-utils.git
git clone https://github.com/mdn/interactive-examples.git

mkdir $WORKBENCH/ghsamples
cd $WORKBENCH/ghsamples

# git clone https://github.com/mdn/web-tech-games.git
# git clone https://github.com/mdn/learning-area.git
# git clone https://github.com/mdn/css-examples.git
# git clone https://github.com/mdn/imsc.git
# git clone https://github.com/mdn/canvas-raycaster.git
# git clone https://github.com/mdn/dom-examples.git
# git clone https://github.com/mdn/webgl-examples.git
# git clone https://github.com/mdn/html-examples.git

cd $WORKBENCH/content
export REV=$(git rev-parse --short HEAD)

cd $WORKBENCH
curl -O $UPDATE_URL/update.json
if [ -f "update.json" ]
then
    export LATEST=$(jq -r -c '.latest' update.json)
    if [ "$LATEST" == "$REV" ]
    then
        echo "Bundle already exists for $REV"
        exit 0
    fi
fi

cd $WORKBENCH
export CONTENT_ROOT=$WORKBENCH/content
export BUILD_OUT_ROOT=$WORKBENCH/build
export BUILD_LIVE_SAMPLES_BASE_URL="https://live-samples.mdn.mozilla.net"
mkdir -p $BUILD_OUT_ROOT

cd $WORKBENCH/yari
yarn
yarn build:prepare
yarn build

cd $WORKBENCH/bcd-utils/api
npm install
npm run generate
mkdir -p $BUILD_OUT_ROOT/bcd
for query in $(<$BUILD_OUT_ROOT/allBrowserCompat.txt)
do
    mv out/v0/current/${query}.json $BUILD_OUT_ROOT/bcd/ || true
done

cd $WORKBENCH/interactive-examples
npm install
npm run build
mv docs $BUILD_OUT_ROOT/examples

cd $WORKBENCH
if [ -f "update.json" ]
then
    for OLD_REV in $(jq -r -c '.updates[]' update.json)
    do
        curl -O $UPDATE_URL/packages/$OLD_REV-checksums.zip
    done
    curl -O $UPDATE_URL/packages/$LATEST-checksums.zip
fi

differy package $BUILD_OUT_ROOT --rev $REV
cp update.json ${REV}-update.json
cp ${REV}-content.json content.json

# Sync to GCP
gsutil -m -h "Cache-Control:public, max-age=86400" cp "${REV}-*.zip" gs://${GCS_BUCKET}/packages/
gsutil -m -h "Cache-Control:public, max-age=86400" cp "${REV}-*.json" gs://${GCS_BUCKET}/packages/
gsutil cp update.json gs://${GCS_BUCKET}/
gsutil cp content.json gs://${GCS_BUCKET}/

--------------------------------------------------------------------------------
/.github/labels.json:
--------------------------------------------------------------------------------
[
  {
    "name": "good first issue",
    "color": "028c46",
    "description": "A good issue for newcomers to get started with."
  },
  {
    "name": "help wanted",
    "color": "028c46",
    "description": "If you know something about this, we would love your help!"
  },
  {
    "name": "needs info",
    "color": "028c46",
    "description": "This needs more information to review or act on."
  },
  {
    "name": "needs triage",
    "color": "028c46",
    "description": "Triage needed by staff and/or partners. Automatically applied when an issue is opened."
  },
  {
    "name": "expert help needed",
    "color": "028c46",
    "description": "This needs more information from a subject matter expert (SME)."
  },
  {
    "name": "idle",
    "color": "028c46",
    "description": "Issues and pull requests with no activity for three months."
  },
  {
    "name": "on hold",
    "color": "028c46",
    "description": "Waiting on something else before this can be moved forward."
  },
  {
    "name": "for later",
    "color": "028c46",
    "description": "Not planned at this time."
  },
  {
    "name": "needs content update",
    "color": "028c46",
    "description": "Needs update to the content to support this change."
  },
  {
    "name": "chore",
    "color": "028c46",
    "description": "A routine task."
  },
  {
    "name": "enhancement",
    "color": "028c46",
    "description": "Improves an existing feature."
  },
  {
    "name": "bug",
    "color": "c05964",
    "description": "Indicates an unexpected problem or unintended behavior."
  },
  {
    "name": "wontfix",
    "color": "c05964",
    "description": "Deemed to be outside the scope of the project or would require significant time and resources to fix."
  },
  {
    "name": "effort: small",
    "color": "866dc1",
    "description": "Task is a small effort."
  },
  {
    "name": "effort: medium",
    "color": "866dc1",
    "description": "Task is a medium effort."
  },
  {
    "name": "effort: large",
    "color": "866dc1",
    "description": "Task is a large effort."
  },
  {
    "name": "p0",
    "color": "6e8bc1",
    "description": "Urgent. We will address this as soon as possible."
  },
  {
    "name": "p1",
    "color": "6e8bc1",
    "description": "We will address this soon and will provide capacity from our team for it in the next few releases."
  },
  {
    "name": "p2",
    "color": "6e8bc1",
    "description": "We want to address this but may have other higher priority items."
  },
  {
    "name": "p3",
    "color": "6e8bc1",
    "description": "We don't have visibility when this will be addressed."
  }
]

--------------------------------------------------------------------------------
/src/package.rs:
--------------------------------------------------------------------------------
use async_std::{
    fs::write,
    path::{Path, PathBuf},
};

use crate::{
    compress::{self, zip_append_buf},
    diff::Diff,
};

const CONTENT_FILENAME: &str = "content.zip";
const UPDATE_FILENAME: &str = "update.zip";
const REMOVED_FILENAME: &str = "removed";
const DIFF_LIST_FILENAME: &str = "diff.json";
const CONTENT_LIST_FILENAME: &str = "content.json";
const APP_PREFIX: &str = "app";

fn build_path<I: Into<PathBuf>>(base: I, file_name: &str, prefix: &str, app: bool) -> PathBuf {
    let mut full_name = String::new();
    full_name.push_str(prefix);
    full_name.push('-');
    if app {
        full_name.push_str(APP_PREFIX);
        full_name.push('-');
    }
    full_name.push_str(file_name);
    let mut out = base.into();
    out.push(full_name);
    out
}

pub(crate) async fn package_update(
    root: &Path,
    diff: &Diff,
    out: &Path,
    prefix: &str,
) -> std::io::Result<()> {
    let update_out = build_path(out, UPDATE_FILENAME, prefix, false);
    compress::zip_files(diff.update_iter(), root, &update_out, false).await?;
    zip_append_buf(
        &update_out,
        &[(REMOVED_FILENAME, diff.removed.join("\n").as_bytes())],
    )?;

    let update_app_out = build_path(out, UPDATE_FILENAME, prefix, true);
    compress::zip_files(diff.update_iter(), root, &update_app_out, true).await?;
    zip_append_buf(
        &update_app_out,
        &[(REMOVED_FILENAME, diff.removed.join("\n").as_bytes())],
    )?;

    let removed_out = build_path(out, REMOVED_FILENAME, prefix, false);
    write(removed_out, diff.removed.join("\n").as_bytes()).await?;

    let diff_list_out = build_path(out, DIFF_LIST_FILENAME, prefix, false);
    write(diff_list_out, serde_json::to_string(diff)?).await?;

    Ok(())
}

pub(crate) async fn package_content<T: AsRef<str>>(
    root: &Path,
    out: &Path,
    prefix: &str,
    hashes: &[(T, T)],
) -> std::io::Result<()> {
    let content_out = build_path(out, CONTENT_FILENAME, prefix, false);
    compress::zip_dir(root, &content_out, false).await?;
    let content_app_out = build_path(out, CONTENT_FILENAME, prefix, true);
    compress::zip_dir(root, &content_app_out, true).await?;

    let content_list_out = build_path(out, CONTENT_LIST_FILENAME, prefix, false);
    let list = hashes
        .iter()
        .map(|(_, f)| f.as_ref())
        .collect::<Vec<_>>();
    write(content_list_out, serde_json::to_string(&list)?).await?;
    Ok(())
}

pub(crate) async fn package_hashes<T: AsRef<str>>(
    hashes: &[(T, T)],
    out: &Path,
    prefix: &str,
) -> std::io::Result<()> {
    let mut buf = vec![];
    for (hash, file) in hashes {
        buf.extend(format!("{} {}\n", hash.as_ref(), file.as_ref()).as_bytes())
    }
    let file_name = build_path("", "checksums", prefix, false);
    let mut out_file_name = out.to_path_buf();
    out_file_name.push(&file_name);
    out_file_name.set_extension("zip");
    compress::zip_content(file_name.to_str().unwrap(), &buf, &out_file_name)?;
    Ok(())
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Differy - MDN's partial update bundler

Due to the nature of
[yari], we don't know which articles have changed since our last build:
KumaScript in an unmodified file might render differently than it did last time.
At some point we should solve this in [yari], but not for now.

## Why

We need content bundles for the offline capability of the MDN apps. Further, we need
daily updates to deliver new content without downloading the huge content bundle.
**Differy** creates these bundles, and a GitHub Action uploads them for distribution.

## Basic Usage and Example

**Differy** generates updates based on a list of hashes of all files generated in previous
builds. As input we additionally need a fresh build of [mdn/content] and
[mdn/interactive-examples], and the short rev of [mdn/content]:

```sh
differy package $BUILD_OUT_ROOT --rev $(cd $CONTENT; git rev-parse --short HEAD)
```

On top of that we want a reference `update.json` and the checksum archives for
all versions we want to generate updates for.

### Output

The `$BUILD_OUT_ROOT` directory should contain the artifacts of:

- `yarn build:prepare && yarn build -n` from [mdn/content]
- `yarn build` from [mdn/interactive-examples]

**Differy** generates:

- `xxxxxxxxx-content.zip`: all of `$BUILD_OUT_ROOT`
- `xxxxxxxxx-app-content.zip`: all of `$BUILD_OUT_ROOT`,
  with modified links to _interactive-examples_
- `xxxxxxxxx-content.json`: a JSON file containing the names of all
  content files
- `xxxxxxxxx-checksums.zip`: the checksums of all content files
- `xxxxxxxxx-yyyyyyyyy-update.zip`: the changed files between `xxxxxxxxx` and
  `yyyyyyyyy`, plus a file called `removed` containing a list of all files that
  have been removed
- `xxxxxxxxx-yyyyyyyyy-app-update.zip`: the same, with modified links
- `xxxxxxxxx-yyyyyyyyy-diff.json`: a JSON file containing the names of changed
  files between `xxxxxxxxx` and `yyyyyyyyy`
- `update.json`: the modified input file

### Example

Assume we have the following scenario:

```sh
> ls -1 $(pwd)
3bfe5e8ee-checksums.zip
723965504-checksums.zip
update.json

> cat update.json
{
  "date": "2021-08-20T13:43:20.024561",
  "latest": "3bfe5e8ee",
  "updates": [
    "723965504"
  ]
}

> echo $(cd $CONTENT; git rev-parse --short HEAD)
c4123a3f1
```

We now run:

```sh
> differy package $BUILD_OUT_ROOT --rev c4123a3f1
packaging update c4123a3f1 → 3bfe5e8ee
packaging update c4123a3f1 → 723965504
building content for c4123a3f1
```

This will generate:

```sh
> ls -1 {c4123a3f1*,update.json}

c4123a3f1-3bfe5e8ee-app-update.zip
c4123a3f1-3bfe5e8ee-diff.json
c4123a3f1-3bfe5e8ee-update.zip
c4123a3f1-723965504-app-update.zip
c4123a3f1-723965504-diff.json
c4123a3f1-723965504-update.zip
c4123a3f1-app-content.zip
c4123a3f1-checksums.zip
c4123a3f1-content.zip
c4123a3f1-content.json
update.json
```

## Automating and Uploading Artifacts

We include a shell script that automates everything we need to generate
the latest bundles and then uploads them to GCS for distribution.
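On the consuming side, applying one of these bundles is the mirror image of
packaging it. Here is a minimal sketch — illustrative only, not a shipped
client — assuming `jq` and `unzip` are on the `PATH`, `$UPDATE_URL` points at
the distribution bucket, and the previously installed bundle is unpacked under
`content/`:

```sh
#!/usr/bin/env bash
# Hypothetical client-side update; all names and paths are illustrative.
LOCAL_REV="723965504" # rev of the locally unpacked content bundle

curl -sO "$UPDATE_URL/update.json"
LATEST=$(jq -r '.latest' update.json)

if [ "$LATEST" != "$LOCAL_REV" ]; then
  # Update bundles are named <new>-<old>-update.zip (see the example above).
  curl -sO "$UPDATE_URL/packages/${LATEST}-${LOCAL_REV}-update.zip"
  unzip -o "${LATEST}-${LOCAL_REV}-update.zip" -d content/
  # The bundled `removed` file lists paths that no longer exist upstream.
  while IFS= read -r f; do
    rm -f "content/$f"
  done < content/removed
fi
```

The MDN apps implement this logic natively; the sketch only mirrors the bundle
layout described above.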
For the producing side, take a look at [scripts/run.sh](scripts/run.sh).

[yari]: https://github.com/mdn/yari
[mdn/content]: https://github.com/mdn/content
[mdn/interactive-examples]: https://github.com/mdn/interactive-examples

--------------------------------------------------------------------------------
/src/diff.rs:
--------------------------------------------------------------------------------
use std::collections::HashSet;

use async_std::{
    fs::{self, read_to_string, File},
    io::prelude::WriteExt,
    path::Path,
};
use serde_derive::Serialize;

#[derive(Serialize)]
pub(crate) struct Diff {
    pub added: Vec<String>,
    pub removed: Vec<String>,
    pub modified: Vec<String>,
}

impl Diff {
    #[allow(dead_code)]
    pub async fn from_path(path: &Path) -> std::io::Result<Self> {
        let diff = read_to_string(path).await?;
        let mut added = vec![];
        let mut removed = vec![];
        let mut modified = vec![];
        for line in diff.split('\n') {
            if let Some(file) = line.strip_prefix("+ ") {
                added.push(file.to_string())
            }
            if let Some(file) = line.strip_prefix("~ ") {
                modified.push(file.to_string())
            }
            if let Some(file) = line.strip_prefix("- ") {
                removed.push(file.to_string())
            }
        }
        Ok(Self {
            added,
            removed,
            modified,
        })
    }

    pub fn update_iter(&self) -> impl Iterator<Item = &String> {
        self.added.iter().chain(self.modified.iter())
    }

    pub async fn write(&self, out_file: &mut File) -> std::io::Result<()> {
        for filename in &self.removed {
            out_file
                .write_all(format!("- {filename}\n").as_bytes())
                .await?;
        }
        for filename in &self.added {
            out_file
                .write_all(format!("+ {filename}\n").as_bytes())
                .await?;
        }
        for filename in &self.modified {
            out_file
                .write_all(format!("~ {filename}\n").as_bytes())
                .await?;
        }
        Ok(())
    }
}

pub(crate) fn parse_hashes(hashes: &str) -> Vec<(&str, &str)> {
    let mut out = vec![];
    for line in hashes.split('\n') {
        let mut split = line.split(' ').filter(|s| !s.is_empty());
        if let (Some(hash), Some(file)) = (split.next(), split.next()) {
            out.push((hash, file))
        }
    }
    out
}

pub(crate) fn diff<T: AsRef<str>, S: AsRef<str>>(
    a: &[(T, T)],
    b: &[(S, S)],
) -> std::io::Result<Diff> {
    let a_set: HashSet<&str> = a.iter().map(|(hash, _)| hash.as_ref()).collect();
    let b_set: HashSet<&str> = b.iter().map(|(hash, _)| hash.as_ref()).collect();

    let b_not_a: HashSet<_> = b_set.difference(&a_set).collect();

    let a_file_set: HashSet<&str> = a.iter().map(|(_, file)| file.as_ref()).collect();
    let b_file_set: HashSet<&str> = b.iter().map(|(_, file)| file.as_ref()).collect();

    let a_not_b_file: HashSet<_> = a_file_set.difference(&b_file_set).collect();
    let b_not_a_file: HashSet<_> = b_file_set.difference(&a_file_set).collect();
    let a_and_b_file: HashSet<_> = a_file_set.intersection(&b_file_set).collect();

    let removed: Vec<String> = a
        .iter()
        .filter(|(_, file)| a_not_b_file.contains(&file.as_ref()))
        .map(|(_, file)| file.as_ref().to_string())
        .collect();
    let added: Vec<String> = b
        .iter()
        .filter(|(_, file)| b_not_a_file.contains(&file.as_ref()))
        .map(|(_, file)| file.as_ref().to_string())
        .collect();

    let modified: Vec<String> = b
        .iter()
        .filter(|(hash, file)| {
            b_not_a.contains(&hash.as_ref()) &&
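            // A file counts as modified when its hash is new in `b` while its
            // path exists in both snapshots (brand-new paths land in `added`).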
                a_and_b_file.contains(&file.as_ref())
        })
        .map(|(_, file)| file.as_ref().to_string())
        .collect();

    Ok(Diff {
        removed,
        added,
        modified,
    })
}
pub(crate) async fn diff_hash_files(a: &Path, b: &Path) -> std::io::Result<Diff> {
    let a = fs::read_to_string(a).await?;
    let b = fs::read_to_string(b).await?;

    let a = parse_hashes(&a);
    let b = parse_hashes(&b);

    diff(&a, &b)
}

--------------------------------------------------------------------------------
/src/compress.rs:
--------------------------------------------------------------------------------
use async_std::fs::{read, read_to_string};
use async_std::path::Path;
use std::io::{Read, Write};
use walkdir::WalkDir;
use zip::result::ZipResult;
use zip::write::SimpleFileOptions;
use zip::{CompressionMethod, ZipArchive, ZipWriter};

const APP_REPLACEMENTS: &[(&str, &str)] = &[
    (
        "src=\\\"https://interactive-examples.mdn.mozilla.net",
        "src=\\\"mdn-app://examples/examples",
    ),
    (
        "src=\\\"https://live-samples.mdn.mozilla.net",
        "src=\\\"mdn-app://live-samples",
    ),
];

const WEB_REPLACEMENTS: &[(&str, &str)] = &[
    (
        "src=\\\"https://interactive-examples.mdn.mozilla.net",
        "src=\\\"/examples",
    ),
    ("src=\\\"https://live-samples.mdn.mozilla.net", "src=\\\""),
];

pub fn replace(input: String, replace: &[(&str, &str)]) -> String {
    let mut result = String::new();
    let mut last_end = 0;
    let mut matches = vec![];
    for (from, _) in replace {
        matches.extend(input.match_indices(from));
    }
    if matches.is_empty() {
        return input;
    }
    matches.sort_by(|(a, _), (b, _)| a.partial_cmp(b).unwrap());
    for (start, part) in matches {
        result.push_str(unsafe { input.get_unchecked(last_end..start) });
        let to = replace
            .iter()
            .find_map(|(from, to)| if *from == part { Some(to) } else { None })
            .unwrap();
        result.push_str(to);
        last_end = start + part.len();
    }
    result.push_str(unsafe { input.get_unchecked(last_end..input.len()) });
    result
}

pub(crate) fn zip_content(file_name: &str, content: &[u8], out_file: &Path) -> ZipResult<()> {
    let out_path = Path::new(out_file);
    let file = std::fs::File::create(out_path)?;

    let mut zip = ZipWriter::new(file);
    let options = SimpleFileOptions::default()
        .compression_method(CompressionMethod::DEFLATE)
        .unix_permissions(0o644);

    zip.start_file(file_name, options)?;
    zip.write_all(content)?;
    let mut w = zip.finish()?;
    w.flush()?;
    Ok(())
}

pub(crate) fn unzip_content(zip_file: &Path, file_name: &str) -> ZipResult<String> {
    let zipfile = std::fs::File::open(zip_file)?;
    let mut archive = ZipArchive::new(zipfile)?;
    let mut file = archive.by_name(file_name)?;

    let mut contents = String::new();
    file.read_to_string(&mut contents)?;
    Ok(contents)
}

pub(crate) fn zip_append_buf<T: AsRef<str>, B: AsRef<[u8]>>(
    zip_file_path: &Path,
    files: &[(T, B)],
) -> ZipResult<()> {
    let file = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .open(zip_file_path)?;

    let mut zip = ZipWriter::new_append(file)?;
    let options = SimpleFileOptions::default()
        .compression_method(CompressionMethod::DEFLATE)
        .unix_permissions(0o644);

    for (file_name, buf) in files {
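        // `ZipWriter::new_append` re-opens the existing archive, so each
        // buffer below is written as an additional entry after the ones
        // already present.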
        zip.start_file(file_name.as_ref(), options)?;
        zip.write_all(buf.as_ref())?;
    }
    let mut w = zip.finish()?;
    w.flush()?;
    Ok(())
}

pub(crate) async fn zip_files<T: AsRef<str>>(
    files: impl Iterator<Item = T>,
    src_dir: &Path,
    out_file: &Path,
    app: bool,
) -> ZipResult<()> {
    let out_path = Path::new(out_file);
    let file = std::fs::File::create(out_path)?;

    let mut zip = ZipWriter::new(file);
    let options = SimpleFileOptions::default();

    for path in files {
        let full_path = src_dir.join(path.as_ref());

        if full_path.is_file().await {
            zip.start_file(path.as_ref(), options)?;

            if path.as_ref().ends_with("index.json") {
                let mut buf = read_to_string(full_path).await?;
                if app {
                    buf = replace_all_app(buf);
                } else {
                    buf = replace_all_web(buf);
                }
                zip.write_all(buf.as_bytes())?;
            } else {
                let buf = read(full_path).await?;
                zip.write_all(&buf)?;
            }
        } else {
            zip.add_directory(path.as_ref(), options)?;
        }
    }
    let mut w = zip.finish()?;
    w.flush()?;
    Ok(())
}

fn replace_all_app(input: String) -> String {
    replace(input, APP_REPLACEMENTS)
}

fn replace_all_web(input: String) -> String {
    replace(input, WEB_REPLACEMENTS)
}

pub(crate) async fn zip_dir(src_dir: &Path, out_file: &Path, app: bool) -> ZipResult<()> {
    let path = Path::new(out_file);
    let file = std::fs::File::create(path)?;

    let mut zip = ZipWriter::new(file);
    let options = SimpleFileOptions::default();

    for entry in WalkDir::new(src_dir).into_iter().filter_map(|e| e.ok()) {
        let path = entry.path();
        let name = path.strip_prefix(src_dir).unwrap().to_str().unwrap();

        if path.is_file() {
            zip.start_file(name, options)?;
            if name.ends_with("index.json") {
                let mut buf = read_to_string(path).await?;
                if app {
                    buf = replace_all_app(buf);
                } else {
                    buf = replace_all_web(buf);
                }
                zip.write_all(buf.as_bytes())?;
            } else {
                let buf = read(path).await?;
                zip.write_all(&buf)?;
            }
        } else if !name.is_empty() {
            zip.add_directory(name, options)?;
        }
    }
    let mut w = zip.finish()?;
    w.flush()?;
    Ok(())
}

#[cfg(test)]
mod test {
    use super::replace_all_web;

    #[test]
    fn test_replace_web() {
        let raw = r#"