├── .eslintrc.json ├── .github ├── ISSUE_TEMPLATE │ ├── general-feedback.yml │ └── topic-coverage.yml └── workflows │ └── prod.yaml ├── .gitignore ├── .htmllintrc ├── LICENSE ├── README.md ├── css ├── _footer.scss ├── ai.css └── style.scss ├── fonts ├── Cabin-VariableFont_wdth,wght.ttf ├── Inter-VariableFont_slnt,wght.ttf ├── Montserrat-VariableFont_wght.ttf └── SpaceGrotesk-VariableFont_wght.ttf ├── img ├── ai │ ├── ai-guide.jpg │ ├── ai-guide.svg │ ├── ai-header-bg.jpg │ ├── ai-header-nodes.png │ ├── carousels │ │ ├── how-llm-works │ │ │ ├── decoding.jpg │ │ │ ├── embedding.jpg │ │ │ ├── output.jpg │ │ │ ├── self-attention.jpg │ │ │ └── tokenization.jpg │ │ └── parameters │ │ │ ├── llm-01-model.jpg │ │ │ ├── llm-02-maxlength.jpg │ │ │ ├── llm-03-temp.jpg │ │ │ ├── llm-04-top-p.jpg │ │ │ └── llm-05-frequency.jpg │ └── ui │ │ ├── btn-section-lg.jpg │ │ └── colab.png ├── blog-header.png ├── colab.png ├── discord-mark-black.png ├── favicons │ ├── apple-touch-icon.png │ ├── favicon-196x196.png │ └── favicon.ico ├── image-models │ ├── autoencoder_diagram.png │ ├── clip_model_diagram.png │ ├── comfyui.png │ ├── control_net_conditioning_encoder_diagram.png │ ├── control_net_trainable_copy_diagram.png │ ├── diffusion.png │ ├── diffusion_model_diagram.png │ ├── dreambooth_diagram.png │ ├── latent_diffusion_model_diagram.png │ ├── low_rank_adaptation_diagram.png │ ├── pope_fake.jpg │ ├── purple_diffusion.png │ ├── screenshot_001.png │ ├── screenshot_002.png │ ├── screenshot_003.png │ ├── screenshot_004.png │ ├── screenshot_005.png │ ├── screenshot_006.png │ ├── screenshot_007.png │ ├── screenshot_008.png │ ├── screenshot_009.png │ ├── screenshot_010.png │ ├── screenshot_011.png │ ├── screenshot_012.png │ ├── screenshot_013.png │ ├── screenshot_014.png │ ├── screenshot_015.png │ ├── screenshot_016.png │ ├── screenshot_017.png │ ├── screenshot_018.png │ ├── screenshot_019.png │ ├── screenshot_020.png │ ├── sd_cn_inference.png │ ├── stable-video-diffusion.gif │ ├── stable_diffusion_inference_diagram.png │ ├── text_encoder_diagram.png │ ├── textual_inversion_diagram.png │ ├── unet_model_diagram.png │ ├── variational_autoencoder_diagram.png │ ├── variational_autoencoder_diagram2.png │ ├── variational_autoencoder_diagram3.png │ ├── variational_autoencoder_diagram4.png │ └── variational_autoencoder_diagram5.png ├── logos │ └── social │ │ ├── linkedin-white.svg │ │ ├── mastodon-white.svg │ │ ├── spotify-white.svg │ │ └── tiktok-white.svg ├── mozilla-256.jpg ├── mozilla-only.png ├── newsletter-image.png ├── placeholder.png ├── report.svg ├── server.svg ├── sign-up-high-res.png ├── target.svg ├── theses.svg └── world.svg ├── netlify.toml ├── package-lock.json ├── package.json ├── pages ├── README.md ├── content │ ├── ai-basics │ │ └── index.html │ ├── audio-video-models │ │ └── index.html │ ├── choosing-ml-models │ │ └── index.html │ ├── comparing-open-llms │ │ └── index.html │ ├── contributions │ │ └── index.html │ ├── embeddings-and-rag │ │ └── index.html │ ├── fine-tuning-llms │ │ └── index.html │ ├── image-models │ │ └── index.html │ ├── introduction │ │ └── index.html │ ├── llms-101 │ │ └── index.html │ ├── models-from-scratch │ │ └── index.html │ ├── notable-projects │ │ └── index.html │ └── running-llms-locally │ │ └── index.html └── index.html ├── postcss.config.js ├── scripts ├── ai.js ├── analytics.js ├── dark_mode.js ├── email.js └── form-utils.js ├── tailwind.config.js ├── templates ├── ai │ ├── _email_form.html │ ├── _footer.html │ └── _sidebar.html ├── base-ai.html ├── content │ ├── 
_upcoming.html │ ├── ai-basics │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── audio-video-models │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── build.sh │ ├── choosing-ml-models │ │ ├── _sidebar.html │ │ ├── ai-guide-evaluate-ml-results.html │ │ ├── ai-guide-evaluate-ml-results.ipynb │ │ ├── ai-guide-pick-a-model-test-a-model.html │ │ ├── ai-guide-pick-a-model-test-a-model.ipynb │ │ ├── index-content.html │ │ └── index.md │ ├── comparing-open-llms │ │ ├── _sidebar.html │ │ ├── comparing-open-llms.html │ │ ├── comparing-open-llms.ipynb │ │ ├── index-content.html │ │ └── index.md │ ├── contributions │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── embeddings-and-rag │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── fine-tuning-llms │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── image-models │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── introduction │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── llms-101 │ │ ├── _params-carousel.html │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── models-from-scratch │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ ├── notable-projects │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md │ └── running-llms-locally │ │ ├── _sidebar.html │ │ ├── index-content.html │ │ └── index.md └── partials │ ├── footer.html │ ├── image-cards │ ├── image-card.html │ └── image-cards.html │ ├── teams.html │ └── ticker.html ├── tools ├── build_ai_guide.sh └── build_ai_guide_content_pages.js └── webpack.config.js /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["eslint:recommended"], 3 | "env": { 4 | "es6": true, 5 | "node": true 6 | }, 7 | "overrides": [ 8 | { 9 | "files": "scripts/**/*.js", 10 | "env": { 11 | "browser": true 12 | } 13 | } 14 | ], 15 | "parserOptions": { 16 | "ecmaVersion": 2023, 17 | "impliedStrict": false, 18 | "sourceType": "module" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general-feedback.yml: -------------------------------------------------------------------------------- 1 | name: "General feedback" 2 | description: Provide feedback on the target audience, general approach, high-level structure, usefulness, or anything else. 3 | labels: ["general"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Please fill in a description of your feedback in the title field above 9 | --- 10 | - type: textarea 11 | id: problem 12 | attributes: 13 | label: Please describe your problem or observation 14 | description: This section should include a clear and concise description of what your problem or observation is. 15 | placeholder: e.g. I think the AI Guide should include... 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: solution 20 | attributes: 21 | label: Describe the solution you'd like to see 22 | description: This section should include a description of the changes you'd like to see made to the AI Guide to fix the issue as you see it. 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/topic-coverage.yml: -------------------------------------------------------------------------------- 1 | name: "Topic coverage" 2 | description: Provide feedback on specific topic coverage (e.g.
AI Basics, Language Models 101), including missing topics and requests for changes to existing coverage. 3 | labels: ["topics"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Please fill in a description of your topic feedback in the title field above 9 | --- 10 | - type: input 11 | id: topic 12 | attributes: 13 | label: Sub-section title 14 | description: Fill in the specific title of the sub-section 15 | placeholder: e.g. What exactly is an LLM? 16 | - type: textarea 17 | id: problem 18 | attributes: 19 | label: Please describe your issue 20 | description: This section should include a clear and concise description of what you feel is missing or wrong. 21 | placeholder: e.g. I think topic x needs more coverage 22 | validations: 23 | required: true 24 | - type: textarea 25 | id: solution 26 | attributes: 27 | label: Describe the solution you'd like to see 28 | description: This section should include a description of the changes you'd like to see made to the curriculum to fix the issue as you see it. 29 | -------------------------------------------------------------------------------- /.github/workflows/prod.yaml: -------------------------------------------------------------------------------- 1 | name: Push main to prod 2 | on: workflow_dispatch 3 | permissions: 4 | contents: write 5 | jobs: 6 | push-prod: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | with: 11 | # this means that it is a full history clone, not a shallow one 12 | # heroku push fails with a shallow clone 13 | fetch-depth: 0 14 | - name: git push 15 | run: | 16 | git push -f origin main:prod 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Logs 3 | logs 4 | *.log 5 | npm-debug.log* 6 | yarn-debug.log* 7 | yarn-error.log* 8 | lerna-debug.log* 9 | .pnpm-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 13 | 14 | # Runtime data 15 | pids 16 | *.pid 17 | *.seed 18 | *.pid.lock 19 | 20 | # Directory for instrumented libs generated by jscoverage/JSCover 21 | lib-cov 22 | 23 | # Coverage directory used by tools like istanbul 24 | coverage 25 | *.lcov 26 | 27 | # nyc test coverage 28 | .nyc_output 29 | 30 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 31 | .grunt 32 | 33 | # Bower dependency directory (https://bower.io/) 34 | bower_components 35 | 36 | # node-waf configuration 37 | .lock-wscript 38 | 39 | # Compiled binary addons (https://nodejs.org/api/addons.html) 40 | build/Release 41 | 42 | # Dependency directories 43 | node_modules/ 44 | jspm_packages/ 45 | 46 | # Snowpack dependency directory (https://snowpack.dev/) 47 | web_modules/ 48 | 49 | # TypeScript cache 50 | *.tsbuildinfo 51 | 52 | # Optional npm cache directory 53 | .npm 54 | 55 | # Optional eslint cache 56 | .eslintcache 57 | 58 | # Optional stylelint cache 59 | .stylelintcache 60 | 61 | # Microbundle cache 62 | .rpt2_cache/ 63 | .rts2_cache_cjs/ 64 | .rts2_cache_es/ 65 | .rts2_cache_umd/ 66 | 67 | # Optional REPL history 68 | .node_repl_history 69 | 70 | # Output of 'npm pack' 71 | *.tgz 72 | 73 | # Yarn Integrity file 74 | .yarn-integrity 75 | 76 | # dotenv environment variable files 77 | .env 78 | .env.development.local 79 | .env.test.local 80 | .env.production.local 81 | .env.local 82 | 83 | # parcel-bundler cache (https://parceljs.org/) 84 | .cache 85 | 
.parcel-cache 86 | 87 | # Next.js build output 88 | .next 89 | out 90 | 91 | # Nuxt.js build / generate output 92 | .nuxt 93 | dist 94 | 95 | # Gatsby files 96 | .cache/ 97 | # Comment in the public line in if your project uses Gatsby and not Next.js 98 | # https://nextjs.org/blog/next-9-1#public-directory-support 99 | # public 100 | 101 | # vuepress build output 102 | .vuepress/dist 103 | 104 | # vuepress v2.x temp and cache directory 105 | .temp 106 | .cache 107 | 108 | # Docusaurus cache and generated files 109 | .docusaurus 110 | 111 | # Serverless directories 112 | .serverless/ 113 | 114 | # FuseBox cache 115 | .fusebox/ 116 | 117 | # DynamoDB Local files 118 | .dynamodb/ 119 | 120 | # TernJS port file 121 | .tern-port 122 | 123 | # Stores VSCode versions used for testing VSCode extensions 124 | .vscode-test 125 | 126 | # yarn v2 127 | .yarn/cache 128 | .yarn/unplugged 129 | .yarn/build-state.yml 130 | .yarn/install-state.gz 131 | .pnp.* 132 | node_modules 133 | -------------------------------------------------------------------------------- /.htmllintrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": [], // npm modules to load 3 | "maxerr": false, 4 | 5 | "attr-bans": ["align", "background", "bgcolor", "border", "frameborder", "longdesc", "marginwidth", "marginheight", "scrolling", "style"], 6 | "attr-name-ignore-regex": false, 7 | "attr-name-style": false, 8 | "attr-new-line": false, 9 | "attr-no-dup": true, 10 | "attr-no-unsafe-char": true, 11 | "attr-order": false, 12 | "attr-quote-style": false, 13 | "attr-req-value": false, 14 | "attr-validate": true, 15 | "class-no-dup": true, 16 | "class-style": false, 17 | "doctype-first": false, 18 | "doctype-html5": false, 19 | "fig-req-figcaption": false, 20 | "focusable-tabindex-style": false, 21 | "head-req-title": true, 22 | "href-style": false, 23 | "html-req-lang": false, 24 | "html-valid-content-model": true, 25 | "id-class-ignore-regex": false, 26 | "id-class-no-ad": false, 27 | "id-class-style": false, 28 | "id-no-dup": true, 29 | "img-req-alt": true, 30 | "img-req-src": true, 31 | "indent-delta": false, 32 | "indent-style": "nonmixed", 33 | "indent-width-cont": false, 34 | "indent-width": false, 35 | "input-radio-req-name": true, 36 | "input-req-label": false, 37 | "label-req-for": true, 38 | "lang-style": "case", 39 | "line-end-style": "lf", 40 | "line-max-len-ignore-regex": false, 41 | "line-max-len": false, 42 | "line-no-trailing-whitespace": false, 43 | "link-req-noopener": true, 44 | "raw-ignore-regex": false, 45 | "spec-char-escape": false, 46 | "table-req-caption": false, 47 | "table-req-header": false, 48 | "tag-bans": ["style", "b", "i"], 49 | "tag-close": true, 50 | "tag-name-lowercase": true, 51 | "tag-name-match": true, 52 | "tag-req-attr": false, 53 | "tag-self-close": false, 54 | "text-ignore-regex": false, 55 | "title-max-len": 60, 56 | "title-no-dup": true 57 | } 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Mozilla AI Guide](/img/blog-header.png) 2 | 3 | Mozilla champions an open, accessible Internet where people have the reins. We welcome the recent wave of AI breakthroughs. 4 | 5 | However, with substantial corporate dollars being invested into AI R&D, it's hard for both junior and senior engineers new to the scene to identify which paths lead to sustainable open software. We've seen this story before. 6 | 7 | Mozilla's efforts in AI are more than just technical - they're a call for action and unity across the currently fragmented open source AI community. 8 | 9 | Our AI Guide is a living, breathing resource, rooted in collaboration and community input for experts and newcomers alike, and we invite you to build alongside us. 10 | 11 | We are currently welcoming contributions for: 12 | - Fine-tuning LLMs 13 | - Building LLMs from scratch 14 | - Multi-modal LLMs 15 | - Audio & Video models 16 | - Image models 17 | 18 | [Contribution Guide](https://ai-guide.future.mozilla.org/content/contributions/index.html) 19 | 20 | [Join our Discord →](https://discord.gg/3egbzTKhdk) 21 | 22 | PRs are open. Email ai-guide@mozilla.com if you need to reach us. 23 | 24 | # Installation 25 | 26 | ## Getting Started 27 | 28 | This project is a static webpage built with HTML, CSS and JS. We use [Webpack](https://webpack.js.org/) to bundle everything to the `dist/` folder. It also uses [Nunjucks Templates](https://mozilla.github.io/nunjucks/) for templating; to convert Nunjucks templates to HTML, we use the [html-bundler-webpack-plugin](https://github.com/webdiscus/html-bundler-webpack-plugin). 29 | 30 | ## Installation 31 | These instructions assume you have Node.js installed. 32 | 33 | To build the AI Guide from source and run the site locally, first 34 | clone the repo from GitHub and then run: 35 | 36 | ``` 37 | npm install 38 | ``` 39 | 40 | Running `npm install` will install the NPM dependencies. 41 | 42 | ## Make it run 43 | 44 | Build the site and start the web server with: 45 | 46 | ``` 47 | npm start 48 | ``` 49 | 50 | That will run the webpack dev server. 51 | 52 | View the site at http://localhost:8000/ 53 | 54 | ## Build static HTML files 55 | 56 | ``` 57 | npm run build 58 | ``` 59 | 60 | Webpack will output all HTML files to a folder called `dist`. 61 | 62 | ## AI Guide-specific instructions 63 | The AI Guide is hosted in this repo, and uses a slightly different Markdown-flavored templating system, but the same `npm` steps above. It also uses Tailwind for CSS and doesn't use Protocol. 64 | 65 | Content for the guide is generated from Markdown files in `templates/content` using scripts in `tools/`. 66 | To generate fresh content: 67 | `tools/build_ai_guide.sh` 68 | 69 | Note that pages in `/pages/content` should always be generated using the script above. 70 | 71 | To run the server: 72 | `tools/build_ai_guide.sh` 73 | `npx tailwindcss -w` 74 | `npm run start` 75 | 76 | Go to `/` 77 | 78 | ## Folder Hierarchy 79 | 80 | All Nunjucks files are located in either the `templates/` folder or the `pages/` folder. 81 | The `templates/` folder contains base templates that can be `extended`, or partials, which can be `included` in the files in the `pages/` folder. 82 | 83 | The `pages/` folder contains the Nunjucks files which will be compiled to HTML and served on the site; a minimal sketch of this pattern is shown below.
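For illustration, a page under `pages/` typically just extends the shared base template and includes its generated content partial. This is a minimal sketch (the `example-topic` path is hypothetical; the block names mirror the real pages under `pages/content/`):

```
{# pages/content/example-topic/index.html (hypothetical) #}
{% extends "base-ai.html" %}

{% block page_title %}Mozilla AI Guide - Example Topic{% endblock %}
{% block page_desc %}{% endblock %}
{% block main_id %}content{% endblock %}

{% block content %}
{% include "content/example-topic/index-content.html" %}
{% endblock %}
```

The `base-ai.html` template supplies the shared page chrome, and the included `index-content.html` partial is the HTML generated from the section's Markdown source by `tools/build_ai_guide.sh`.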
84 | 85 | 86 | ## Deploy 87 | 88 | Branches in the pull request queue will be given a demo server by Netlify. The bot will comment on the PR with the link. 89 | 90 | The `main` branch is automatically deployed to the staging server https://mozilla-ai-guide.netlify.app/ 91 | 92 | To deploy to production, push the main branch to the production branch:
93 | 94 | ``` 95 | git push origin main:prod 96 | ``` 97 | -------------------------------------------------------------------------------- /css/_footer.scss: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | $image-path: '/media/protocol/img'; 6 | 7 | @import '~@mozilla-protocol/core/protocol/css/includes/lib'; 8 | 9 | /* Protocol overrides */ 10 | 11 | // * -------------------------------------------------------------------------- */ 12 | // Footer social icon links 13 | // bedrock override until icons are added to protocol-assets 14 | // https://github.com/mozilla/protocol-assets/issues/84 15 | 16 | .mzp-c-footer-links-social { 17 | li { 18 | a { 19 | &.linkedin { 20 | background-image: url('/img/logos/social/linkedin-white.svg'); 21 | } 22 | 23 | &.tiktok { 24 | background-image: url('/img/logos/social/tiktok-white.svg'); 25 | } 26 | 27 | &.spotify { 28 | background-image: url('/img/logos/social/spotify-white.svg'); 29 | } 30 | 31 | &.mastodon { 32 | background-image: url('/img/logos/social/mastodon-white.svg'); 33 | } 34 | } 35 | } 36 | } 37 | 38 | // * -------------------------------------------------------------------------- */ 39 | // Reset social icon styles for primary footer navigation on larger screens 40 | 41 | .mzp-c-footer .mzp-c-footer-links-social { 42 | margin-bottom: $spacing-lg; // reduce spacing on mobile 43 | @media #{$mq-md} { 44 | margin-bottom: $spacing-2xl; 45 | max-width: unset; 46 | position: static; 47 | text-align: unset; 48 | 49 | li { 50 | @include bidi(((margin, 0 $spacing-md 0 0, 0 0 0 $spacing-md),)); 51 | padding: unset; 52 | } 53 | } 54 | } 55 | 56 | // * -------------------------------------------------------------------------- */ 57 | // Adjust hover/focus styles on social icons 58 | 59 | .mzp-c-footer { 60 | .mzp-c-footer-links-social li a { 61 | &:hover, 62 | &:focus, 63 | &:active { 64 | outline: 1px dotted $color-white; 65 | outline-offset: $spacing-xs; 66 | border-bottom-color: transparent; 67 | } 68 | } 69 | } 70 | 71 | // * -------------------------------------------------------------------------- */ 72 | // Keep .mzp-c-footer-heading styles on social headings 73 | // (social heading class altered to prevent Mzp Details styling/functionality) 74 | 75 | .mzp-c-footer-heading-social { 76 | @include text-body-sm; 77 | color: inherit; 78 | font-family: inherit; 79 | font-weight: 400; 80 | margin-bottom: 0; 81 | padding-bottom: $spacing-md; 82 | padding-top: $spacing-md; 83 | } 84 | -------------------------------------------------------------------------------- /css/style.scss: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
4 | 5 | $image-path: '../../img'; 6 | $font-path: '../../fonts'; 7 | 8 | @import '@mozilla-protocol/core/protocol/css/protocol'; 9 | @import '@mozilla-protocol/core/protocol/css/includes/lib'; 10 | @import '@mozilla-protocol/core/protocol/css/components/button'; 11 | @import '@mozilla-protocol/core/protocol/css/components/breadcrumb'; 12 | @import '@mozilla-protocol/core/protocol/css/components/picto'; 13 | @import '@mozilla-protocol/core/protocol/css/components/newsletter-form'; 14 | @import '@mozilla-protocol/core/protocol/css/templates/multi-column'; 15 | @import '@mozilla-protocol/core/protocol/css/components/footer'; 16 | 17 | @import 'cohort'; 18 | @import "innovation-landing"; 19 | @import "builders-challenge"; 20 | @import "footer"; 21 | 22 | // Helper classes 23 | .gradient-bg { 24 | background: linear-gradient(rgba(0, 0, 0, 0.8),rgba(0, 0, 0, 0.8)), radial-gradient(#ffa436 0%, #FF298A 50%, #7542e4 100%) right center no-repeat; 25 | } 26 | 27 | .dark-highlight { 28 | background-color: $color-black; 29 | color: $color-white; 30 | padding: 0 $spacing-sm; 31 | box-decoration-break: clone; 32 | line-height: 1.3; 33 | } 34 | 35 | .mzp-c-navigation { 36 | background-color: $color-black; 37 | box-shadow: 0px 4px 4px rgba(0, 0, 0, 0.25); 38 | 39 | .mzp-c-navigation-container { 40 | display: flex; 41 | justify-content: space-between; 42 | align-items: center; 43 | flex-direction: column; 44 | 45 | &:after { 46 | content: none; 47 | } 48 | 49 | .mzp-c-button { 50 | display: none; 51 | } 52 | 53 | @media #{$mq-xs} { 54 | flex-direction: row; 55 | } 56 | 57 | @media #{$mq-md} { 58 | .mzp-c-button { 59 | display: block; 60 | } 61 | } 62 | } 63 | } 64 | 65 | 66 | .hero-section { 67 | background-image: url("/img/hero-bg.svg"), linear-gradient(90.29deg, rgba(0, 0, 0, 0.5) 72.2%, rgba(0, 0, 0, 0) 99.74%); 68 | background-repeat: no-repeat; 69 | background-position: right; 70 | background-color: $color-black; 71 | 72 | .hero-text-wrapper { 73 | max-width: 100%; 74 | background: linear-gradient(90deg, rgba(0,0,0,0.5014174117370784) 0%, rgba(0,0,0,0.5014174117370784) 80%, rgba(0,0,0,0) 100%); 75 | 76 | h1 { 77 | @include font-size(42px); 78 | font-weight: 500; 79 | } 80 | 81 | h2 { 82 | @include font-size(64px); 83 | } 84 | } 85 | 86 | @media #{$mq-md} { 87 | .hero-text-wrapper { 88 | max-width: 55%; 89 | } 90 | } 91 | } 92 | 93 | .benefits-section { 94 | display: grid; 95 | grid-template-columns: [start] 1fr 1fr 1fr [end]; 96 | column-gap: 24px; 97 | row-gap: 32px; 98 | 99 | .benefits-text-wrapper { 100 | grid-column: start / end; 101 | max-width: $content-md; 102 | margin: 0 auto; 103 | 104 | 105 | h2 { 106 | text-align: center; 107 | } 108 | } 109 | 110 | .why-join-container { 111 | grid-column: span 3; 112 | border: 1px solid #CCCCCC; 113 | 114 | .why-join-subheader { 115 | @include font-mozilla; 116 | @include font-size(32px); 117 | padding: $spacing-md 0; 118 | text-align: center; 119 | border-bottom: 1px solid #ccc; 120 | } 121 | 122 | @media #{$mq-md} { 123 | grid-column: span 1; 124 | } 125 | } 126 | 127 | 128 | .mzp-c-picto { 129 | padding: 0 15px; 130 | 131 | .mzp-c-picto-image { 132 | margin-bottom: 0; 133 | 134 | img { 135 | height: 75px; 136 | } 137 | } 138 | 139 | .mzp-c-picto-heading { 140 | @include font-base; 141 | @include font-size(18px); 142 | } 143 | } 144 | } 145 | 146 | .question-section .mzp-l-content { 147 | background: none; 148 | 149 | small { 150 | display: block; 151 | margin-bottom: $spacing-md; 152 | } 153 | 154 | h2 { 155 | margin-bottom: $spacing-lg; 156 | } 157 | 
158 | small + h2, p + h2 { 159 | margin-top: $spacing-2xl; 160 | } 161 | } 162 | 163 | .teams-text-wrapper { 164 | max-width: $content-md; 165 | margin: 0 auto $layout-lg; 166 | 167 | 168 | h2 { 169 | text-align: center; 170 | } 171 | } 172 | 173 | .teams-grid { 174 | display: grid; 175 | grid-template-columns: 1fr; 176 | column-gap: $spacing-2xl; 177 | row-gap: $spacing-lg; 178 | 179 | .c-card { 180 | grid-column: span 1; 181 | justify-self: center; 182 | display: flex; 183 | flex-direction: column; 184 | justify-content: space-between; 185 | 186 | .c-card-company { 187 | @include font-size(14px); 188 | margin-bottom: 0; 189 | } 190 | 191 | .c-card-title { 192 | @include font-mozilla; 193 | @include font-size(32px); 194 | display: block; 195 | } 196 | 197 | .c-card-cta, .c-card-title { 198 | color: $color-white; 199 | 200 | &:hover { 201 | color: $color-white; 202 | } 203 | } 204 | 205 | .c-card-desc, .c-card-mission, .c-card-cta { 206 | @include font-size(14px); 207 | } 208 | 209 | .c-card-cta { 210 | position: relative; 211 | 212 | &:before { 213 | content: "\002b "; 214 | } 215 | } 216 | 217 | } 218 | 219 | @media #{$mq-md} { 220 | grid-template-columns: 1fr 1fr; 221 | } 222 | 223 | @media #{$mq-lg} { 224 | grid-template-columns: 1fr 1fr 1fr; 225 | } 226 | } 227 | 228 | .collaborate-title { 229 | align-self: center; 230 | } 231 | 232 | .faq-section { 233 | h2 { 234 | @include font-size(40px); 235 | text-align: center; 236 | margin-top: $spacing-xl; 237 | } 238 | 239 | h3 { 240 | @include font-size(24px); 241 | margin: $spacing-xl 0 $spacing-lg; 242 | } 243 | } 244 | 245 | .newsletter-title { 246 | text-align: center; 247 | } 248 | 249 | .mzp-c-newsletter { 250 | column-gap: $spacing-2xl; 251 | 252 | .mzp-c-newsletter-form { 253 | padding-top: 0; 254 | 255 | // initial state for the form details to be hidden 256 | .mzp-c-newsletter-details { 257 | display: none; 258 | 259 | label.mzp-u-inline { 260 | display: inline-block; 261 | } 262 | 263 | input[type="text"], textarea { 264 | width: 100%; 265 | } 266 | 267 | .interests { 268 | margin-bottom: $spacing-md; 269 | } 270 | } 271 | 272 | .mzp-c-form-submit button[type="submit"] { 273 | margin-bottom: $spacing-lg; 274 | } 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /fonts/Cabin-VariableFont_wdth,wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/fonts/Cabin-VariableFont_wdth,wght.ttf -------------------------------------------------------------------------------- /fonts/Inter-VariableFont_slnt,wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/fonts/Inter-VariableFont_slnt,wght.ttf -------------------------------------------------------------------------------- /fonts/Montserrat-VariableFont_wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/fonts/Montserrat-VariableFont_wght.ttf -------------------------------------------------------------------------------- /fonts/SpaceGrotesk-VariableFont_wght.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/fonts/SpaceGrotesk-VariableFont_wght.ttf -------------------------------------------------------------------------------- /img/ai/ai-guide.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/ai-guide.jpg -------------------------------------------------------------------------------- /img/ai/ai-header-bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/ai-header-bg.jpg -------------------------------------------------------------------------------- /img/ai/ai-header-nodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/ai-header-nodes.png -------------------------------------------------------------------------------- /img/ai/carousels/how-llm-works/decoding.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/how-llm-works/decoding.jpg -------------------------------------------------------------------------------- /img/ai/carousels/how-llm-works/embedding.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/how-llm-works/embedding.jpg -------------------------------------------------------------------------------- /img/ai/carousels/how-llm-works/output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/how-llm-works/output.jpg -------------------------------------------------------------------------------- /img/ai/carousels/how-llm-works/self-attention.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/how-llm-works/self-attention.jpg -------------------------------------------------------------------------------- /img/ai/carousels/how-llm-works/tokenization.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/how-llm-works/tokenization.jpg -------------------------------------------------------------------------------- /img/ai/carousels/parameters/llm-01-model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/parameters/llm-01-model.jpg -------------------------------------------------------------------------------- /img/ai/carousels/parameters/llm-02-maxlength.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/parameters/llm-02-maxlength.jpg 
-------------------------------------------------------------------------------- /img/ai/carousels/parameters/llm-03-temp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/parameters/llm-03-temp.jpg -------------------------------------------------------------------------------- /img/ai/carousels/parameters/llm-04-top-p.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/parameters/llm-04-top-p.jpg -------------------------------------------------------------------------------- /img/ai/carousels/parameters/llm-05-frequency.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/carousels/parameters/llm-05-frequency.jpg -------------------------------------------------------------------------------- /img/ai/ui/btn-section-lg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/ui/btn-section-lg.jpg -------------------------------------------------------------------------------- /img/ai/ui/colab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/ai/ui/colab.png -------------------------------------------------------------------------------- /img/blog-header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/blog-header.png -------------------------------------------------------------------------------- /img/colab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/colab.png -------------------------------------------------------------------------------- /img/discord-mark-black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/discord-mark-black.png -------------------------------------------------------------------------------- /img/favicons/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/favicons/apple-touch-icon.png -------------------------------------------------------------------------------- /img/favicons/favicon-196x196.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/favicons/favicon-196x196.png -------------------------------------------------------------------------------- /img/favicons/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/favicons/favicon.ico 
-------------------------------------------------------------------------------- /img/image-models/autoencoder_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/autoencoder_diagram.png -------------------------------------------------------------------------------- /img/image-models/clip_model_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/clip_model_diagram.png -------------------------------------------------------------------------------- /img/image-models/comfyui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/comfyui.png -------------------------------------------------------------------------------- /img/image-models/control_net_conditioning_encoder_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/control_net_conditioning_encoder_diagram.png -------------------------------------------------------------------------------- /img/image-models/control_net_trainable_copy_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/control_net_trainable_copy_diagram.png -------------------------------------------------------------------------------- /img/image-models/diffusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/diffusion.png -------------------------------------------------------------------------------- /img/image-models/diffusion_model_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/diffusion_model_diagram.png -------------------------------------------------------------------------------- /img/image-models/dreambooth_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/dreambooth_diagram.png -------------------------------------------------------------------------------- /img/image-models/latent_diffusion_model_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/latent_diffusion_model_diagram.png -------------------------------------------------------------------------------- /img/image-models/low_rank_adaptation_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/low_rank_adaptation_diagram.png -------------------------------------------------------------------------------- 
/img/image-models/pope_fake.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/pope_fake.jpg -------------------------------------------------------------------------------- /img/image-models/purple_diffusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/purple_diffusion.png -------------------------------------------------------------------------------- /img/image-models/screenshot_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_001.png -------------------------------------------------------------------------------- /img/image-models/screenshot_002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_002.png -------------------------------------------------------------------------------- /img/image-models/screenshot_003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_003.png -------------------------------------------------------------------------------- /img/image-models/screenshot_004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_004.png -------------------------------------------------------------------------------- /img/image-models/screenshot_005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_005.png -------------------------------------------------------------------------------- /img/image-models/screenshot_006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_006.png -------------------------------------------------------------------------------- /img/image-models/screenshot_007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_007.png -------------------------------------------------------------------------------- /img/image-models/screenshot_008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_008.png -------------------------------------------------------------------------------- /img/image-models/screenshot_009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_009.png 
-------------------------------------------------------------------------------- /img/image-models/screenshot_010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_010.png -------------------------------------------------------------------------------- /img/image-models/screenshot_011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_011.png -------------------------------------------------------------------------------- /img/image-models/screenshot_012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_012.png -------------------------------------------------------------------------------- /img/image-models/screenshot_013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_013.png -------------------------------------------------------------------------------- /img/image-models/screenshot_014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_014.png -------------------------------------------------------------------------------- /img/image-models/screenshot_015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_015.png -------------------------------------------------------------------------------- /img/image-models/screenshot_016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_016.png -------------------------------------------------------------------------------- /img/image-models/screenshot_017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_017.png -------------------------------------------------------------------------------- /img/image-models/screenshot_018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_018.png -------------------------------------------------------------------------------- /img/image-models/screenshot_019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_019.png -------------------------------------------------------------------------------- /img/image-models/screenshot_020.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/screenshot_020.png -------------------------------------------------------------------------------- /img/image-models/sd_cn_inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/sd_cn_inference.png -------------------------------------------------------------------------------- /img/image-models/stable-video-diffusion.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/stable-video-diffusion.gif -------------------------------------------------------------------------------- /img/image-models/stable_diffusion_inference_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/stable_diffusion_inference_diagram.png -------------------------------------------------------------------------------- /img/image-models/text_encoder_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/text_encoder_diagram.png -------------------------------------------------------------------------------- /img/image-models/textual_inversion_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/textual_inversion_diagram.png -------------------------------------------------------------------------------- /img/image-models/unet_model_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/unet_model_diagram.png -------------------------------------------------------------------------------- /img/image-models/variational_autoencoder_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/variational_autoencoder_diagram.png -------------------------------------------------------------------------------- /img/image-models/variational_autoencoder_diagram2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/variational_autoencoder_diagram2.png -------------------------------------------------------------------------------- /img/image-models/variational_autoencoder_diagram3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/variational_autoencoder_diagram3.png -------------------------------------------------------------------------------- /img/image-models/variational_autoencoder_diagram4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/variational_autoencoder_diagram4.png -------------------------------------------------------------------------------- /img/image-models/variational_autoencoder_diagram5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/image-models/variational_autoencoder_diagram5.png -------------------------------------------------------------------------------- /img/logos/social/linkedin-white.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /img/logos/social/mastodon-white.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /img/logos/social/spotify-white.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /img/logos/social/tiktok-white.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /img/mozilla-256.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/mozilla-256.jpg -------------------------------------------------------------------------------- /img/mozilla-only.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/mozilla-only.png -------------------------------------------------------------------------------- /img/newsletter-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/newsletter-image.png -------------------------------------------------------------------------------- /img/placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/placeholder.png -------------------------------------------------------------------------------- /img/report.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/server.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/sign-up-high-res.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/ai-guide/0a160264235123690de59d1d3eb137854ae00e37/img/sign-up-high-res.png -------------------------------------------------------------------------------- /img/target.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/theses.svg: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/world.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /netlify.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | publish = "dist" 3 | command = "npm run build" 4 | 5 | [[headers]] 6 | for = "/*" 7 | [headers.values] 8 | X-Content-Type-Options = "nosniff" 9 | X-Frame-Options = "DENY" 10 | X-XSS-Protection = "1; mode=block" 11 | Cache-Control = "public,max-age=3600" 12 | # Content-Security-Policy = ''' 13 | # style-src 'self' 'unsafe-inline'; 14 | # default-src 'self' www.google-analytics.com; 15 | # child-src 'self' www.youtube.com www.youtube-nocookie.com www.google-analytics.com; 16 | # connect-src 'self' *.mozilla.org www.google-analytics.com; 17 | # img-src 'self' data: www.google-analytics.com; 18 | # # script-src 'self' 'unsafe-inline' 'unsafe-eval' s.ytimg.com www.youtube.com www.googletagmanager.com; 19 | # ''' 20 | 21 | [[headers]] 22 | for = "/fonts/*" 23 | [headers.values] 24 | # Temporary cache header change until we can get immutable files back 25 | # Cache-Control = "max-age=31536000,public,immutable" 26 | Cache-Control = "public,max-age=3600" 27 | 28 | [[headers]] 29 | for = "/css/*" 30 | [headers.values] 31 | # Cache-Control = "max-age=31536000,public,immutable" 32 | Cache-Control = "public,max-age=3600" 33 | 34 | [[headers]] 35 | for = "/img/*" 36 | [headers.values] 37 | # Cache-Control = "max-age=31536000,public,immutable" 38 | Cache-Control = "public,max-age=3600" 39 | 40 | [[headers]] 41 | for = "/scripts/*" 42 | [headers.values] 43 | # Cache-Control = "max-age=31536000,public,immutable" 44 | Cache-Control = "public,max-age=3600" 45 | 46 | [[headers]] 47 | for = "/content/*" 48 | [headers.values] 49 | # Cache-Control = "max-age=31536000,public,immutable" 50 | Cache-Control = "public,max-age=3600" 51 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ai-guide", 3 | "version": "1.0.1", 4 | "description": "Static webpage for the Mozilla AI Guide", 5 | "scripts": { 6 | "start": "export NODE_ENV=development && webpack serve", 7 | "build": "export NODE_ENV=production && webpack", 8 | "watch": "export NODE_ENV=development && webpack watch" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git+https://github.com/mozilla/ai-guide.git" 13 | }, 14 | "author": "mozmeao", 15 | "license": "MPL", 16 | "bugs": { 17 | "url": "https://github.com/mozilla/ai-guide/issues" 18 | }, 19 | "homepage": "https://github.com/mozilla/ai-guide#readme", 20 | "devDependencies": { 21 | "@parcel/transformer-sass": "^2.8.3", 22 | "@tailwindcss/typography": "^0.5.9", 23 | "autoprefixer": "^10.4.14", 24 | "babel-loader": "^9.1.2", 25 | "css-loader": "^6.7.3", 26 | "daisyui": "^3.5.0", 27 | "eslint": "^8.47.0", 28 | "html-bundler-webpack-plugin": "^2.12.0", 29 | "nunjucks": "^3.2.4", 30 | "postcss": "^8.4.27", 31 | "sass-loader": "^13.2.2", 32 | "tailwindcss": "^3.3.3", 33 | "webpack": "^5.80.0", 34 | "webpack-cli": "^5.0.1", 35 | "webpack-dev-server": "^4.13.2" 36 | }, 37 | "dependencies": { 38 | "@mozilla-protocol/core": "^16.1.0", 39 | "@mozmeao/dnt-helper": "^1.0.0", 40 | "markdown": "^0.5.0", 41 | "marked": 
"^6.0.0", 42 | "marked-gfm-heading-id": "^3.0.5", 43 | "marked-mangle": "^1.1.1", 44 | "nunjucks-markdown": "^2.0.1", 45 | "postcss-loader": "^7.3.3", 46 | "tailwindcss-typography": "^3.1.0" 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /pages/README.md: -------------------------------------------------------------------------------- 1 | Turn markdown to html: `tools/build_ai_guide.sh` -------------------------------------------------------------------------------- /pages/content/ai-basics/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - AI Basics{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/ai-basics/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/audio-video-models/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - {% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/audio-video-models/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/choosing-ml-models/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - Choosing ML Models{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/choosing-ml-models/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/comparing-open-llms/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - Comparing Open LLMs{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/comparing-open-llms/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/contributions/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - Contribution Guide{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/contributions/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/embeddings-and-rag/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - {% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include 
"content/embeddings-and-rag/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/fine-tuning-llms/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - {% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/fine-tuning-llms/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/image-models/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - {% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/image-models/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/introduction/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - Introduction{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/introduction/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/llms-101/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - Language Models 101{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/llms-101/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/models-from-scratch/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - {% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/models-from-scratch/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/notable-projects/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - Notable Projects{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | {% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/notable-projects/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/content/running-llms-locally/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% extends "base-ai.html" %} 4 | 5 | {% block page_title %}Mozilla AI Guide - Running LLMs Locally{% endblock %} 6 | {% block page_desc %}{% endblock %} 7 | 
{% block main_id %}content{% endblock %} 8 | 9 | {% block content %} 10 | {% include "content/running-llms-locally/index-content.html"%} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /pages/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base-ai.html" %} 2 | 3 | {% block page_title %}Mozilla AI Guide{% endblock %} 4 | {% block page_desc %}Mozilla AI Guide{% endblock %} 5 | {% block main_id %}content{% endblock %} 6 | 7 | {% block content %} 8 | {% include "content/introduction/index-content.html"%} 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | } 6 | } -------------------------------------------------------------------------------- /scripts/ai.js: -------------------------------------------------------------------------------- 1 | var nunjucks = require('nunjucks'), 2 | markdown = require('nunjucks-markdown'), 3 | marked = require('marked'); 4 | 5 | var env = nunjucks.configure('templates'); 6 | 7 | // The second argument can be any function that renders markdown 8 | markdown.register(env, marked); -------------------------------------------------------------------------------- /scripts/analytics.js: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | var dntEnabled = require('@mozmeao/dnt-helper'); 8 | 9 | // Ensure window.dataLayer is always defined, even if GTM might not have loaded. 10 | window.dataLayer = window.dataLayer || []; 11 | 12 | 13 | if(!dntEnabled()) { 14 | 15 | window.gtag = function () { 16 | window.dataLayer.push(arguments); 17 | }; 18 | 19 | const gaScript = document.createElement('script'); 20 | gaScript.async = 'true'; 21 | gaScript.type = 'text/javascript'; 22 | gaScript.src = 'https://www.googletagmanager.com/gtag/js?id=G-Y4SS9F26BD'; 23 | const pageHead = document.getElementsByTagName('head')[0]; 24 | pageHead.append(gaScript); 25 | 26 | window.gtag('js', new Date()); 27 | window.gtag('config', 'G-Y4SS9F26BD'); 28 | } 29 | -------------------------------------------------------------------------------- /scripts/dark_mode.js: -------------------------------------------------------------------------------- 1 | if (localStorage.theme === 'dark' || (!('theme' in localStorage) && window.matchMedia('(prefers-color-scheme: dark)').matches)) { 2 | document.documentElement.setAttribute('data-theme', 'moz_ai_guide_base_dark') 3 | document.documentElement.classList.add('dark') 4 | } else { 5 | document.documentElement.setAttribute('data-theme', 'moz_ai_guide_base') 6 | document.documentElement.classList.remove('dark') 7 | } -------------------------------------------------------------------------------- /scripts/email.js: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
5 | */ 6 | 7 | import { 8 | checkEmailValidity, 9 | clearFormErrors, 10 | errorList, 11 | disableFormFields, 12 | enableFormFields, 13 | postToEmailServer, 14 | } from "./form-utils"; 15 | 16 | import "@mozilla-protocol/core/protocol/js/protocol-newsletter.min.js"; 17 | 18 | let form; 19 | let isBuilderPage; 20 | let isMIECO; 21 | let isInnovationPage; 22 | 23 | const EmailForm = { 24 | handleFormError: (msg) => { 25 | let error; 26 | 27 | enableFormFields(form); 28 | 29 | switch (msg) { 30 | case errorList.EMAIL_INVALID_ERROR: 31 | error = form.querySelector(".error-email-invalid"); 32 | break; 33 | case errorList.PRIVACY_POLICY_ERROR: 34 | error = form.querySelector(".error-privacy-policy"); 35 | break; 36 | case errorList.NEWSLETTER_ERROR: 37 | error = form.querySelector(".error-newsletter"); 38 | break; 39 | default: 40 | error = form.querySelector(".error-try-again-later"); 41 | } 42 | 43 | if (error) { 44 | console.log(error); 45 | const errorContainer = form.querySelector(".mzp-c-form-errors"); 46 | errorContainer.classList.remove("hidden"); 47 | errorContainer.style.display = "block"; 48 | error.classList.remove("hidden"); 49 | } 50 | }, 51 | 52 | handleFormSuccess: () => { 53 | form.classList.add("hidden"); 54 | const thanks = document.getElementById("newsletter-thanks"); 55 | thanks.style.display = "block"; 56 | 57 | if (isInnovationPage) { 58 | //our design specs hide the call to action title and subtext when thank you message is displayed 59 | document.querySelector(".newsletter-cta").classList.add("hidden"); 60 | } 61 | }, 62 | 63 | validateFields: () => { 64 | const email = form.querySelector('input[type="email"]').value; 65 | const privacy = !!form.querySelector('input[name="privacy"]:checked'); 66 | const newsletters = form.querySelectorAll( 67 | 'input[name="interests"]:checked' 68 | ); 69 | 70 | // Really basic client side email validity check. 71 | if (!checkEmailValidity(email)) { 72 | EmailForm.handleFormError(errorList.EMAIL_INVALID_ERROR); 73 | return false; 74 | } 75 | 76 | // Confirm privacy policy is checked 77 | if (!privacy) { 78 | EmailForm.handleFormError(errorList.PRIVACY_POLICY_ERROR); 79 | return false; 80 | } 81 | // the form on the builder page already includes a newsletter so these aren't required 82 | if (newsletters.length === 0 && !isBuilderPage) { 83 | EmailForm.handleFormError(errorList.NEWSLETTER_ERROR); 84 | return false; 85 | } 86 | 87 | return true; 88 | }, 89 | 90 | submit: (e) => { 91 | const email = form.querySelector('input[type="email"]').value; 92 | const interests = Array.from( 93 | form.querySelectorAll('input[name="interests"]:checked') 94 | ) 95 | .map((interests) => `${interests.value}`) 96 | .join(","); 97 | 98 | e.preventDefault(); 99 | e.stopPropagation(); 100 | 101 | // Disable form fields until POST has completed. 102 | disableFormFields(form); 103 | 104 | // Clear any prior messages that might have been displayed. 105 | clearFormErrors(form); 106 | 107 | // Perform client side form field validation. 108 | if (!EmailForm.validateFields()) { 109 | return; 110 | } 111 | 112 | if (isBuilderPage) { 113 | const newsletters = 114 | interests.length > 0 115 | ? 
`mozilla-ai-challenge, ${interests}` 116 | : "mozilla-ai-challenge"; 117 | const params = { email, newsletters }; 118 | postToEmailServer( 119 | params, 120 | EmailForm.handleFormSuccess, 121 | EmailForm.handleFormError 122 | ); 123 | } else { 124 | const name = form.querySelector('input[id="name"]').value; 125 | const description = form.querySelector("textarea").value; 126 | 127 | const params = { 128 | email, 129 | name, 130 | description, 131 | interests, 132 | }; 133 | 134 | if (isMIECO) { 135 | // The MIECO page will only send form info to email server -> mieco@mozilla.com 136 | postToEmailServer( 137 | { ...params, message_id: "mieco" }, 138 | EmailForm.handleFormSuccess, 139 | EmailForm.handleFormError 140 | ); 141 | } 142 | if (isInnovationPage) { 143 | // On the innovation landing page the user can do the following in the form: 144 | // - Sign up for the mozilla-innovation newsletter 145 | // - Send an interest email to innovations@mozilla.com 146 | // - They can also do both of the above options 147 | 148 | const website = form.querySelector('input[name="website"]'); 149 | if (interests.includes("newsletter")) { 150 | postToEmailServer( 151 | { 152 | ...params, 153 | newsletters: "mozilla-innovation", 154 | message_id: "innovations", 155 | }, 156 | EmailForm.handleFormSuccess, 157 | EmailForm.handleFormError 158 | ); 159 | } 160 | 161 | if (interests.includes("collaboration")) { 162 | postToEmailServer( 163 | { 164 | ...params, 165 | website: website?.value || "", 166 | message_id: "innovations", 167 | }, 168 | EmailForm.handleFormSuccess, 169 | EmailForm.handleFormError 170 | ); 171 | } 172 | } 173 | } 174 | }, 175 | 176 | handleCheckboxChange: ({ target }) => { 177 | const description = document.querySelector(".description"); 178 | if (description) { 179 | if (target.checked) { 180 | description.style.display = "block"; 181 | } else { 182 | description.style.display = "none"; 183 | } 184 | } 185 | }, 186 | 187 | init: () => { 188 | form = document.getElementById("newsletter-form"); 189 | 190 | // Bail out before reading form properties on pages without the form. 191 | if (!form) { 192 | return; 193 | } 194 | 195 | isBuilderPage = form.classList.contains("builders-form"); 196 | isMIECO = form.classList.contains("mieco-form"); 197 | isInnovationPage = form.classList.contains("innovations-form"); 198 | 199 | document.body.classList.add("js"); 200 | 201 | if (isInnovationPage) { 202 | const checkbox = form.querySelector("input#collaboration"); 203 | 204 | if (checkbox?.checked) { 205 | const description = document.querySelector(".description"); 206 | description.style.display = "block"; 207 | } 208 | 209 | checkbox?.addEventListener( 210 | "change", 211 | EmailForm.handleCheckboxChange, 212 | false 213 | ); 214 | } 215 | 216 | form.addEventListener("submit", EmailForm.submit, false); 217 | }, 218 | }; 219 | 220 | EmailForm.init(); 221 | -------------------------------------------------------------------------------- /scripts/form-utils.js: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
5 | */ 6 | 7 | const errorList = { 8 | EMAIL_INVALID_ERROR: 'Invalid email address', 9 | EMAIL_UNKNOWN_ERROR: 'Email address not known', 10 | NEWSLETTER_ERROR: 'Newsletter not selected', 11 | COUNTRY_ERROR: 'Country not selected', 12 | LANGUAGE_ERROR: 'Language not selected', 13 | PRIVACY_POLICY_ERROR: 'Privacy policy not checked', 14 | LEGAL_TERMS_ERROR: 'Terms not checked' 15 | }; 16 | 17 | /** 18 | * Really primitive validation (e.g. a@a) 19 | * matches built-in validation in Firefox 20 | * @param {String} email 21 | * @returns {Boolean} 22 | */ 23 | function checkEmailValidity(email) { 24 | return /\S+@\S+/.test(email) && email.length <= 120; 25 | } 26 | 27 | /** 28 | * Hide all visible form error labels. 29 | * @param {HTMLFormElement} form 30 | */ 31 | function clearFormErrors(form) { 32 | const errorMsgs = form.querySelectorAll('.mzp-c-form-errors li'); 33 | 34 | const errorContainer = form.querySelector(".mzp-c-form-errors"); 35 | errorContainer.classList.add("hidden"); 36 | errorContainer.style.display = "none"; 37 | 38 | for (let i = 0; i < errorMsgs.length; i++) { 39 | errorMsgs[i].classList.add('hidden'); 40 | } 41 | } 42 | 43 | /** 44 | * Add disabled property to all form fields. 45 | * @param {HTMLFormElement} form 46 | */ 47 | function disableFormFields(form) { 48 | const formFields = form.querySelectorAll('input, button, select, textarea'); 49 | 50 | for (let i = 0; i < formFields.length; i++) { 51 | formFields[i].disabled = true; 52 | } 53 | } 54 | 55 | /** 56 | * Remove disabled property from all form fields. 57 | * @param {HTMLFormElement} form 58 | */ 59 | function enableFormFields(form) { 60 | const formFields = form.querySelectorAll('input, button, select, textarea'); 61 | 62 | for (let i = 0; i < formFields.length; i++) { 63 | formFields[i].disabled = false; 64 | } 65 | } 66 | 67 | function postToEmailServer(params, successCallback, errorCallback) { 68 | const xhr = new XMLHttpRequest(); 69 | let url = "https://www.mozilla.org/en-US/email-mieco/"; 70 | 71 | if (params.newsletters) { 72 | url = "https://basket.mozilla.org/news/subscribe/"; 73 | } 74 | 75 | const { email } = params; 76 | 77 | // Emails used in automation for page-level integration tests 78 | // should avoid hitting basket directly. 
79 | if (email === 'success@example.com') { 80 | successCallback(); 81 | return; 82 | } else if (email === 'failure@example.com') { 83 | errorCallback(); 84 | return; 85 | } 86 | 87 | xhr.onload = function (e) { 88 | let response = e.target.response || e.target.responseText; 89 | 90 | if (typeof response !== 'object') { 91 | response = JSON.parse(response); 92 | } 93 | 94 | if (response) { 95 | if ( 96 | response.status === 'ok' && 97 | e.target.status >= 200 && 98 | e.target.status < 300 99 | ) { 100 | successCallback(); 101 | } else if (response.status === 'error' && response.desc) { 102 | errorCallback(response.desc); 103 | } else { 104 | errorCallback(); 105 | } 106 | } else { 107 | errorCallback(); 108 | } 109 | }; 110 | 111 | xhr.onerror = errorCallback; 112 | xhr.open('POST', url, true); 113 | xhr.setRequestHeader('X-Requested-With', 'XMLHttpRequest'); 114 | xhr.timeout = 5000; 115 | xhr.ontimeout = errorCallback; 116 | xhr.responseType = 'json'; 117 | 118 | if (params.newsletters) { 119 | xhr.setRequestHeader("Content-type", "application/x-www-form-urlencoded"); 120 | xhr.send(serialize(params)); 121 | } else { 122 | xhr.setRequestHeader("Content-type", "application/json"); 123 | xhr.send(JSON.stringify(params)); 124 | } 125 | } 126 | 127 | function serialize(params) { 128 | const email = encodeURIComponent(params.email); 129 | const newsletters = encodeURIComponent(params.newsletters); 130 | const sourceUrl = encodeURIComponent("https://ai-guide.future.mozilla.org"); 131 | 132 | return `email=${email}&format=H&lang=en&source_url=${sourceUrl}&newsletters=${newsletters}` 133 | } 134 | 135 | 136 | export { 137 | checkEmailValidity, 138 | clearFormErrors, 139 | errorList, 140 | disableFormFields, 141 | enableFormFields, 142 | postToEmailServer, 143 | }; 144 | -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | 3 | /** Tailwind is just for the AI Guide */ 4 | 5 | const defaultTheme = require('tailwindcss/defaultTheme') 6 | const plugin = require('tailwindcss/plugin') 7 | 8 | 9 | module.exports = { 10 | content: [ 11 | "./pages/**/*.{html,js}", 12 | "./templates/base-ai.html", 13 | "./templates/**/*.{html,js}", 14 | ], 15 | theme: { 16 | extend: { 17 | fontFamily: {}, 18 | }, 19 | }, 20 | plugins: [ 21 | require("daisyui"), 22 | require("@tailwindcss/typography"), 23 | // base 24 | plugin(function ({ addBase, theme }) { 25 | addBase({ 26 | p: { fontFamily: theme("Cabin") }, 27 | h1: { fontFamily: theme("Montserrat") }, 28 | h2: { fontFamily: theme("Montserrat") }, 29 | h3: { fontFamily: theme("Montserrat") }, 30 | }); 31 | return true; 32 | }), 33 | ], 34 | daisyui: { 35 | //daisy can only use these variables! 
https://daisyui.com/docs/colors/ 36 | themes: [ 37 | { 38 | moz_ai_guide_base: { 39 | primary: "#0F253A", // blue 40 | "primary-content": "#FCFBF6", 41 | "primary-focus": "#204D79", 42 | "base-content": "#0F253A", 43 | "base-100": "#FCFBF6", 44 | "base-200": "#6E7B85", 45 | "base-300": "#A7ACB9", 46 | info: "#A4E9FF", 47 | "info-content": "#D9F0F8", 48 | success: "#37D399", 49 | "success-content": "#87FBD1", 50 | warning: "#FCBC23", 51 | error: "#F87272", 52 | "error-content": "#000", 53 | "interaction-inactive": "#A9B2B2", 54 | }, 55 | moz_ai_guide_base_dark: { 56 | primary: "#0F253A", // blue 57 | "primary-content": "#FCFBF6", 58 | "primary-focus": "#204D79", 59 | "base-content": "#0F253A", 60 | "base-100": "#FCFBF6", 61 | "base-200": "#6E7B85", 62 | "base-300": "#A7ACB9", 63 | info: "#A4E9FF", 64 | "info-content": "#D9F0F8", 65 | success: "#37D399", 66 | "success-content": "#87FBD1", 67 | warning: "#FCBC23", 68 | error: "#F87272", 69 | "error-content": "#000", 70 | "interaction-inactive": "#A9B2B2", 71 | }, 72 | }, 73 | ], 74 | }, 75 | }; 76 | -------------------------------------------------------------------------------- /templates/ai/_email_form.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 |
5 | 9 | 71 | 76 |
77 |
78 | -------------------------------------------------------------------------------- /templates/ai/_footer.html: -------------------------------------------------------------------------------- 1 | 30 | -------------------------------------------------------------------------------- /templates/ai/_sidebar.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 | 5 | 6 | 12 | 32 | 33 |
34 | 
 35 | 
 45 | -------------------------------------------------------------------------------- /templates/base-ai.html: -------------------------------------------------------------------------------- 1 | 30 | {% set page_title %}{% block page_title %}Mozilla AI Guide{% endblock %}{% endset %} 31 | {% set page_desc %}{% filter striptags|e %}{% block page_desc %}A guide to OSS AI{% endblock %}{% endfilter %}{% endset %} 32 | 33 | 34 | 35 | 36 | 37 | 38 | {{ page_title }} 39 | 40 | 41 | {% block social_meta %} 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | {% endblock social_meta %} 50 | 51 | {% block page_css %} 52 | 53 | {% endblock %} 54 | {% block page_js %} 55 | {% endblock %} 56 | 57 | 58 | 59 | 60 | 61 | {% block header_js %} 62 | 63 | {% endblock %} 64 | 65 | 66 |
67 | 68 |
69 |
70 | 77 | {% block linkbar %} 78 | 82 | {% endblock %} 83 | {% block banner %} 84 | 92 | {% endblock %} 93 |
94 |
95 |
96 | {% block content %}{% endblock %} 97 |
98 | {% block email %} 99 | {% include "ai/_email_form.html" %} 100 | {% endblock %} 101 |
102 | 103 | {% block footer %} 104 | {% include "ai/_footer.html" %} 105 | {% endblock %} 106 |
107 | 108 | {% block sidebar %} 109 | {% include "ai/_sidebar.html" %} 110 | {% endblock %} 111 |
112 | {% block js %} 113 | {% endblock %} 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /templates/content/_upcoming.html: -------------------------------------------------------------------------------- 1 |
  • 2 | Contribute 3 | Embeddings & RAG
  • 4 |
  • 5 | Contribute 6 | Fine-Tuning LLMs
  • 7 |
  • 8 | Contribute 9 | Models From Scratch
  • 10 |
  • 11 | Contribute 12 | Audio & Video Models
  • 13 | -------------------------------------------------------------------------------- /templates/content/ai-basics/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 | AI Basics 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/ai-basics/index.md: -------------------------------------------------------------------------------- 1 | ## AI Basics 2 | 3 | #### What is AI in 2023? 4 | 5 | Artificial intelligence (AI), machine learning (ML), large language models (LLMs) and related technologies and techniques have crossed the chasm from science fiction and niche research domains into widespread awareness and widely adopted products, services, and systems in 2023. 6 | 7 | Although the history of AI research reaches back to the 1950s, the current widespread awareness of AI can be traced to a recent set of products and services using generative AI such as ChatGPT, Stable Diffusion, and the like. In particular, OpenAI's ChatGPT captured the imagination of many by removing many technical barriers to interacting with AI through “natural language” text conversations. As such, ChatGPT allowed individuals to see the power, promise, and pitfalls of such systems, and many heralded the moment as a new epoch in the history of computing. 8 | 9 | The rapid popularity of ChatGPT and other systems meant new attention and investment in the domain. Almost overnight, new applications and companies emerged that sought to make use of these technologies in new ways and new domains. As a result, a myriad of industries are finding new ways to use this technology for better decision-making, automation, and innovation. 10 | 11 | Alongside this expansion in interest and development is the need for individuals in a variety of roles to quickly ramp up their understanding of AI. However, with the increasing complexity of these models, the substantial amount of new things to learn and the extensive list of new libraries being added every single day, onboarding into the state-of-the-art AI world has become challenging for new engineers. While resources exist, many of these resources (and increasingly so) depend on proprietary technologies. Moreover, the state of knowledge is rapidly changing, and existing resources are quickly out-of-date. 12 | 13 | #### Why are people excited about LLMs? 14 | 15 | Large Language Models (LLMs) are AI models that use deep learning techniques to process and understand natural language text. These models have millions or even billions of parameters that allow them to generate human-like language output, making them ideal for tasks such as language translation, natural-sounding chatbots, document and code generation, and more. LLMs have been trained on massive amounts of data, allowing them to identify patterns in language that were previously difficult for computers to comprehend. This has led to breakthroughs in natural language processing, generated output and improved communication between humans and machines. 16 | 17 | #### Why are people concerned about LLMs? 18 | 19 | LLMs are currently used to power chatbots and a wide variety of content tools that can generate text, images, video, and even code. But the very traits that make LLMs so powerful and useful also present important questions that technologists need to consider when developing and deploying them. 20 | 21 | By mimicking human language and creativity, LLMs have the potential to transform or even automate certain tasks or jobs. The mass harvesting of data to train these systems presents largely unresolved challenges to the principles of copyright, fair use, and fair compensation. 
The tendency of users to view LLM-powered tools as “officious oracles” can lead humans to make flawed or harmful decisions based on the biases and misinformation these systems can produce. LLMs can generate high quality prose that naturally invites consumers to forget that LLMs offer no guarantees of communicating factual or accurate information. 22 | 23 | At Mozilla, we believe that developers should take these risks seriously and cultivate both an understanding and awareness of how their chosen AI technologies behave and impact the world. By their very nature, open source LLMs offer a greater chance to achieve those goals. 24 | 25 | #### What exactly is an LLM? 26 | 27 | At its heart, a Transformer-based Large Language Model (LLM) is essentially a computer program designed to generate text that resembles human-written content. It leverages machine learning techniques, specifically a type of neural network called a Transformer. At a high-level, a Transformer encodes linguistic patterns in the form of statistical relationships between words, and then uses those patterns to generate text. Transformers encode these semantic patterns by ingesting examples of existing text. Now let’s dig a little deeper. 28 | 29 | **Here's how it works:** 30 | 31 | 32 | {% from "partials/image-cards/image-cards.html" import imageCards %} 33 | {% set orientation = "image-top" %} 34 | {% set slides = [ 35 | { "alt": "Tokenization Visualization","title": "Tokenization", "description": "

    The LLM starts by breaking down the input text, or 'prompt', into smaller pieces known as tokens.

    ", "image": "/img/ai/carousels/how-llm-works/tokenization.jpg"}, 36 | { "alt": "Embedding Visualization","title": "Embedding", "description": "

    Next, each token is converted into a numerical representation through a process called embedding.

    ", "image": "/img/ai/carousels/how-llm-works/embedding.jpg"}, 37 | { "alt": "Self-attention Visualization","title": "Self-attention", "description": "

    Now comes the real magic of the Transformer architecture: self-attention mechanisms. These allow the model to weigh the importance of different words when predicting the next word in the sentence. Self attention mechanisms allow the model to create context-aware representations of each word.

    ", "image": "/img/ai/carousels/how-llm-works/self-attention.jpg"}, 38 | { "alt": "Decoding Visualization","title": "Decoding", "description": "

    The LLM Decoder will take the prior word sequence and contextual representation and use it to predict the next token in the sequence.

    ", "image": "/img/ai/carousels/how-llm-works/decoding.jpg"}, 39 | { "alt": "Output Visualization","title": "Output", "description": "

    Finally, the model generates new tokens, and repeats until the final response is complete.

    ", "image": "/img/ai/carousels/how-llm-works/output.jpg"} 40 | ] 41 | %} 42 | {{imageCards(slides,orientation)}} 43 |
44 | 45 | All these steps involve complex mathematical operations and transformations. But fundamentally, what the model is doing is learning patterns in the data it was trained on, and using those patterns to generate new text that fits within the same patterns. 46 | 47 | So, in essence, a Transformer-based LLM is a cleverly designed pattern recognition system that uses learned associations between words to generate human-like text. It's like having a scribe who has read billions of bits of content and can produce text on any topic in a style or format that mirrors the content it was trained on. 48 | 49 | #### What are the pros & cons of using an LLM? 50 | 51 | Although LLMs have made it possible for computers to process human language in ways that were previously difficult, if not impossible, they are not without trade-offs: 52 | 53 | - Pros 54 | - **Improved Token Prediction Accuracy**: LLMs are trained on large amounts of human-readable data (e.g. written text, code, and audio) and rely on state-of-the-art techniques to determine patterns within those data. The size and pattern recognition techniques (e.g. Transformers) improve the accuracy of predictions over previous systems. 55 | - **Efficiency**: With LLMs, tasks such as language translation and chatbots can be automated, potentially freeing up time for humans to focus on more complex tasks. 56 | - **Language Generation**: LLMs can generate human-like language output, making them applicable for tasks such as content creation and copywriting. 57 | - **Code Generation**: LLMs can generate code output, making them a powerful addition to developers' toolkits. 58 | - Cons 59 | - **Computational Power Requirements**: Training an LLM requires significant computational power, which can be expensive and time-consuming. 60 | - **Reproducing problematic training data**: Because LLMs are trained on massive amounts of data, they can perpetuate biases and toxicity (like hate speech) that exist in the training data. 61 | - **Lack of Transparency**: The inner workings of an LLM can be difficult to understand, which makes it challenging to identify how it arrived at a particular decision or prediction. 62 | - **LLM hallucinations**: One of the most interesting and controversial aspects of LLMs is their tendency to generate fluent, plausible-sounding output that is factually wrong or entirely fabricated. This phenomenon is known as LLM hallucination, and it has raised concerns about the potential misuse of these models. 63 | 64 | #### What new behaviors do LLMs unlock? 65 | 66 | Large Language Models (LLMs) have unlocked a plethora of new behaviors that were previously impossible for computers to achieve. For example, LLMs can now generate highly convincing human-like text, which has led to the development of more advanced chatbots and virtual assistants. Additionally, LLMs have revolutionized the field of natural language processing by enabling machines to understand and interpret complex human language in ways that were previously impossible. This has opened up new possibilities for automated language translation, content creation, code generation, and even sentiment analysis. With continued advancements in LLM technology, we can expect to see even more exciting developments in the near future. 67 | 68 | #### When I send a Transformer-based LLM a “prompt”, what happens internally in more technical terms? 69 | 70 | 1. **Tokenization**: This is the first step in processing text data. The tokenizer converts raw text into chunks known as 'tokens'. 
These tokens can represent words, subwords, or even characters depending on the granularity of the tokenization process. For instance, a word-level tokenizer will convert the sentence "I love coding" into 'I', 'love', 'coding'. 71 | 2. **Embedding**: Once the text is tokenized, these tokens are transformed into dense vectors of fixed size in a high-dimensional space through the embedding layer. These embeddings capture semantic information about the words. For example, in this space, 'king' and 'queen' would be closer to each other than 'king' and 'apple'. 72 | 3. **Positional Encoding**: Because self-attention by itself has no built-in notion of word order, positional information is added to each token embedding before it enters the network. This lets the model distinguish, for example, "dog bites man" from "man bites dog". 73 | 4. **Encoder**: The encoded input is passed through several layers (the exact number depends on the model architecture). Each layer consists of self-attention mechanisms and feed-forward neural networks, which allow the model to pay varying levels of 'attention' to different parts of the input when generating each part of the output. The output of the encoder is a series of hidden states that form a context-dependent representation of the text. 74 | 5. **Decoder (if applicable)**: The decoder generates output text token by token based on the hidden states from the encoder. In some models like GPT-3, the decoder is essentially the entire model. It also uses a Transformer architecture and attention mechanism to focus on the relevant parts of the input. 75 | 6. **Attention Mechanism**: This is a key part of both the encoder and decoder in a Transformer. It helps the model dynamically focus on different parts of the input sequence as it processes the data. It computes a weighted sum of the input 'values', where each weight (or 'attention score') reflects how relevant the corresponding 'key' is to the current 'query'. 76 | 7. **Output Generation**: Finally, the model generates the output text. This is done one token at a time, with the model deciding on the most likely next token based on the current state. The process continues until the model generates a termination token or reaches a preset maximum length. 77 | 78 | Each step in this process involves complex mathematical operations and transformations. But at a high level, the goal is to convert text into a form that the model can understand, let the model process and generate a response, and then convert that response back into human-readable text. 79 | 80 | 
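To make these steps concrete, here is a minimal sketch of the full loop using the open source Hugging Face `transformers` library; the choice of GPT-2 and the generation settings are illustrative assumptions, not a recommendation from this guide:

```python
# A minimal sketch of the prompt -> tokens -> generated-text loop described above.
# GPT-2 is used only because it is small and openly available.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "I love coding"
inputs = tokenizer(prompt, return_tensors="pt")                 # step 1: tokenization
print(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]))  # inspect the tokens

# Steps 2-6 (embedding, positional encoding, attention, decoding) all happen
# inside the forward passes that generate() runs, one new token at a time.
output_ids = model.generate(**inputs, max_new_tokens=20)        # step 7: output
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```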
    81 | LLMs 101 82 |
    83 | 84 |
    85 | or 86 | contribute to this page on Github > 87 |
    88 | -------------------------------------------------------------------------------- /templates/content/audio-video-models/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • -------------------------------------------------------------------------------- /templates/content/audio-video-models/index-content.html: -------------------------------------------------------------------------------- 1 |

    Audio & Video Models

    2 |
    3 |

    If you have experience with audio & video models, please consider contributing this section to the AI Guide.

    4 |
    5 | -------------------------------------------------------------------------------- /templates/content/audio-video-models/index.md: -------------------------------------------------------------------------------- 1 | # Audio & Video Models 2 | 3 |
    4 |

    If you have experience with audio & video models, please consider contributing this section to the AI Guide.

    5 |
6 | -------------------------------------------------------------------------------- /templates/content/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | root_path="templates/content" 4 | 5 | # test if command `marked` exists 6 | if ! command -v marked &> /dev/null 7 | then 8 | echo "marked could not be found" 9 | echo "Installing marked..." 10 | 11 | npm install -g marked 12 | fi 13 | 14 | find "$root_path" -name "*.md" | while read -r file; 15 | do echo "Building $file"; 16 | marked -o "${file%.md}-content.html" "$file"; 17 | done 18 | -------------------------------------------------------------------------------- /templates/content/choosing-ml-models/_sidebar.html: -------------------------------------------------------------------------------- 1 | 
  • 2 | 3 | Choosing ML Models 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/choosing-ml-models/index-content.html: -------------------------------------------------------------------------------- 1 |

    Choosing ML Models

    2 |

    This section will walk through a very specific approach to select, run and evaluate the results of ML models. We're starting with a small Language Model to illustrate this building block. It's highly recommended to follow along inside Colab so you can run the examples. All examples will run within Colab's free T4 GPU constraints.
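If you are following along in Colab, a quick sanity check like the sketch below confirms the GPU is visible and loads a small model in half precision so it stays comfortably within the T4's roughly 16 GB of memory. The model name here is an illustrative placeholder; the notebooks choose their own models:

```python
# Confirm the notebook sees Colab's free T4 GPU, then load a small language
# model in float16 to keep well within the T4's ~16 GB of VRAM.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU only")

model_id = "gpt2"  # placeholder: substitute the model you are evaluating
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half precision halves weight memory vs. float32
    device_map="auto",          # requires `accelerate`; places weights on the GPU
)
```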

    3 | 4 |

    First Steps With Language Models

    5 |

    Open this Notebook in Colab

    6 | 7 | 8 | {% include "content/choosing-ml-models/ai-guide-pick-a-model-test-a-model.html" %} 9 | 10 | 11 | 12 |

    Evaluating ML Model Results

    13 |

    Open this Notebook in Colab

    14 | 15 | {% include "content/choosing-ml-models/ai-guide-evaluate-ml-results.html" %} 16 | 17 | 18 |
    19 | Running LLMs Locally 20 |
    21 | 22 |
    23 | or 24 | contribute to this page on Github > 25 |
    26 | -------------------------------------------------------------------------------- /templates/content/choosing-ml-models/index.md: -------------------------------------------------------------------------------- 1 | ## Choosing ML Models 2 | 3 |

    This section will walk through a very specific approach to select, run and evaluate the results of ML models. We're starting with a small Language Model to illustrate this building block. It's highly recommended to follow along inside Colab so you can run the examples. All examples will run within Colab's free T4 GPU constraints.

    4 | 5 | #### First Steps With Language Models 6 |

    Open this Notebook in Colab

    7 | 8 | 9 | {% include "content/choosing-ml-models/ai-guide-pick-a-model-test-a-model.html" %} 10 | 11 | 12 | 13 | #### Evaluating ML Model Results 14 |

    Open this Notebook in Colab

    15 | 16 | {% include "content/choosing-ml-models/ai-guide-evaluate-ml-results.html" %} 17 | 18 | 19 |
    20 | Running LLMs Locally 21 |
    22 | 23 |
    24 | or 25 | contribute to this page on Github > 26 |
    27 | -------------------------------------------------------------------------------- /templates/content/comparing-open-llms/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 | Comparing Open LLMs 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/comparing-open-llms/index-content.html: -------------------------------------------------------------------------------- 1 |

    Comparing Open LLMs

    2 |

    Open this Notebook in Colab

    3 | 4 |

    Mistral 7B & Beyond

    5 | 6 | {% include "content/comparing-open-llms/comparing-open-llms.html" %} 7 | 8 | 9 |
    10 | ❤️ Share Your Feedback 11 |
    12 | 13 |
    14 | or 15 | contribute to this page on Github > 16 |
    17 | -------------------------------------------------------------------------------- /templates/content/comparing-open-llms/index.md: -------------------------------------------------------------------------------- 1 | ## Comparing Open LLMs 2 | 3 |

    Open this Notebook in Colab

    4 | 5 | #### Mistral 7B & Beyond 6 | 7 | {% include "content/comparing-open-llms/comparing-open-llms.html" %} 8 | 9 | 10 |
    11 | ❤️ Share Your Feedback 12 |
    13 | 14 |
    15 | or 16 | contribute to this page on Github > 17 |
    18 | -------------------------------------------------------------------------------- /templates/content/contributions/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 | Contribution Guide 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/contributions/index-content.html: -------------------------------------------------------------------------------- 1 |

    Contribution Guide

    2 |

    The AI Guide will be kickstarted by Mozilla's team but powered by community contributions. Here's how you can help!

    3 |

    Why contribute to AI Guide?

    4 |

    Mozilla champions an open, accessible Internet where people have the reins. We welcome these amazing new AI breakthroughs.

    5 |

    However, with substantial corporate dollars being invested into AI R&D, it's unclear for engineers new to the scene which paths are the ones that lead to sustainable open software. We've seen this story before.

    6 |

    Mozilla's efforts in AI are more than just technical - they're a call to action and unity across the currently fragmented open source AI Community.

    7 |

    Our AI Guide is a living, breathing resource, rooted in collaboration and community input for experts and newcomers alike, and we invite you to build alongside us.

    8 |

    Join our community →

    9 |

    How do I contribute?

    10 |

    Where the Guide needs your help:

    11 | 18 | -------------------------------------------------------------------------------- /templates/content/contributions/index.md: -------------------------------------------------------------------------------- 1 | ## Contribution Guide 2 | The AI Guide will be kickstarted by Mozilla's team but powered by community contributions. Here's how you can help! 3 | 4 | ### Why contribute to AI Guide? 5 | Mozilla champions an open, accessible Internet where people have the reins. We welcome these amazing new AI breakthroughs. 6 | 7 | However, with substantial corporate dollars being invested into AI R&D, it's unclear for engineers new to the scene which paths are the ones that lead to sustainable open software. We've seen this story before. 8 | 9 | Mozilla's efforts in AI are more than just technical - they're a call to action and unity across the currently fragmented open source AI Community. 10 | 11 | Our AI Guide is a living, breathing resource, rooted in collaboration and community input for experts and newcomers alike, and we invite you to build alongside us. 12 | 13 | [Join our community →](https://discord.gg/3egbzTKhdk) 14 | 15 | ### How do I contribute? 16 | Where the Guide needs your help: 17 | * [Submit content changes, typos or errors](https://github.com/mozilla/ai-guide/issues/new?assignees=&labels=topics&projects=&template=topic-coverage.yml) to existing pages 18 | * [Suggest missing or new content](https://github.com/mozilla/ai-guide/issues/new?assignees=&labels=topics&projects=&template=topic-coverage.yml) for the Guide 19 | * Fork the repo and create [new Python notebooks](https://github.com/mozilla/ai-guide), submit a Github PR for review 20 | * [Provide feedback](https://github.com/mozilla/ai-guide/issues/new?assignees=&labels=general&projects=&template=general-feedback.yml) on audience, tone and other content suggestions 21 | * [Share other thoughts](https://forms.gle/eYJ2s6avtCBXMUQH9) on the guide and our approach 22 | -------------------------------------------------------------------------------- /templates/content/embeddings-and-rag/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • -------------------------------------------------------------------------------- /templates/content/embeddings-and-rag/index-content.html: -------------------------------------------------------------------------------- 1 |

    Embeddings & RAG

    2 |
    3 |

    If you have experience with embeddings & RAG, please consider contributing this section to the AI Guide.

    4 |
    5 | -------------------------------------------------------------------------------- /templates/content/embeddings-and-rag/index.md: -------------------------------------------------------------------------------- 1 | # Embeddings & RAG 2 | 3 |
    4 |

    If you have experience with embeddings & RAG, please consider contributing this section to the AI Guide.

    5 |
    6 | -------------------------------------------------------------------------------- /templates/content/fine-tuning-llms/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • -------------------------------------------------------------------------------- /templates/content/fine-tuning-llms/index-content.html: -------------------------------------------------------------------------------- 1 |

    Fine-tuning LLMs

    2 |
    3 |

    If you have experience with fine-tuning LLMs, please consider contributing this section to the AI Guide.

    4 |
    5 | -------------------------------------------------------------------------------- /templates/content/fine-tuning-llms/index.md: -------------------------------------------------------------------------------- 1 | # Fine-tuning LLMs 2 | 3 |
    4 |

    If you have experience with fine-tuning LLMs, please consider contributing this section to the AI Guide.

    5 |
    6 | -------------------------------------------------------------------------------- /templates/content/image-models/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 |
    3 | 4 | Image Generation 5 | 6 | 34 |
    35 |
  • 36 | -------------------------------------------------------------------------------- /templates/content/introduction/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 | Introduction 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/introduction/index-content.html: -------------------------------------------------------------------------------- 1 |

    Introduction

    2 |

    If you're new to AI

    3 |

    Read these two sections first. They will help you understand the current lay of the land, along with what all this new jargon means so that you're not completely lost.

    4 | 8 | 9 |

    Getting started with code

    10 |

    This is the first part of the "Choosing ML Models" section.

    11 | 12 | {% include "content/choosing-ml-models/ai-guide-pick-a-model-test-a-model.html" %} 13 | 14 |
    15 | Evaluating LM Results 16 |
    17 | 18 | 19 |

    Why this guide?

    20 |

    Large language and image/video generation models are opening up new ways to create, plan and execute the things we do at work and home every day.

    21 |

    So far, onboarding into the state-of-the-art within AI has been challenging for engineers of any level who are new to the AI space. Current onboarding options require piecing-together various solutions in disjointed ways. Most of these solutions are also not default open source, and there are vast amounts of new terminology, entities and libraries to learn.

    22 |

    As of this moment, the historical pattern of engineers starting and ending projects with primarily open source components is at risk. Foundation models are very expensive to train from scratch, and open source versions have a hard time competing with well resourced for-profit companies. Mozilla is planting a flag in the ground in this space and creating a new hub for open-source AI projects, kickstarting a vibrant open-source community, with clear shared goals and improved open-source visibility for this important new sector.

    23 |

    Why Mozilla?

    24 |

    Mozilla has been developing and shipping AI through its products and initiatives for many years. From Common Voice to Firefox Translate and beyond, we see in AI the potential to empower humans, make technology more accessible, and improve lives.

    25 |

    But like many others, we also see the risk of real-world harm. We don’t have to cast our gazes to some far-flung science fiction future to envision it: people are already experiencing real-world impacts from this technology, right here and now.

    26 |

    Much will depend on how this technology is developed and deployed, but it will also depend on who guides it. We believe that the open source community has a critical role to play in ensuring that AI develops in a way that is responsible, trustworthy, equitable, and safe. We hope that this guide contributes to mobilizing the open source community to join us in this important work.

    27 |
    28 | AI Basics 29 |
    30 | 31 |
    32 | or 33 | contribute to this page on Github > 34 |
    35 | -------------------------------------------------------------------------------- /templates/content/introduction/index.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | #### If you're new to AI 4 | Read these two sections first. They will help you understand the current lay of the land, along with what all this new jargon means so that you're not completely lost. 5 | 9 | 10 | #### Getting started with code 11 | This is the first part of the "Choosing ML Models" section. 12 | 13 | 14 | {% include "content/choosing-ml-models/ai-guide-pick-a-model-test-a-model.html" %} 15 | 16 |
    17 | Evaluating LM Results 18 |
    19 | 20 | 21 | #### Why this guide? 22 | Large language and image/video generation models are opening up new ways to create, plan and execute the things we do at work and home every day. 23 | 24 | So far, onboarding into the state-of-the-art within AI has been challenging for engineers of any level who are new to the AI space. Current onboarding options require piecing-together various solutions in disjointed ways. Most of these solutions are also not default open source, and there are vast amounts of new terminology, entities and libraries to learn. 25 | 26 | As of this moment, the historical pattern of engineers starting and ending projects with primarily open source components is at risk. Foundation models are very expensive to train from scratch, and open source versions have a hard time competing with well resourced for-profit companies. Mozilla is planting a flag in the ground in this space and creating a new hub for open-source AI projects, kickstarting a vibrant open-source community, with clear shared goals and improved open-source visibility for this important new sector. 27 | 28 | #### Why Mozilla? 29 | Mozilla has been developing and shipping AI through its products and initiatives for many years. From Common Voice to Firefox Translate and beyond, we see in AI the potential to empower humans, make technology more accessible, and improve lives. 30 | 31 | But like many others, we also see the risk of real-world harm. We don’t have to cast our gazes to some far-flung science fiction future to envision it: people are already experiencing real-world impacts from this technology, right here and now. 32 | 33 | Much will depend on how this technology is developed and deployed, but it will also depend on who guides it. We believe that the open source community has a critical role to play in ensuring that AI develops in a way that is responsible, trustworthy, equitable, and safe. We hope that this guide contributes to mobilizing the open source community to join us in this important work. 34 | 35 |
    36 | AI Basics 37 |
    38 | 39 |
    40 | or 41 | contribute to this page on Github > 42 |
43 | -------------------------------------------------------------------------------- /templates/content/llms-101/_params-carousel.html: -------------------------------------------------------------------------------- 1 | {% set slides = [ 2 | { "alt": "LLM Model Parameter Visualization", "title": "Model", "description": "Different models are trained on different sets of data, which influences their response.", "image": "/img/ai/carousels/parameters/llm-01-model.jpg"}, 3 | { "alt": "Max Length Parameter Visualization", "title": "Max Length", "description": "Setting a limit to the number of tokens.", "image": "/img/ai/carousels/parameters/llm-02-maxlength.jpg"}, 4 | { "alt": "Temperature Parameter Visualization", "title": "Temperature", "description": "Degree of randomness. Setting a high temperature may lead to more creative responses. Setting a low temperature will result in responses that are more literal.", "image": "/img/ai/carousels/parameters/llm-03-temp.jpg"}, 5 | { "alt": "Top P Parameter Visualization", "title": "Top P (Probability)", "description": "Include only the top tokens whose percentage likelihood adds up to the specified “Top P” (e.g. 15%).", "image": "/img/ai/carousels/parameters/llm-04-top-p.jpg"}, 6 | { "alt": "Frequency Parameter Visualization", "title": "Frequency", "description": "The higher this is set, the more that repetition of tokens present in the context will be penalized in suggestions.", "image": "/img/ai/carousels/parameters/llm-05-frequency.jpg"} 7 | ] 8 | %} 9 | 10 | 23 |
    24 | {% for slide in slides %} 25 | {{loop.index}} 26 | {% endfor %} 27 |
    -------------------------------------------------------------------------------- /templates/content/llms-101/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 | Language Models 101 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/llms-101/index-content.html: -------------------------------------------------------------------------------- 1 |

    Language Models 101

    2 |

    What's the difference between a "language model" and a "large language model"?

    3 |

    A "Large Language Model" (LLM) is a type of "Language Model" (LM) with more parameters, which allows it to generate or understand text better. The term 'large' refers to the number of parameters the model has been trained on. Usually, a LLM provides higher quality results than smaller LMs due to its ability to capture more complex patterns in the data.

    4 |

    What do these numbers mean in the names of models?

    5 |

    For example: "Vicuna-13B". The name of the model is Vicuna, and it has 13 billion parameters.

    6 |

    What is a parameter?

    7 |

    A parameter is a value that the model learns during training. These values are adjusted through a process called backpropagation, which involves calculating the error between the model's predictions and the actual output and adjusting the parameters to minimize this error. The number of parameters in an LLM is typically very large, often numbering in the millions or even billions. These parameters capture the relationships between different words and phrases in language, allowing the model to generate human-like output and make accurate predictions. Without these parameters, a language model would not be able to perform natural language processing tasks at a high level of accuracy.
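To make that concrete, here is a tiny sketch (a single PyTorch layer, purely illustrative) showing that "parameters" are just the learned numbers inside the model's layers:

```python
# Counting the parameters of a single linear layer.
import torch.nn as nn

layer = nn.Linear(in_features=512, out_features=512)
n_params = sum(p.numel() for p in layer.parameters())
print(n_params)  # 512*512 weights + 512 biases = 262,656 parameters
```

A model advertised as "13B" simply stacks enough layers like this for the total to reach 13 billion.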

    8 |

    What does “training” an ML model mean?

    9 |

    Training a model involves exposing it to large amounts of data so that it can learn patterns and make accurate predictions. During training, the parameters of the model are adjusted based on the input data and desired output. This process can take a significant amount of time and computational resources, but it is essential for achieving high levels of accuracy in natural language processing tasks.

    10 |

    How does a typical training run work?

    11 |

During a typical training run, the language model is fed with a large amount of text data and corresponding targets. Generally, the targets are “masked” words within a training sequence that the model needs to predict. The model then compares its predictions against these targets and uses the resulting errors to repeatedly update its parameters through an optimization algorithm like stochastic gradient descent (SGD) or Adam.

    12 |

    During training, the network makes predictions on a batch of input data and calculates the loss, which is a measure of how well the predictions match the actual targets. The optimizer then adjusts the weights of the network using backpropagation to minimize the loss. This process is repeated iteratively for a fixed number of epochs or until a convergence criterion is met. The process of updating the model's parameters continues iteratively until the model reaches a satisfactory level of accuracy on the training set.

    13 |

It's worth noting that training a language model (especially an LLM) can be a resource-intensive process, requiring significant computational power and time.
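Stripped of all the scale, the loop described above looks like this minimal PyTorch sketch; the model, data, and hyperparameters are stand-ins, and real LLM training adds tokenization, masking, checkpointing, and distributed execution:

```python
# One miniature training run: forward pass, loss, backpropagation, update.
import torch
import torch.nn as nn

model = nn.Linear(10, 1)                       # stand-in for a real network
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

inputs = torch.randn(32, 10)                   # a batch of 32 examples
targets = torch.randn(32, 1)                   # the corresponding targets

for epoch in range(100):                       # repeat until a stopping criterion
    predictions = model(inputs)                # forward pass
    loss = loss_fn(predictions, targets)       # how far off were we?
    optimizer.zero_grad()
    loss.backward()                            # backpropagation computes gradients
    optimizer.step()                           # Adam adjusts the parameters
```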

    14 |

    What is backpropagation?

    15 |

    Backpropagation is a process used to adjust the parameters of a model during training. It involves calculating the error between the model's predictions and the actual output and adjusting the parameters to minimize this error.

    16 |

    The process starts by making a forward pass through the neural network, where input data is fed into the model, and output predictions are generated. The difference between the predicted output and the actual targets is then calculated using a loss function. This loss value is then backpropagated through the network, starting from the last layer and moving backward towards the first layer.

    17 |

    As it moves backward, each layer updates its weights based on how much it contributed to the final loss value. This process continues until all layers have updated their weights, resulting in a new set of parameters that hopefully improve the model's performance on future inputs.
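
    To make the weight update concrete, here is a NumPy sketch of gradient descent on a single linear layer with a squared-error loss. A real network repeats this update layer by layer via the chain rule; all values here are invented.

    ```
    # A minimal sketch of the gradient-based weight update, in NumPy.
    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=(4, 3))          # a batch of 4 inputs with 3 features
    y = rng.normal(size=(4, 1))          # the actual targets
    W = rng.normal(size=(3, 1)) * 0.1    # the layer's parameters

    for step in range(100):
        pred = x @ W                     # forward pass
        err = pred - y
        loss = (err ** 2).mean()         # squared-error loss
        grad_W = 2 * x.T @ err / len(x)  # how much each weight contributed
        W -= 0.1 * grad_W                # adjust parameters to reduce the loss
    print(round(float(loss), 4))
    ```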

    18 |

    What does “fine-tuning” a language model mean?

    19 |

    Fine-tuning a language model involves taking a pre-trained language model and providing additional training using task-specific data sets moderated by humans. This process typically requires less data than training a model from scratch and can be done relatively quickly. Fine-tuning has become increasingly popular in recent years as more pre-trained models have become available.

    20 |

    “RLHF” approach

    21 |

    The "Reinforcement Learning From Human Feedback" approach involves incorporating human feedback into machine learning models to improve their accuracy and performance. In the context of natural language processing, this might involve having humans review text generated by a model and provide feedback on its quality, which can then be used to fine-tune the model. This approach is valuable because it can be used to reduce bias and errors and make usage of AI more trustworthy.

    22 |

    What does "inference" mean?

    23 |

    Inference refers to the process of using a trained machine learning model to make predictions or decisions based on new input data. In other words, it's the application of a trained model to real-world data in order to obtain useful insights or take action based on those insights. When performing inference with a language model, the model takes in new text as input and generates output text based on what it has learned during training and fine-tuning. Inference is a critical step in the machine learning workflow, as it allows models to be used for practical applications such as chatbots, language translation, and sentiment analysis.
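
    For example, here is a minimal inference sketch using the Hugging Face transformers library; the small gpt2 model is chosen purely for illustration.

    ```
    # A minimal inference sketch with transformers; gpt2 is just an example model.
    from transformers import pipeline

    generator = pipeline("text-generation", model="gpt2")
    result = generator("The quick brown fox", max_new_tokens=20)
    print(result[0]["generated_text"])
    ```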

    24 |

    Is inference computationally expensive?

    25 |

    YES, especially for larger models with more parameters. To address this, some models use techniques such as Beam search or sampling to generate output text more efficiently. Additionally, some cloud providers offer pre-trained language models that can be accessed via APIs for a fee, which can help reduce the computational burden of running them locally.

    26 |

    What is a vector?

    27 |

    In the context of natural language processing, a vector is a list of numbers used to represent a word or phrase in a way that captures its relationship to other words or phrases within a model. A key useful feature of these vectors is that similar things cluster together.

    28 |

    Every vector in a model has the same length, with each position in the list representing a semantically interesting feature discovered about the word or phrase through statistical analysis. While it's typically difficult to explain what these individual features might mean to a human, quantifying them allows vectors to be mathematically compared as coordinates in a high-dimensional space. In such a space, distance can approximate overall similarity in meaning between the words or phrases from which the vectors were derived.

    29 |

    Vectors are typically derived using techniques such as word embeddings, which map each word to a high-dimensional vector based on its co-occurrence with other words in a large corpus of text.
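
    As a toy illustration of "similar things cluster together", here is a NumPy sketch comparing invented 4-dimensional vectors with cosine similarity. Real embeddings have hundreds or thousands of dimensions.

    ```
    # A minimal sketch: cosine similarity between invented word vectors.
    import numpy as np

    vectors = {
        "cat": np.array([0.9, 0.1, 0.3, 0.0]),
        "dog": np.array([0.8, 0.2, 0.35, 0.05]),
        "car": np.array([0.0, 0.9, 0.1, 0.7]),
    }

    def cosine(a, b):
        return a @ b / (np.linalg.norm(a) * np.linalg.norm(b))

    print(cosine(vectors["cat"], vectors["dog"]))  # high: similar words cluster
    print(cosine(vectors["cat"], vectors["car"]))  # low: dissimilar words
    ```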

    30 |

    What is Beam search?

    31 |

    Beam search is a search algorithm used to generate output sequences from a model during inference. It works by maintaining a set of the top k most likely sequences at each step of the generation process, where k is known as the Beam width. The algorithm then continues generating new tokens for each sequence in the set until all sequences have reached an end-of-sequence token or a maximum length has been reached. At each step, the set of possible sequences is pruned based on their likelihood according to the model's predictions, resulting in a final set of top-k output sequences.
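
    Here is a compact sketch of that pruning loop; the `next_token_probs` function is a stand-in for a real model's predictions.

    ```
    # A minimal Beam search sketch over a toy 3-token vocabulary.
    # `next_token_probs` stands in for a real model; <eos> is token 2.
    import math

    def next_token_probs(seq):
        return {0: 0.5, 1: 0.3, 2: 0.2}  # toy, fixed distribution

    def beam_search(k=2, max_len=5, eos=2):
        beams = [([], 0.0)]  # (sequence, log-probability)
        for _ in range(max_len):
            candidates = []
            for seq, score in beams:
                if seq and seq[-1] == eos:  # finished sequences carry over
                    candidates.append((seq, score))
                    continue
                for tok, p in next_token_probs(seq).items():
                    candidates.append((seq + [tok], score + math.log(p)))
            # prune: keep only the k most likely sequences (the Beam width)
            beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:k]
        return beams

    print(beam_search())
    ```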

    32 |

    What is frequency?

    33 | 34 | {% set slide = { "alt": "Frequency Parameter Visualization","title": "Frequency", "description": "

    Frequency is a parameter used in language models to control how often a token can be repeated in the generated output. It works by penalizing the model for repeating tokens that have already been used, preventing the model from generating repetitive or nonsensical text. The higher this parameter is set, the more strongly tokens already present in the context are penalized in subsequent suggestions.

    ", "image": "/img/ai/carousels/parameters/llm-05-frequency.jpg"}%} 35 | {% from "partials/image-cards/image-card.html" import imageCard %} 36 | {% set orientation = "image-single" %} 37 | {{ imageCard(slide, orientation,false) }} 38 |
    39 | 40 |

    What is temperature?

    41 | 42 | {% set slide = { "alt": "Temperature Parameter Visualization","title": "Temperature", "description": "

    Temperature is a technique used in language models to control the level of randomness and creativity in the generated output during inference. It works by scaling the predicted probability distribution over possible tokens at each step by a temperature parameter, which controls how much the probabilities are 'softened' or spread out.

    Lower temperatures result in more conservative and predictable output, while higher temperatures lead to more diverse and creative output. However, setting the temperature too high can also lead to nonsensical or ungrammatical sentences. Finding the optimal temperature for a given task or application often requires experimentation and fine-tuning.

    ", "image": "/img/ai/carousels/parameters/llm-03-temp.jpg"}%} 43 | {% from "partials/image-cards/image-card.html" import imageCard %} 44 | {% set orientation = "image-single" %} 45 | {{ imageCard(slide, orientation,false) }} 46 |
    47 | 48 |

    What is sampling?

    49 |

    Sampling is another algorithm used to generate output sequences from a model during inference. Unlike Beam search, which generates only the top-k most likely sequences at each step, sampling generates output tokens probabilistically based on the model's predicted probability distribution over all possible tokens at that step. This can lead to more diverse and creative output compared to Beam search, but it can also result in less coherent or grammatical sentences if not properly controlled through techniques such as temperature scaling or nucleus sampling.
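
    The sampling step itself is tiny; here is a sketch that draws the next token from an invented distribution.

    ```
    # A minimal sampling sketch: the next token is drawn at random
    # according to the model's predicted probabilities.
    import numpy as np

    rng = np.random.default_rng()
    probs = np.array([0.6, 0.25, 0.1, 0.05])  # toy next-token distribution
    next_token = rng.choice(len(probs), p=probs)
    print(next_token)  # usually 0, but occasionally a less likely token
    ```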

    50 |

    What is 'top_k' sampling?

    51 |

    Top-k sampling is a method used in language generation where, instead of considering all possible next words in the vocabulary, the model only considers the top 'k' most probable next words.

    52 |

    This technique helps to focus the model on likely continuations and reduces the chances of generating irrelevant or nonsensical text. It strikes a balance between creativity and coherence by limiting the pool of next word choices, but not so much that the output becomes deterministic.
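
    A minimal sketch of top-k filtering, assuming an invented five-token distribution:

    ```
    # Top-k: zero out all but the k most probable tokens, renormalize, sample.
    import numpy as np

    def top_k_sample(probs, k):
        rng = np.random.default_rng()
        top = np.argsort(probs)[-k:]     # indices of the k most likely tokens
        filtered = np.zeros_like(probs)
        filtered[top] = probs[top]
        filtered /= filtered.sum()       # renormalize over the survivors
        return rng.choice(len(probs), p=filtered)

    probs = np.array([0.5, 0.2, 0.15, 0.1, 0.05])
    print(top_k_sample(probs, k=3))      # only tokens 0, 1, or 2 can appear
    ```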

    53 |

    What is 'top_p' sampling?

    54 | 55 | {% set slide = { "alt": "Top P Parameter Visualization","title": "Top P (Probability)", "description": "

    Top-p sampling, also known as nucleus sampling, is a strategy where the model considers only the smallest set of top words whose cumulative probability exceeds a threshold 'p'.

    Unlike top-k which considers a fixed number of words, top-p adapts based on the distribution of probabilities for the next word. This makes it more dynamic and flexible. It helps create diverse and sensible text by allowing less probable words to be selected when the most probable ones don't add up to 'p'.

    ", "image": "/img/ai/carousels/parameters/llm-04-top-p.jpg"}%} 56 | {% from "partials/image-cards/image-card.html" import imageCard %} 57 | {% set orientation = "image-single" %} 58 | {{ imageCard(slide, orientation,false) }} 59 |
    60 | 61 |
    62 | Choosing ML Models 63 |
    64 | 65 |
    66 | or 67 | contribute to this page on Github > 68 |
    69 | -------------------------------------------------------------------------------- /templates/content/llms-101/index.md: 1 | ## Language Models 101 2 | 3 | #### What's the difference between a "language model" and a "large language model"? 4 | 5 | A "Large Language Model" (LLM) is a type of "Language Model" (LM) with more parameters, which allows it to generate or understand text better. The term 'large' refers to the number of parameters the model has. Usually, an LLM provides higher quality results than smaller LMs due to its ability to capture more complex patterns in the data. 6 | 7 | #### What do these numbers mean in the names of models? 8 | 9 | For example: "Vicuna-13B". The name of the model is Vicuna, and it has 13 billion parameters. 10 | 11 | #### What is a parameter? 12 | 13 | A parameter is a value that the model learns during training. These values are adjusted through a process called backpropagation, which involves calculating the error between the model's predictions and the actual output and adjusting the parameters to minimize this error. The number of parameters in an LLM is typically very large, often numbering in the millions or even billions. These parameters capture the relationships between different words and phrases in language, allowing the model to generate human-like output and make accurate predictions. Without these parameters, a language model would not be able to perform natural language processing tasks at a high level of accuracy. 14 | 15 | #### What does “training” an ML model mean? 16 | 17 | Training a model involves exposing it to large amounts of data so that it can learn patterns and make accurate predictions. During training, the parameters of the model are adjusted based on the input data and desired output. This process can take a significant amount of time and computational resources, but it is essential for achieving high levels of accuracy in natural language processing tasks. 18 | 19 | #### How does a typical training run work? 20 | 21 | During a typical training run, the language model is fed a large amount of text data and corresponding targets. Generally, the targets are “masked” words within a training sequence that the model needs to predict. The model then uses its prediction errors against these targets to repeatedly update its parameters through an optimization algorithm like stochastic gradient descent (SGD) or Adam. 22 | 23 | During training, the network makes predictions on a batch of input data and calculates the loss, which is a measure of how well the predictions match the actual targets. The optimizer then adjusts the weights of the network using backpropagation to minimize the loss. This process is repeated iteratively for a fixed number of epochs or until a convergence criterion is met. The process of updating the model's parameters continues iteratively until the model reaches a satisfactory level of accuracy on the training set. 24 | 25 | It's worth noting that training a language model (especially an LLM) can be a resource-intensive process, requiring significant computational power and time. 26 | 27 | #### What is backpropagation? 28 | 29 | Backpropagation is a process used to adjust the parameters of a model during training. It involves calculating the error between the model's predictions and the actual output and adjusting the parameters to minimize this error.
    30 | 31 | The process starts by making a forward pass through the neural network, where input data is fed into the model, and output predictions are generated. The difference between the predicted output and the actual targets is then calculated using a loss function. This loss value is then backpropagated through the network, starting from the last layer and moving backward towards the first layer. 32 | 33 | As it moves backward, each layer updates its weights based on how much it contributed to the final loss value. This process continues until all layers have updated their weights, resulting in a new set of parameters that hopefully improve the model's performance on future inputs. 34 | 35 | #### What does “fine-tuning” a language model mean? 36 | 37 | Fine-tuning a language model involves taking a pre-trained language model and providing additional training using task-specific data sets moderated by humans. This process typically requires less data than training a model from scratch and can be done relatively quickly. Fine-tuning has become increasingly popular in recent years as more pre-trained models have become available. 38 | 39 | #### “RLHF” approach 40 | The "Reinforcement Learning From Human Feedback" approach involves incorporating human feedback into machine learning models to improve their accuracy and performance. In the context of natural language processing, this might involve having humans review text generated by a model and provide feedback on its quality, which can then be used to fine-tune the model. This approach is valuable because it can be used to reduce bias and errors and make usage of AI more trustworthy. 41 | 42 | #### What does "inference" mean? 43 | 44 | Inference refers to the process of using a trained machine learning model to make predictions or decisions based on new input data. In other words, it's the application of a trained model to real-world data in order to obtain useful insights or take action based on those insights. When performing inference with a language model, the model takes in new text as input and generates output text based on what it has learned during training and fine-tuning. Inference is a critical step in the machine learning workflow, as it allows models to be used for practical applications such as chatbots, language translation, and sentiment analysis. 45 | 46 | #### Is inference computationally expensive? 47 | 48 | YES, especially for larger models with more parameters. To address this, some models use techniques such as Beam search or sampling to generate output text more efficiently. Additionally, some cloud providers offer pre-trained language models that can be accessed via APIs for a fee, which can help reduce the computational burden of running them locally. 49 | 50 | #### What is a vector? 51 | 52 | In the context of natural language processing, a vector is a list of numbers used to represent a word or phrase in a way that captures its relationship to other words or phrases within a model. A key useful feature of these vectors is that similar things cluster together. 53 | 54 | Every vector in a model has the same length, with each position in the list representing a semantically interesting feature discovered about the word or phrase through statistical analysis. While it's typically difficult to explain what these individual features might mean to a human, quantifying them allows vectors to be mathematically compared as coordinates in a high-dimensional space.
    In such a space, distance can approximate overall similarity in meaning between the words or phrases from which the vectors were derived. 55 | 56 | Vectors are typically derived using techniques such as word embeddings, which map each word to a high-dimensional vector based on its co-occurrence with other words in a large corpus of text. 57 | 58 | #### What is Beam search? 59 | 60 | Beam search is a search algorithm used to generate output sequences from a model during inference. It works by maintaining a set of the top k most likely sequences at each step of the generation process, where k is known as the Beam width. The algorithm then continues generating new tokens for each sequence in the set until all sequences have reached an end-of-sequence token or a maximum length has been reached. At each step, the set of possible sequences is pruned based on their likelihood according to the model's predictions, resulting in a final set of top-k output sequences. 61 | 62 | #### What is frequency? 63 | 64 | 65 | {% set slide = { "alt": "Frequency Parameter Visualization","title": "Frequency", "description": "

    Frequency is a parameter used in language models to control how often a token can be repeated in the generated output. It works by penalizing the model for repeating tokens that have already been used, preventing the model from generating repetitive or nonsensical text. The higher this parameter is set, the more strongly tokens already present in the context are penalized in subsequent suggestions.

    ", "image": "/img/ai/carousels/parameters/llm-05-frequency.jpg"}%} 66 | {% from "partials/image-cards/image-card.html" import imageCard %} 67 | {% set orientation = "image-single" %} 68 | {{ imageCard(slide, orientation,false) }} 69 |
    70 | 71 | #### What is temperature? 72 | 73 | 74 | {% set slide = { "alt": "Temperature Parameter Visualization","title": "Temperature", "description": "

    Temperature is a technique used in language models to control the level of randomness and creativity in the generated output during inference. It works by scaling the predicted probability distribution over possible tokens at each step by a temperature parameter, which controls how much the probabilities are 'softened' or spread out.

    Lower temperatures result in more conservative and predictable output, while higher temperatures lead to more diverse and creative output. However, setting the temperature too high can also lead to nonsensical or ungrammatical sentences. Finding the optimal temperature for a given task or application often requires experimentation and fine-tuning.

    ", "image": "/img/ai/carousels/parameters/llm-03-temp.jpg"}%} 75 | {% from "partials/image-cards/image-card.html" import imageCard %} 76 | {% set orientation = "image-single" %} 77 | {{ imageCard(slide, orientation,false) }} 78 |
    79 | 80 | #### What is sampling? 81 | 82 | Sampling is another algorithm used to generate output sequences from a model during inference. Unlike Beam search, which generates only the top-k most likely sequences at each step, sampling generates output tokens probabilistically based on the model's predicted probability distribution over all possible tokens at that step. This can lead to more diverse and creative output compared to Beam search, but it can also result in less coherent or grammatical sentences if not properly controlled through techniques such as temperature scaling or nucleus sampling. 83 | 84 | #### What is 'top_k' sampling? 85 | 86 | Top-k sampling is a method used in language generation where, instead of considering all possible next words in the vocabulary, the model only considers the top 'k' most probable next words. 87 | 88 | This technique helps to focus the model on likely continuations and reduces the chances of generating irrelevant or nonsensical text. It strikes a balance between creativity and coherence by limiting the pool of next word choices, but not so much that the output becomes deterministic. 89 | 90 | #### What is 'top_p' sampling? 91 | 92 | 93 | {% set slide = { "alt": "Top P Parameter Visualization","title": "Top P (Probability)", "description": "

    Top-p sampling, also known as nucleus sampling, is a strategy where the model considers only the smallest set of top words whose cumulative probability exceeds a threshold 'p'.

    Unlike top-k which considers a fixed number of words, top-p adapts based on the distribution of probabilities for the next word. This makes it more dynamic and flexible. It helps create diverse and sensible text by allowing less probable words to be selected when the most probable ones don't add up to 'p'.

    ", "image": "/img/ai/carousels/parameters/llm-04-top-p.jpg"}%} 94 | {% from "partials/image-cards/image-card.html" import imageCard %} 95 | {% set orientation = "image-single" %} 96 | {{ imageCard(slide, orientation,false) }} 97 |
    98 | 99 |
    100 | Choosing ML Models 101 |
    102 | 103 |
    104 | or 105 | contribute to this page on Github > 106 |
    107 | -------------------------------------------------------------------------------- /templates/content/models-from-scratch/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • -------------------------------------------------------------------------------- /templates/content/models-from-scratch/index-content.html: -------------------------------------------------------------------------------- 1 |

    Models From Scratch

    2 |
    3 |

    If you have experience creating LLMs from scratch, please consider contributing this section to the AI Guide.

    4 |
    5 | -------------------------------------------------------------------------------- /templates/content/models-from-scratch/index.md: 1 | ## Models From Scratch 2 | 3 |
    4 |

    If you have experience creating LLMs from scratch, please consider contributing this section to the AI Guide.

    5 |
    6 | -------------------------------------------------------------------------------- /templates/content/notable-projects/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 | Notable Projects 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/notable-projects/index.md: -------------------------------------------------------------------------------- 1 |
    2 | 3 | ## Notable Projects 4 | Submit a project you love to ai-guide@mozilla.com. 5 | 6 | #### Complete LLM Solutions 7 | These projects offer a full-stack chatbot solution, meaning that there is at least an inference run-time and a UI for interacting with it. Actual feature sets may vary rather wildly beyond that. 8 | 9 | ##### [GPT4All](https://gpt4all.io) 10 | License: MIT | Built with: gpt4all, llama.cpp 11 | 12 | A one-click installer (for Windows, macOS, and Ubuntu) for a locally-running chatbot with a native UI client. Probably the easiest to use and best all-around open source chatbot app. Supports a wide variety of model architectures (including LLaMA, LLaMA 2, GPT-J, MPT, and Falcon), makes it easy to download weights files, supports retrieval augmented generation (i.e. asking questions across local private documents), offers both CPU and GPU inference, and is highly configurable. It’s part of the overall GPT4All project from Nomic.ai. 13 | 14 | ##### [privateGPT](https://github.com/imartinez/privateGPT) 15 | License: Apache 2.0 | 16 | Built with: gpt4all, llama.cpp, langchain, sentence-transformers, chroma 17 | 18 | privateGPT is a chatbot project focused on retrieval augmented generation. It runs from the command line, easily ingests a wide variety of local document formats, and supports a variety of model architectures (by building on top of the gpt4all project). 19 | 20 | ##### [Ollama](https://github.com/jmorganca/ollama) 21 | License: MIT | 22 | Built with: llama.cpp, and a bunch of original Go code 23 | 24 | Ollama provides a one-click chatbot installer for macOS, with versions for Windows and Linux expected in the future. It runs from the command line and lacks a built-in native GUI (although one is available separately). It also lacks many of the features offered by GPT4All. But it’s extremely easy to get working, and it runs quite well on consumer-grade Apple Silicon-based MacBooks. Ultimately, it’s similar to running llama.cpp yourself, just easier. 25 | 26 | ##### [text-generation-webui](https://github.com/oobabooga/text-generation-webui) 27 | License: AGPL 3.0 28 | 29 | This project is probably the most popular open source Web-based chatbot front-end, and comes paired with most of the major inference runtimes. It provides most of the user-facing functionality found in the ChatGPT client, although with a different UI metaphor. This project has quite a lot of users and quite a number of forks, so it’s one to keep an eye on. 30 | 31 | 32 | #### Inference runtimes 33 | These projects provide tools for running LLMs for text inference activities. They roughly bifurcate on a “LLaMA vs. Hugging Face Transformers” axis. Some are command line tools, whereas others are meant to be used in a server capacity. 34 | 35 | 36 | ##### [llama.cpp](https://github.com/ggerganov/llama.cpp) 37 | License: MIT | 38 | Built with: GGML, a model quantization framework, recently replaced by GGUF 39 | 40 | llama.cpp is perhaps the most noteworthy and impactful project in open source LLM tooling to date. It enables users to run inference with LLaMA-architecture models in a way that is performant on consumer-grade hardware. It accomplishes this by quantizing model weights files, reducing their size and thus their memory and computational requirements. This, along with other tricks, makes it possible to run these models with just a CPU. Originally optimized for Apple Silicon, llama.cpp has grown to support Intel, as well as GPUs from Nvidia and Apple.
    As the underpinning of many other OSS LLM projects, this project remains highly influential. As evidence, the project maintainer recently made a major breaking change (from GGML- to GGUF-format weights) and most developers opted to come along for the ride. 41 | 42 | 43 | ##### [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) 44 | License: MIT | 45 | Built with: llama.cpp 46 | 47 | This project wraps llama.cpp to provide Python bindings. Perhaps even more significantly, it exposes an API endpoint that mimics the OpenAI API signature. This neat trick makes it relatively easy to take any software that integrates with the OpenAI API and “swap it out” for a locally-running open source LLM. 48 | 49 | 50 | ##### [transformers](https://github.com/huggingface/transformers) 51 | License: Apache 2.0 | 52 | Built with: Hugging Face Transformers 53 | 54 | Hugging Face’s implementation of Transformers has proven to be a critical enabling technology for open source machine learning developers. It makes it easy to, often with just a few lines of code, execute a huge variety of models. HF’s site features literally thousands of models, many of which are useful in contexts beyond generative chatbots. With the transformers library, they are all within easy reach. However, transformers is a GPU-centric library; it does not offer the kind of CPU-based computational frugality of something like llama.cpp. 55 | 56 | ##### [llm.rs](https://github.com/rustformers/llm) 57 | License: Apache 2.0 + MIT | 58 | 59 | Essentially a Rust implementation of llama.cpp, this young project leverages Rust’s unique advantages and already has a small ecosystem growing up around it. 60 | 61 | ##### [Text-generation-inference](https://github.com/huggingface/text-generation-inference) 62 | License: Apache 2.0 | 63 | Built with: transformers 64 | 65 | This project is the open source version of the software that powers Hugging Face’s Inference Endpoints service. As such, it’s a production-ready inference-as-a-service API that can be used to run any transformers-compatible model. 66 | 67 | ##### [VLLM](https://github.com/vllm-project/vllm) 68 | License: Apache 2.0 | 69 | Built with: transformers 70 | 71 | This recent project essentially offers a higher-performance take on text-generation-inference. That is, it provides an inference engine that is considerably faster than regular old transformers. It can also act as a server by exposing an OpenAI-compatible API (much like llama-cpp-python does). There’s been a fair amount of buzz about this project lately because it offers a relatively plug-and-play means of improving performance within the Hugging Face ecosystem. 72 | 73 | 74 | ##### [llm](https://github.com/simonw/llm) 75 | License: Apache 2.0 76 | 77 | This is a command line tool that makes it easy to experiment with a variety of open source LLMs and with OpenAI models, all from the same interface. 78 | 79 | 80 | ##### [MLC](https://github.com/mlc-ai/mlc-llm) 81 | License: Apache 2.0 82 | 83 | MLC is a relatively recent development that could merit ongoing attention. This project aims to enable “native deployment” of LLMs; this means running LLMs in a manner optimized to a wide variety of hardware platforms, including desktops/laptops, phones, and environments that lack GPUs. It’s still in its early days, but it’s already seeing some uses, including as a plug-in for llm.
    84 | 85 | 86 | ##### [gpt4all-backend](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-backend) and [gpt4all-api](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-api) 87 | License: MIT 88 | 89 | Note that the GPT4All one-click installer is based on underlying libraries that can be used to power an inference server. It can even expose an OpenAI-compatible API endpoint. A number of OSS projects embed these libraries as a way to easily get multi-architecture LLM support. 90 | 91 | 92 | 93 | #### Retrieval augmented generation 94 | These projects support retrieval augmented generation use cases for chatbots, wherein custom external documents can be used to augment a chatbot’s responses. This often requires the use of similarity searches across vector embedding databases. 95 | 96 | 97 | ##### [FAISS](https://github.com/facebookresearch/faiss) 98 | License: MIT 99 | 100 | This project comes from Facebook and provides an easy-to-use mechanism for storing vector embeddings and then searching across them. It has proven popular with many open source developers and is rather commonly found in tutorials explaining how to do retrieval augmented generation. 101 | 102 | 103 | ##### [Chroma](https://github.com/chroma-core/chroma) 104 | License: Apache 2.0 105 | 106 | Probably the most popular open source vector database, Chroma is used by a wide variety of open source and commercial AI products. It has a robust feature set, is easy to use and deploy, and integrates with other popular tools (like LangChain; see below). 107 | 108 | 109 | ##### [Weaviate](https://github.com/weaviate/weaviate) 110 | License: BSD 3-Clause 111 | 112 | Weaviate is an alternative to Chroma that is written primarily in Go (vs. Python for Chroma) and may offer better performance while boasting similar integrations. 113 | 114 | 115 | ##### [pgvector](https://github.com/pgvector/pgvector) 116 | License: PostgreSQL License 117 | 118 | This project enables you to use the mature and beloved PostgreSQL database as a vector embeddings solution. This is included in the list not because we have much experience working with it, but simply because it represents an interesting alternative solution for developers who prefer to stick with tools they already know and have deployed in production. 119 | 120 | 121 | ##### [llama_index](https://github.com/jerryjliu/llama_index) 122 | License: MIT | 123 | Built with: langchain 124 | 125 | LlamaIndex offers a robust set of integrations to data sources. This makes it easier to harvest the data that you want to provide to your chatbot, and to keep it updated. This project is built on top of LangChain (see below). 126 | 127 | ##### [LanceDB](https://lancedb.github.io/lancedb/) 128 | License: Apache 2.0 129 | 130 | LanceDB is a local high-performance vector database written in Rust. A hidden gem, similar in spirit to SQLite. 131 | 132 | 133 | #### Orchestration 134 | Orchestration tools help developers compose LLM-based applications by integrating and coordinating multiple tools and models. 135 | 136 | 137 | ##### [LangChain](https://github.com/langchain-ai/langchain) 138 | License: MIT 139 | 140 | LangChain is the leading open source LLM orchestration tool. A number of open source AI projects utilize LangChain under the covers, so its influence has grown steadily over the past year. There is a growing faction of open source developers who believe that LangChain is overly complex and attempts to do too much.
    But even if one subscribes to that belief, LangChain remains a useful resource as a way to learn and understand a variety of LLM development best practices for common use cases. 141 | 142 | 143 | ##### [simpleaichat](https://github.com/minimaxir/simpleaichat) 144 | License: MIT 145 | 146 | Created by a frustrated LangChain user as a simpler alternative, this project has some early buzz and is worth watching. It doesn’t offer nearly as much functionality as LangChain, but it’s arguably much easier to learn and use. 147 | 148 | ##### [Guardrails AI](https://github.com/guardrails-ai/guardrails) 149 | License: Apache 2.0 150 | 151 | Guardrails adds AI safety and control mechanisms to your LLM, giving you the ability to validate model output formats and content, and to automatically (and invisibly to the end user) re-query the model when its responses don’t meet your expectations or needs. 152 | 153 | 154 | #### Front-ends 155 | These projects provide chatbot-style user interfaces on top of LLM inference engines. 156 | 157 | ##### [chatbot-ui](https://github.com/mckaywrigley/chatbot-ui) 158 | License: MIT 159 | 160 | This project is an excellent carbon copy of the ChatGPT Web interface. It implements the OpenAI API, which means you can easily point it at most of the inference runtimes mentioned earlier in this document. 161 | 162 |
    163 | AI Basics 164 |
    165 | 166 |
    167 | or 168 | contribute to this page on Github > 169 |
    170 | 171 |
    172 | -------------------------------------------------------------------------------- /templates/content/running-llms-locally/_sidebar.html: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 | Running LLMs Locally 4 | 5 |
  • -------------------------------------------------------------------------------- /templates/content/running-llms-locally/index-content.html: -------------------------------------------------------------------------------- 1 |

    Running LLMs Locally

    2 |

    What is a llamafile?

    3 |

    As of now, the absolute best and easiest way to run open-source LLMs locally is to use Mozilla's new llamafile project.

    4 |

    llamafiles are executable files that run on six different operating systems (macOS, Windows, Linux, FreeBSD, OpenBSD and NetBSD). They bundle up a single model's weights along with an inference environment into A SINGLE FILE (so amazing!).

    5 |

    You can choose between a "server" version (API plus web interface) or a "command-line" version (similar to Meta's original LLaMa interface on CLI). To learn more about this project, here's the README and a long post about it from Simon Willison.

    6 |

    For our work, we will be spending some time with a small 7B parameter model, Mistral_7B, which reportedly shows stellar performance for a 7B model.

    7 |

    Installing Mistral_7B

    8 |

    Let's first grab the llamafile from the Huggingface of llamafile's creator, Justine Tunney. We'll use the command-line version to showcase some features.

    9 |

    Download the model by tapping this link.

    10 |

    llamafiles combine Justine's cosmopolitan (native single-file executables on any platform) with the community's amazing work on llama.cpp, ggerganov's C++ version of Meta's LLaMa that can run usably on CPUs instead of GPUs.

    11 |

    Mistral-7B is a model created by French startup Mistral AI, with open weights and sources. Since it's based on the LLaMa architecture, we are able to run inference on it locally using llama.cpp, which then enables a llamafile.

    12 |

    The model we're downloading is the instruct-tuned version. This model is tuned to respond by following a system prompt with instructions.

    13 |

    To run, it's as simple as running these commands in your terminal:

    14 |
    mv mistral-7b-instruct-v0.1-Q4_K_M-main.llamafile mistral-7b-instruct.llamafile
    15 | 
    16 | chmod +x mistral-7b-instruct.llamafile
    17 | 
    18 | ./mistral-7b-instruct.llamafile --interactive-first
    19 | 
    20 |

    This makes the llamafile executable, then kicks off the model with an interactive prompt. You'll see that the model is loaded into memory using a 'memory map' - you'll need roughly 5GB of RAM for usable inference. A GPU is not mandatory, since llama.cpp is designed to use all available CPU cores to maximize performance.

    21 |

    Interactive mode enables you to chat with the bot like you normally do with an LLM chat interface, but it's quite barebones. Using the server llamafile instead adds an option to use a self-hosted web UI with more features and an OpenAI-compatible API at http://localhost:8080
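
    For example, assuming the server llamafile is running locally, a request to its OpenAI-compatible endpoint might look like the sketch below. The model name in the payload is illustrative, and the route follows the OpenAI chat-completions convention.

    ```
    # A minimal sketch of calling the server llamafile's OpenAI-compatible API.
    # Assumes the server is running locally on port 8080; names are illustrative.
    import json
    import urllib.request

    payload = {
        "model": "mistral-7b-instruct",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    }
    req = urllib.request.Request(
        "http://localhost:8080/v1/chat/completions",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        reply = json.load(resp)
    print(reply["choices"][0]["message"]["content"])
    ```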

    22 |

    Other models

    23 |

    Here are other interesting llamafiles:

    24 |

    LLaVa v1.5 7B: a compelling new multi-modal (takes image input) model

    WizardCoder 13B: a code-generation model

    28 |

    Our next section will help us compare open models for our particular use-case.

    29 |
    30 | Comparing Open LLMs 31 |
    32 | 33 |
    34 | contribute to this page on Github > 35 |
    36 | -------------------------------------------------------------------------------- /templates/content/running-llms-locally/index.md: 1 | ## Running LLMs Locally 2 | 3 | #### What is a llamafile? 4 | 5 | As of now, the absolute best and easiest way to run open-source LLMs locally is to use Mozilla's new [llamafile](https://github.com/Mozilla-Ocho/llamafile) project. 6 | 7 | llamafiles are executable files that run on six different operating systems (macOS, Windows, Linux, FreeBSD, OpenBSD and NetBSD). They bundle up a single model's weights along with an inference environment into **A SINGLE FILE** (so amazing!). 8 | 9 | You can choose between a "server" version (API plus web interface) or a "command-line" version (similar to Meta's original LLaMa interface on CLI). To learn more about this project, here's the [README](https://github.com/Mozilla-Ocho/llamafile) and a [long post about it from Simon Willison](https://simonwillison.net/2023/Nov/29/llamafile/). 10 | 11 | For our work, we will be spending some time with a small 7B parameter model, _Mistral\_7B_, which reportedly shows stellar performance for a 7B model. 12 | 13 | #### Installing Mistral_7B 14 | 15 | Let's first grab the llamafile from the [Huggingface](https://huggingface.co/jartine/mistral-7b.llamafile) of llamafile's creator, [Justine Tunney](https://justine.lol). We'll use the command-line version to showcase some features. 16 | 17 | Download the model by [tapping this link](https://huggingface.co/jartine/mistral-7b.llamafile/resolve/main/mistral-7b-instruct-v0.1-Q4_K_M-main.llamafile?download=true). 18 | 19 | llamafiles combine Justine's [cosmopolitan](https://justine.lol/cosmopolitan/index.html) (native single-file executables on any platform) with the community's amazing work on [llama.cpp](https://github.com/ggerganov/llama.cpp), [ggerganov](https://github.com/ggerganov/llama.cpp)'s C++ version of Meta's LLaMa that can run usably on CPUs instead of GPUs. 20 | 21 | Mistral-7B is a model created by French startup [Mistral AI](https://mistral.ai/company/), with open weights and sources. Since it's based on the LLaMa architecture, we are able to run inference on it locally using llama.cpp, which in turn enables a llamafile. 22 | 23 | The model we're downloading is the `instruct`-tuned version. This model is tuned to respond by following a system prompt with instructions. 24 | 25 | To run, it's as simple as running these commands in your terminal: 26 | ``` 27 | mv mistral-7b-instruct-v0.1-Q4_K_M-main.llamafile mistral-7b-instruct.llamafile 28 | 29 | chmod +x mistral-7b-instruct.llamafile 30 | 31 | ./mistral-7b-instruct.llamafile --interactive-first 32 | ``` 33 | 34 | This makes the llamafile executable, then kicks off the model with an interactive prompt. You'll see that the model is loaded into memory using a 'memory map' - you'll need roughly 5GB of RAM for usable inference. A GPU is not mandatory, since llama.cpp is designed to use all available CPU cores to maximize performance. 35 | 36 | Interactive mode enables you to chat with the bot like you normally do with an LLM chat interface, but it's quite barebones.
    Using the `server` llamafile instead adds an option to use a self-hosted web UI with more features and an OpenAI-compatible API at [http://localhost:8080](http://localhost:8080) 37 | 38 | 39 | #### Other models 40 | 41 | Here are other interesting llamafiles: 42 | - [LLaVa v1.5 7B](https://huggingface.co/jartine/llava-v1.5-7B-GGUF/resolve/main/llava-v1.5-7b-q4-server.llamafile?download=true): a compelling new multi-modal (takes image input) model 43 | - [WizardCoder 13B](https://huggingface.co/jartine/wizardcoder-13b-python/blob/main/wizardcoder-python-13b-server.llamafile): a code-generation model 44 | 45 | Our next section will help us compare open models for our particular use-case. 46 |
    48 | Comparing Open LLMs 49 |
    50 | 51 |
    52 | contribute to this page on Github > 53 |
    54 | -------------------------------------------------------------------------------- /templates/partials/footer.html: -------------------------------------------------------------------------------- 1 | 120 | -------------------------------------------------------------------------------- /templates/partials/image-cards/image-card.html: -------------------------------------------------------------------------------- 1 | {# layout options: 'image-top', 'image-left', 'image-right', 'image-single' #} 2 | 3 | {% macro imageCard(slide, layout='image-top', renderTitle=true) %} 4 |
    5 |
    6 | {{ slide.alt }} 7 |
    8 |
    9 | {% if renderTitle %} 10 |
    {{ slide.title }}
    11 | {% endif %} 12 |
    {{ slide.description | safe }}
    13 | 14 |
    15 |
    16 | {% endmacro %} -------------------------------------------------------------------------------- /templates/partials/image-cards/image-cards.html: -------------------------------------------------------------------------------- 1 | {% from "./image-card.html" import imageCard %} 2 | {% macro imageCards(slides, orientation) %} 3 |
    4 | {% for slide in slides %} 5 | {{ imageCard(slide, orientation) }} 6 | {% endfor %} 7 |
    8 | {% endmacro %} -------------------------------------------------------------------------------- /templates/partials/ticker.html: 1 | -------------------------------------------------------------------------------- /tools/build_ai_guide.sh: 1 | #!/bin/bash -x 2 | 3 | SOURCE_PATH="templates/content" 4 | WRITE_PATH=$SOURCE_PATH # replace if you want to write to a different path 5 | mkdir -p $WRITE_PATH 6 | echo ">>>> Building AI Guide from $SOURCE_PATH..." 7 | 8 | # test if command `marked` exists 9 | if ! command -v marked &> /dev/null 10 | then 11 | echo "marked could not be found" 12 | echo "Installing marked..." 13 | npm install -g marked@^6.0.0 14 | 15 | if ! command -v marked &> /dev/null 16 | then 17 | echo "Failed to install marked@^6.0.0." 18 | echo "Please install marked@^6.0.0 and try again" 19 | exit 1 20 | fi 21 | fi 22 | 23 | if ! command -v jupyter-nbconvert &> /dev/null 24 | then 25 | echo "jupyter-nbconvert could not be found" 26 | echo "Installing jupyter & jupyter-nbconvert..." 27 | pip install jupyter 28 | 29 | if ! command -v jupyter-nbconvert &> /dev/null 30 | then 31 | echo "Failed to install jupyter-nbconvert." 32 | echo "Please install jupyter & jupyter-nbconvert and try again" 33 | exit 1 34 | fi 35 | fi 36 | 37 | if ! command -v jq &> /dev/null 38 | then 39 | echo "jq could not be found" 40 | echo "Installing jq..." 41 | apt-get install -y jq 42 | 43 | if ! command -v jq &> /dev/null 44 | then 45 | echo "Failed to install jq." 46 | echo "Please install jq and try again" 47 | exit 1 48 | fi 49 | fi 50 | 51 | find $SOURCE_PATH -name "*.md" | while read -r file; 52 | do echo ">>>> Building $file"; 53 | echo 54 | NEW_PATH="${file/$SOURCE_PATH/$WRITE_PATH}" 55 | mkdir -p $(dirname $NEW_PATH) 56 | marked -o "${NEW_PATH%.md}-content.html" "${file}" 57 | echo 58 | node tools/build_ai_guide_content_pages.js "${file}" 59 | done 60 | 61 | find $SOURCE_PATH -name "*.ipynb" | while read -r file; 62 | do echo ">>>> Building notebook $file"; 63 | 64 | NP="${file/$SOURCE_PATH/$WRITE_PATH}" 65 | NEW_PATH=$(dirname "${NP}") # assign NEW_PATH; a bare `echo NEW_PATH=...` would only print it, leaving the nbconvert output dir empty 66 | echo 67 | 68 | NEWNB="${file/.ipynb/-fixed.ipynb}" 69 | OUTPUT_FILENAME=$(basename "${file/.ipynb/.html}") 70 | jq -M 'del(.metadata.widgets)' "${file}" > "${NEWNB}" 71 | jupyter-nbconvert --to html --output-dir "${NEW_PATH}" --output "${OUTPUT_FILENAME}" "${NEWNB}" --CSSHTMLHeaderPreprocessor.style=nord 72 | rm "${NEWNB}" 73 | echo 74 | done 75 | -------------------------------------------------------------------------------- /tools/build_ai_guide_content_pages.js: 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | let marked = require('marked'); 4 | let args = process.argv.slice(2); 5 | 6 | let md_filename = args[0] 7 | console.log("Generating sidebar for " + md_filename) 8 | // generate url from filename, assume the url request-uri is the name of the final folder the file is in 9 | let split_filename = md_filename.split('/') 10 | let url = "/content/"+split_filename[split_filename.length - 2]+"/index.html" 11 | console.log("For URL (from path): "+url) 12 | 13 | function get_markdown(filename) { 14 | return fs.readFileSync(filename, 'utf8'); 15 | } 16 | 17 | function generate_id(title) { 18 | return title.toLowerCase().replace(/[^\w- ]+/g,'').replace(/ +/g,'-') 19 | } 20 | 21 | let tokens =
    marked.lexer(get_markdown(args[0])); 22 | let headers = tokens.filter(t => t.type == 'heading') 23 | // console.log(JSON.stringify(headers, null, 2)); 24 | 25 | let mapped = headers.map(h => [ 26 | h.depth, 27 | h.text, 28 | generate_id(h.text) 29 | ]); 30 | 31 | console.log(JSON.stringify(mapped, null, 2)); 32 | 33 | let html = "
  • " 34 | let page_title = "" 35 | html += mapped.map(h => { 36 | if (h[0] == 2) { 37 | page_title = h[1] 38 | return `
    39 | 40 | ${h[1]} 41 | 42 |
  • " 48 | 49 | // console.log(html) 50 | 51 | // replace the last element of the split_filename with _sidebar.html 52 | // console.log(split_filename) 53 | base_path = split_filename.slice(0,-1).join('/') 54 | if (!fs.existsSync(base_path)){ 55 | fs.mkdirSync(base_path, { recursive: true }); 56 | } 57 | 58 | new_filename = base_path + "/_sidebar.html" 59 | console.log(">>>>> WRITING SIDEBAR: "+new_filename) 60 | fs.writeFileSync(new_filename, html, 'utf8'); 61 | 62 | full_page_template = ` 63 | 64 | {% extends "base-ai.html" %} 65 | 66 | {% block page_title %}Mozilla AI Guide - TITLE{% endblock %} 67 | {% block page_desc %}{% endblock %} 68 | {% block main_id %}content{% endblock %} 69 | 70 | {% block content %} 71 | {% include "PARTIAL_PATH"%} 72 | {% endblock %} 73 | ` 74 | full_page_filename = new_filename.replace("templates", "pages").replace("_sidebar.html", "index.html") 75 | full_page_dir = path.dirname(full_page_filename) 76 | if (!fs.existsSync(full_page_dir)){ 77 | fs.mkdirSync(full_page_dir, { recursive: true }); 78 | } 79 | 80 | console.log(">>>>> WRITING FULL PAGE: "+full_page_filename) 81 | partial_path = split_filename.slice(1,-1).join("/")+"/index-content.html" 82 | fs.writeFileSync(full_page_filename, full_page_template.replace("PARTIAL_PATH", partial_path).replace("TITLE", page_title), 'utf8'); 83 | -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | const path = require("path"); 8 | const HtmlBundlerPlugin = require("html-bundler-webpack-plugin"); 9 | const { Marked } = require("marked"); 10 | 11 | const isDev = process.env.NODE_ENV === "development"; 12 | 13 | module.exports = { 14 | mode: isDev ? 
"development" : "production", 15 | output: { 16 | path: path.resolve(__dirname, "dist"), 17 | publicPath: "/", 18 | clean: true, 19 | }, 20 | module: { 21 | rules: [ 22 | { 23 | test: /\.js$/, 24 | exclude: /node_modules/, 25 | use: { 26 | loader: "babel-loader", 27 | }, 28 | }, 29 | { 30 | test: /\.scss$/, 31 | include: path.resolve(__dirname, "css"), 32 | use: [ 33 | "css-loader", 34 | "sass-loader", 35 | ], 36 | }, 37 | { 38 | test: /\.css$/i, 39 | include: path.resolve(__dirname, "css"), 40 | use: [ 41 | { 42 | loader: "css-loader", 43 | options: { 44 | import: false, // disable @import at-rules handling 45 | } 46 | }, 47 | { 48 | loader: "postcss-loader", 49 | options: { 50 | postcssOptions: { 51 | plugins: { 52 | tailwindcss: {}, 53 | autoprefixer: {}, 54 | }, 55 | }, 56 | }, 57 | }, 58 | ], 59 | }, 60 | { 61 | test: /\.(png|svg|jpe?g|ico|gif)$/, 62 | type: "asset/resource", 63 | generator: { 64 | filename: "img/[name].[hash:8][ext]", 65 | }, 66 | }, 67 | { 68 | test: /\.(woff|woff2|eot|ttf|otf)$/i, 69 | type: "asset/resource", 70 | generator: { 71 | filename: "fonts/[name][ext]", 72 | }, 73 | }, 74 | ], 75 | }, 76 | // enable HMR with live reload 77 | devServer: { 78 | port: 8000, 79 | open: false, 80 | static: { 81 | directory: path.join(__dirname, "dist"), 82 | }, 83 | watchFiles: { 84 | // live reload: watch changes in source directories 85 | paths: ["pages/**/*.html", "templates/**/*.html", "css/*.scss", "scripts/*.js"], 86 | options: { 87 | usePolling: true, 88 | }, 89 | }, 90 | }, 91 | plugins: [ 92 | new HtmlBundlerPlugin({ 93 | entry: "pages/", 94 | data: { 95 | env: process.env 96 | }, 97 | js: { 98 | filename: "scripts/[name].[contenthash:8].js", 99 | }, 100 | css: { 101 | filename: "css/[name].[contenthash:8].css", 102 | }, 103 | loaderOptions: { 104 | root: __dirname, 105 | preprocessor: 'nunjucks', 106 | preprocessorOptions: { 107 | views: [ 108 | "pages", 109 | "templates", 110 | ], 111 | autoescape: true, // escape dangerous characters, defaults 'true' 112 | }, 113 | }, 114 | }), 115 | ], 116 | }; 117 | --------------------------------------------------------------------------------